From 48e428bbd741882119df6aba6b21637373e5dfac Mon Sep 17 00:00:00 2001 From: aredenba-rh Date: Wed, 20 May 2026 01:07:22 -0400 Subject: [PATCH 001/153] manage kg specs --- specs/extraction/agent-sessions.spec.md | 47 +++++++++++++ specs/extraction/operations.spec.md | 69 +++++++++++++++++++ specs/graph/mutations.spec.md | 15 ++++ specs/graph/schema-authoring.spec.md | 53 ++++++++++++++ specs/index.spec.md | 11 +++ .../knowledge-graph-workspace.spec.md | 60 ++++++++++++++++ specs/nfr/workload-execution.spec.md | 59 ++++++++++++++++ specs/ui/experience.spec.md | 60 ++++++++++++++++ 8 files changed, 374 insertions(+) create mode 100644 specs/extraction/agent-sessions.spec.md create mode 100644 specs/extraction/operations.spec.md create mode 100644 specs/graph/schema-authoring.spec.md create mode 100644 specs/management/knowledge-graph-workspace.spec.md create mode 100644 specs/nfr/workload-execution.spec.md diff --git a/specs/extraction/agent-sessions.spec.md b/specs/extraction/agent-sessions.spec.md new file mode 100644 index 000000000..020b1f0f7 --- /dev/null +++ b/specs/extraction/agent-sessions.spec.md @@ -0,0 +1,47 @@ +# Agent Sessions + +## Purpose +Agent sessions provide long-running conversational extraction workflows scoped to user, knowledge graph, and mode. Sessions remain active until explicitly cleared, while preserving auditable run history and metrics. + +## Requirements + +### Requirement: Session Scope +The system SHALL scope extraction agent sessions per user, knowledge graph, and mode. 
+ +#### Scenario: Scope isolation +- GIVEN two users working on the same knowledge graph +- WHEN they open extraction agent sessions +- THEN each user receives a separate session +- AND session state is not shared across users + +#### Scenario: Mode isolation +- GIVEN a user session in bootstrap mode and a session in extraction mode +- WHEN both sessions exist for the same knowledge graph +- THEN each session keeps separate context and runtime state + +### Requirement: Long-Running Session Lifecycle +The system SHALL keep sessions active until explicit reset. + +#### Scenario: Persistent session context +- GIVEN an active extraction agent session +- WHEN the user sends follow-up messages over time +- THEN prior session context remains available for continued conversation + +### Requirement: Clear Chat Reset +The system SHALL provide an explicit "Clear chat" action that resets runtime context. + +#### Scenario: Full reset on clear +- GIVEN an active session with runtime context +- WHEN the user clicks "Clear chat" +- THEN message history and runtime context are reset +- AND a new clean session is started for that user/knowledge-graph/mode scope + +### Requirement: Session Archival and Retention +The system SHALL retain completed session and run records indefinitely. + +#### Scenario: Historical session visibility +- GIVEN prior sessions and mutation runs +- WHEN users or administrators query session history +- THEN archived sessions and associated run records remain available +- AND each record includes last-updated timestamps and run-level metrics + diff --git a/specs/extraction/operations.spec.md b/specs/extraction/operations.spec.md new file mode 100644 index 000000000..40110e1ee --- /dev/null +++ b/specs/extraction/operations.spec.md @@ -0,0 +1,69 @@ +# Operations + +## Purpose +Extraction operations define the mode-specific behaviors for schema bootstrap, extraction job setup, and minor direct edits. 
All write behavior is expressed as MutationLogs associated with a knowledge graph and session. + +## Requirements + +### Requirement: Mode-Specific Skill Sets +The system SHALL provide different default skill sets for bootstrap and extraction operations modes. + +#### Scenario: Bootstrap skills +- GIVEN a knowledge graph in `schema_bootstrap` +- WHEN an extraction agent session starts +- THEN the default skill set is schema-bootstrap oriented +- AND it prioritizes complete entity/relationship modeling and prepopulated instance coverage + +#### Scenario: Extraction skills +- GIVEN a knowledge graph in `extraction_operations` +- WHEN an extraction agent session starts +- THEN the default skill set is extraction-job-setup and minor-direct-edit oriented +- AND schema edit skills remain available but are not the primary framing + +### Requirement: Skill Resolution Model +The system SHALL resolve agent skills using global templates with knowledge-graph overrides. + +#### Scenario: Global template with override +- GIVEN a knowledge graph with custom skill overrides +- WHEN an extraction session resolves skill instructions +- THEN global skill templates are loaded first +- AND knowledge-graph overrides are applied on top + +### Requirement: Unified Extraction and Manual Edit Surface +The system SHALL provide one operational area for extraction jobs and minor direct graph edits. + +#### Scenario: Unified write path +- GIVEN a user in extraction operations mode +- WHEN the user runs extraction jobs or performs minor direct edits +- THEN both behaviors emit MutationLogs +- AND both target the same knowledge graph + +### Requirement: Validate-Then-Transition Workflow +The system SHALL gate transition from bootstrap mode through explicit validation and user action. 
+ +#### Scenario: Validation gate +- GIVEN a knowledge graph in `schema_bootstrap` +- WHEN the user clicks Validate +- THEN validation results are returned and persisted +- AND transition remains unavailable until checks pass + +#### Scenario: Explicit transition action +- GIVEN validation has passed in `schema_bootstrap` +- WHEN the user clicks "Go to Extraction/Mutations" +- THEN the knowledge graph transitions to `extraction_operations` +- AND a new extraction-mode agent session is started + +### Requirement: MutationLog Session Association +The system SHALL associate MutationLogs with both knowledge graph and session/run identity. + +#### Scenario: Session-linked mutation runs +- GIVEN a session producing mutation operations +- WHEN MutationLogs are persisted +- THEN each log run stores session ID, knowledge graph ID, actor identity, and timestamps + +#### Scenario: Per-run operation metrics +- GIVEN a persisted mutation log run +- WHEN metrics are recorded +- THEN operation counts are captured by operation class (for example create/update for entity and relationship instances) +- AND token usage and cost metrics are captured for the run + diff --git a/specs/graph/mutations.spec.md b/specs/graph/mutations.spec.md index 50a90da5c..22dce6d9e 100644 --- a/specs/graph/mutations.spec.md +++ b/specs/graph/mutations.spec.md @@ -157,3 +157,18 @@ The system SHALL enforce correct ordering of operations to maintain referential - AND DELETE operations run next (edges before nodes) - AND CREATE operations follow (nodes before edges) - AND UPDATE operations run last + +### Requirement: MutationLog Run Metadata +The system SHALL persist run-level metadata for mutation logs. 
+ +#### Scenario: Session and scope association +- GIVEN mutations produced by an extraction or manual-edit session +- WHEN the mutation log run is persisted +- THEN the run is associated with session ID and knowledge graph ID +- AND actor identity and run timestamps are recorded + +#### Scenario: Metrics capture +- GIVEN a persisted mutation log run +- WHEN run metrics are finalized +- THEN token usage and cost totals are stored +- AND operation counts are stored by operation class diff --git a/specs/graph/schema-authoring.spec.md b/specs/graph/schema-authoring.spec.md new file mode 100644 index 000000000..0505852ef --- /dev/null +++ b/specs/graph/schema-authoring.spec.md @@ -0,0 +1,53 @@ +# Schema Authoring + +## Purpose +Schema authoring defines how entity and relationship type definitions are created and evolved in the graph through mutation logs. It supports a bootstrap flow for first-time schema establishment and ongoing schema evolution during extraction operations. + +## Requirements + +### Requirement: Graph-Native Type Definitions +The system SHALL treat graph-stored type definitions as the canonical schema source. + +#### Scenario: Canonical storage +- GIVEN schema mutations are applied +- WHEN entity and relationship type definitions are persisted +- THEN canonical schema state is stored in the graph schema layer +- AND no parallel "design artifact" source of truth is required + +### Requirement: Bootstrap Authoring Flow +The system SHALL support schema authoring during `schema_bootstrap` mode through mutation logs. 
+ +#### Scenario: Bootstrap schema creation +- GIVEN a knowledge graph in `schema_bootstrap` +- WHEN an agent or user creates entity and relationship types +- THEN changes are written via mutation logs +- AND resulting graph schema reflects those mutations + +#### Scenario: Capabilities-driven start +- GIVEN a new bootstrap session +- WHEN the schema agent starts +- THEN it asks for user capabilities/goals +- AND it offers two paths: an immediate first-pass schema attempt, or guided question-by-question co-design + +### Requirement: Ongoing Schema Evolution +The system SHALL allow schema updates during `extraction_operations` mode. + +#### Scenario: Additive schema change in extraction mode +- GIVEN a knowledge graph in `extraction_operations` +- WHEN a user or agent adds a new property or type +- THEN the change is accepted through mutation logs +- AND extraction operations continue using the updated schema + +### Requirement: Prepopulated Type Semantics +The system SHALL enforce `prepopulated=true` as a transition-blocking readiness constraint. + +#### Scenario: Prepopulated type with instances +- GIVEN a type marked `prepopulated=true` +- WHEN readiness is evaluated +- THEN the type passes only if it has one or more instances + +#### Scenario: Prepopulated type without instances +- GIVEN a type marked `prepopulated=true` with zero instances +- WHEN readiness is evaluated +- THEN validation fails and transition to extraction mode is blocked + diff --git a/specs/index.spec.md b/specs/index.spec.md index a28e9c70e..5cce73fa4 100644 --- a/specs/index.spec.md +++ b/specs/index.spec.md @@ -29,6 +29,7 @@ The persistence and query engine for property graph data. 
| [Mutations](graph/mutations.spec.md) | Applying mutation logs to the graph | | [Queries](graph/queries.spec.md) | Reading nodes, edges, and subgraphs | | [Schema](graph/schema.spec.md) | Type definitions and schema management | +| [Schema Authoring](graph/schema-authoring.spec.md) | Bootstrap and ongoing schema authoring lifecycle | | [Bulk Loading](graph/bulk-loading.spec.md) | High-throughput graph ingestion | ### [Management](management/) — Control Plane @@ -37,6 +38,7 @@ CRUD for platform resources: knowledge graphs, data sources, credentials. | Spec | Scope | |------|-------| | [Knowledge Graphs](management/knowledge-graphs.spec.md) | Knowledge graph configuration lifecycle | +| [Knowledge Graph Workspace](management/knowledge-graph-workspace.spec.md) | Knowledge graph mode lifecycle and workspace status | | [Data Sources](management/data-sources.spec.md) | Data source configuration and sync runs | | [Credentials](management/credentials.spec.md) | Encrypted credential storage | @@ -56,6 +58,14 @@ Connecting to external sources, detecting changes, and packaging raw content for | [Adapters](ingestion/adapters.spec.md) | Adapter port, GitHub adapter, dlt framework integration | | [Sync Lifecycle](ingestion/sync-lifecycle.spec.md) | Event-driven state machine, status tracking, staleness detection | +### [Extraction](extraction/) — Agent-Orchestrated Mutation Production +AI-assisted schema and extraction workflows that emit MutationLogs for Graph application. + +| Spec | Scope | +|------|-------| +| [Operations](extraction/operations.spec.md) | Mode-specific agent operations and mutation-log production | +| [Agent Sessions](extraction/agent-sessions.spec.md) | Session lifecycle, reset behavior, and session metrics | + ### [Shared Kernel](shared-kernel/) — Cross-Cutting Contracts Capabilities shared across bounded contexts. 
@@ -88,3 +98,4 @@ The web interface for platform setup, data source management, and graph explorat | [CORS](nfr/cors.spec.md) | Cross-origin resource sharing policy | | [Application Lifecycle](nfr/application-lifecycle.spec.md) | Startup bootstrap, shutdown, default configuration | | [API Conventions](nfr/api-conventions.spec.md) | URL structure, status codes, error format, request/response models | +| [Workload Execution](nfr/workload-execution.spec.md) | Container execution model, credential injection, and workload isolation | diff --git a/specs/management/knowledge-graph-workspace.spec.md b/specs/management/knowledge-graph-workspace.spec.md new file mode 100644 index 000000000..a3ed74b00 --- /dev/null +++ b/specs/management/knowledge-graph-workspace.spec.md @@ -0,0 +1,60 @@ +# Knowledge Graph Workspace + +## Purpose +A knowledge graph workspace provides a mode-aware control surface for progressing from initial schema bootstrap to ongoing extraction and mutation operations. It exposes lifecycle state, readiness checks, and navigation contracts consumed by the UI and extraction agents. + +## Requirements + +### Requirement: Workspace Mode Lifecycle +The system SHALL track each knowledge graph in one of two modes: `schema_bootstrap` and `extraction_operations`. + +#### Scenario: Default mode on creation +- GIVEN a newly created knowledge graph +- WHEN the knowledge graph record is persisted +- THEN its workspace mode is `schema_bootstrap` + +#### Scenario: Irreversible transition +- GIVEN a knowledge graph in `schema_bootstrap` +- WHEN the user completes validation and transitions to extraction operations +- THEN the mode changes to `extraction_operations` +- AND the mode cannot be changed back to `schema_bootstrap` + +### Requirement: Workspace Status Projection +The system SHALL expose a knowledge-graph workspace status projection for UI rendering. 
+ +#### Scenario: Status includes mode and readiness +- GIVEN a knowledge graph workspace request +- WHEN the status projection is returned +- THEN it includes current mode, validation readiness flags, and a transition eligibility flag + +#### Scenario: Status includes session pointers +- GIVEN one or more extraction agent sessions associated with the knowledge graph +- WHEN the status projection is returned +- THEN it includes pointers to the current active session per mode and the most recent completed session + +### Requirement: Bootstrap Readiness Validation +The system SHALL define schema bootstrap readiness checks for transition eligibility. + +#### Scenario: Minimum schema readiness +- GIVEN a knowledge graph in `schema_bootstrap` +- WHEN readiness is evaluated +- THEN validation fails unless there is at least one entity type and at least one relationship type + +#### Scenario: Prepopulated instance readiness +- GIVEN one or more types marked `prepopulated=true` +- WHEN readiness is evaluated +- THEN validation fails if any such type has zero instances + +### Requirement: Transition Authorization +The system SHALL require `edit` permission on the knowledge graph for bootstrap validation and mode transition. 
+ +#### Scenario: Authorized validate and transition +- GIVEN a user with `edit` permission on the knowledge graph +- WHEN the user invokes validate and transition actions +- THEN both actions are permitted + +#### Scenario: Unauthorized validate and transition +- GIVEN a user without `edit` permission on the knowledge graph +- WHEN the user invokes validate or transition actions +- THEN the action is rejected with a forbidden error + diff --git a/specs/nfr/workload-execution.spec.md b/specs/nfr/workload-execution.spec.md new file mode 100644 index 000000000..ce2149997 --- /dev/null +++ b/specs/nfr/workload-execution.spec.md @@ -0,0 +1,59 @@ +# Workload Execution + +NFR: This spec describes execution, isolation, and credential-injection constraints for agent workloads. + +## Purpose +Kartograph executes extraction agent workloads in containers with a hybrid model: sticky conversational containers per session and ephemeral worker containers for extraction execution. Runtime credentials are injected securely and scoped with least privilege. + +## Requirements + +### Requirement: Container-Only Agent Runtime +The system SHALL run extraction agents in containers for both local development and deployed environments. + +#### Scenario: Local development execution +- GIVEN local development workflows +- WHEN extraction agents are started +- THEN they run inside local containers rather than host-native processes + +#### Scenario: Deployed execution +- GIVEN a deployed environment +- WHEN extraction workloads are started +- THEN they run in pod containers managed by the platform + +### Requirement: Hybrid Container Model +The system SHALL use sticky containers for chat sessions and ephemeral containers for extraction execution workers. 
+ +#### Scenario: Sticky session container +- GIVEN a user starts an extraction chat session +- WHEN the session remains active +- THEN the session reuses the same container context until clear/reset or timeout + +#### Scenario: Ephemeral execution workers +- GIVEN extraction jobs are launched +- WHEN worker tasks execute +- THEN they run in ephemeral worker containers +- AND worker containers are terminated after job completion or failure + +### Requirement: Runtime Credential Injection +The system SHALL provide runtime credentials to agent containers through secure injection. + +#### Scenario: Workload authentication material +- GIVEN a workload container requires access to platform services +- WHEN the workload starts +- THEN short-lived authentication credentials are injected at runtime +- AND credentials are not hardcoded in repository files, container images, or mutation logs + +#### Scenario: Least-privilege scope +- GIVEN an extraction workload for a knowledge graph +- WHEN credentials are issued +- THEN permissions are limited to required tenant and knowledge-graph scope operations + +### Requirement: Skill and Context Availability +The system SHALL provide required runtime context in workload containers. + +#### Scenario: Built-in context +- GIVEN an extraction workload container +- WHEN the workload initializes +- THEN ingestion context resources and repository files needed for processing are available +- AND the skills directory is available to the agent runtime + diff --git a/specs/ui/experience.spec.md b/specs/ui/experience.spec.md index eb43171e4..373d727e7 100644 --- a/specs/ui/experience.spec.md +++ b/specs/ui/experience.spec.md @@ -511,3 +511,63 @@ The system SHALL support light and dark color schemes. 
- GIVEN the user interface - THEN a dark mode toggle is available in the header - AND the preference persists across sessions + +### Requirement: Knowledge Graph Manage Actions +The system SHALL expose knowledge graph row actions as Manage, Query, and Delete. + +#### Scenario: Knowledge graph action set +- GIVEN the knowledge graph list +- THEN each knowledge graph row shows actions for Manage, Query, and Delete +- AND legacy actions not in this set are not shown in the row action cluster + +#### Scenario: Manage navigation +- GIVEN a user clicks Manage on a knowledge graph row +- WHEN navigation completes +- THEN the user lands on that knowledge graph's mode-aware workspace page + +### Requirement: Bootstrap to Extraction Transition +The system SHALL provide a UI-gated transition from schema bootstrap mode to extraction operations mode. + +#### Scenario: Validate action +- GIVEN a user with `edit` permission on a knowledge graph in bootstrap mode +- WHEN the user clicks Validate +- THEN validation results are displayed in the workspace +- AND transition action remains unavailable until validation passes + +#### Scenario: Go to extraction action +- GIVEN bootstrap validation has passed +- WHEN the user clicks "Go to Extraction/Mutations" +- THEN the UI transitions the knowledge graph into extraction operations mode +- AND a new extraction-mode agent session is started + +### Requirement: Unified Extraction Workspace +The system SHALL present extraction jobs and minor direct edits in one workspace. 
+ +#### Scenario: Conversation-first layout +- GIVEN a user in extraction operations mode +- THEN the conversation panel remains visible as the primary surface +- AND the lower workspace area is tabbed for operational views + +#### Scenario: Clear chat reset +- GIVEN an active extraction conversation session +- WHEN the user clicks Clear chat +- THEN the current chat history is cleared +- AND a new clean session is started for the same user and knowledge graph + +#### Scenario: Tabbed operations area +- GIVEN the extraction workspace +- WHEN the user switches tabs +- THEN extraction-job controls, manual mutation tools, and run/log views are available without leaving the page + +### Requirement: MutationLog Browser +The system SHALL provide a knowledge-graph-scoped MutationLog browser. + +#### Scenario: Scoped listing +- GIVEN the user is viewing a specific knowledge graph +- WHEN the user opens MutationLogs +- THEN only mutation log runs associated with that knowledge graph are listed + +#### Scenario: Run detail panel +- GIVEN a mutation log run is selected +- WHEN details are shown +- THEN the UI displays run summary, per-entry operation previews, token/cost metrics, and operation counts by type From fcdbe4dd1b8b7ab2bdc3bf77f81b75fef9644250 Mon Sep 17 00:00:00 2001 From: aredenba-rh Date: Wed, 20 May 2026 01:31:53 -0400 Subject: [PATCH 002/153] minor edits to specs; github issues created --- specs/ingestion/sync-lifecycle.spec.md | 22 +++++++++++++++++++ specs/management/data-sources.spec.md | 30 ++++++++++++++++++++++++++ specs/ui/experience.spec.md | 18 ++++++++++++++++ 3 files changed, 70 insertions(+) diff --git a/specs/ingestion/sync-lifecycle.spec.md b/specs/ingestion/sync-lifecycle.spec.md index c2713ec96..07fe77bef 100644 --- a/specs/ingestion/sync-lifecycle.spec.md +++ b/specs/ingestion/sync-lifecycle.spec.md @@ -70,6 +70,28 @@ The system SHALL support both manual and scheduled sync triggers. 
- WHEN the schedule fires - THEN a sync is initiated as if manually triggered +### Requirement: Commit-Baseline-Aware Ingestion +The system SHALL maintain commit-aware ingestion context for Git-backed sources. + +#### Scenario: Baseline at extraction start +- GIVEN a Git-backed data source with a local clone +- WHEN a sync run starts +- THEN the run baseline is set to `commit_during_last_extraction` +- AND incremental extraction compares current source state against that baseline + +#### Scenario: Branch head refresh for ingestion readiness +- GIVEN a Git-backed data source with a tracked branch +- WHEN sync orchestration prepares ingestion context +- THEN the latest tracked branch HEAD is resolved and stored as `tracked_branch_head_commit` +- AND ingestion context for that run is prepared from the corresponding latest files + +#### Scenario: No-new-commit outcome +- GIVEN `tracked_branch_head_commit` equals `commit_during_last_extraction` +- WHEN a sync run is requested +- THEN the system may short-circuit heavy extraction work +- AND a sync run record is still created for auditability +- AND run status and logs indicate no source changes were detected + ### Requirement: Staleness-Based Node Lifecycle The system SHALL use timestamp comparison to detect stale graph nodes instead of explicit delete events. diff --git a/specs/management/data-sources.spec.md b/specs/management/data-sources.spec.md index fe056dceb..da21a5ce3 100644 --- a/specs/management/data-sources.spec.md +++ b/specs/management/data-sources.spec.md @@ -126,6 +126,36 @@ The system SHALL track the execution status of each sync operation. - WHEN the data source is deleted - THEN all associated sync runs are cascade-deleted +### Requirement: Source Commit Reference Tracking +The system SHALL track source-repository commit references for Git-based data sources. 
+ +#### Scenario: Local clone commit tracking +- GIVEN a Git-backed data source with a local clone available to ingestion tooling +- WHEN source commit references are refreshed +- THEN a clone-head commit reference is recorded as the ingestion clone HEAD for the tracked branch + +#### Scenario: Commit during last extraction tracking +- GIVEN a sync run starts for a Git-backed data source +- WHEN extraction begins +- THEN a last-extraction baseline commit reference is recorded from local clone state at run start +- AND this value remains fixed for that run even if branch HEAD changes later + +#### Scenario: Tracked branch head commit tracking +- GIVEN a Git-backed data source configured with a tracked branch +- WHEN source commit references are refreshed +- THEN a tracked-branch head commit reference is recorded from the latest known remote branch HEAD + +#### Scenario: UI label compatibility +- GIVEN commit references are displayed in the UI +- WHEN labels are rendered +- THEN labels may use either legacy terms ("Local clone commit", "Commit during last extraction") or clearer equivalents +- AND displayed labels map unambiguously to clone-head, last-extraction-baseline, and tracked-branch-head references + +#### Scenario: Adapter scope +- GIVEN a non-Git adapter type +- WHEN source commit references are requested +- THEN Git-specific commit fields are absent or null + ### Requirement: Adapter Connection Config Normalization Each adapter SHALL accept user-friendly connection parameters and normalize them internally. diff --git a/specs/ui/experience.spec.md b/specs/ui/experience.spec.md index 373d727e7..dd1f091d1 100644 --- a/specs/ui/experience.spec.md +++ b/specs/ui/experience.spec.md @@ -144,6 +144,24 @@ The system SHALL show sync progress and status for each data source. 
- WHEN the user triggers a sync - THEN a new sync run begins and progress is shown +#### Scenario: Commit-hash status cues +- GIVEN a Git-backed data source card in the UI +- WHEN commit reference data is available +- THEN the UI displays `Local clone commit`, `Commit during last extraction`, and tracked branch head commit values +- AND the UI visually indicates whether new commits are available since the last extraction baseline + +#### Scenario: Maintenance-readiness cue +- GIVEN a Git-backed data source where tracked branch head differs from commit during last extraction +- WHEN the user views data source status +- THEN the UI highlights that maintenance/extraction work can be run for new source changes + +#### Scenario: Diff summary cue +- GIVEN a Git-backed data source with commit references for baseline and latest tracked branch head +- WHEN the user opens sync/maintenance details +- THEN the UI shows a diff summary relative to the last extraction baseline suitable for deciding whether to run maintenance +- AND the summary includes aggregate counts and a changed-file list +- AND the changed-file list is collapsed by default and expanded on demand to avoid overwhelming the page + ### Requirement: Get Started Querying (MCP Connection) The system SHALL make it easy for users to connect AI agents to their knowledge graph via MCP. From d48c322673e768de69e7fb0f42f7989d25a05510 Mon Sep 17 00:00:00 2001 From: Austin Redenbaugh Date: Wed, 20 May 2026 01:52:10 -0400 Subject: [PATCH 003/153] feat(management): add workspace mode lifecycle (#643) (#680) * chore(skills): add subagent delivery execution protocol Add a reusable subagent skill that standardizes issue-based branching, TDD execution, PR structure, and merge/conflict handling into feature/manage-knowledge-graph. 
Co-authored-by: Cursor * feat(management): add knowledge graph workspace mode lifecycle Implement schema_bootstrap as the default workspace mode and persist irreversible transition state to extraction_operations across domain, repository, API responses, and migration coverage. Co-authored-by: Cursor --------- Co-authored-by: Cursor --- skills/subagent-delivery/SKILL.md | 83 +++++++++++++++++++ ..._add_workspace_mode_to_knowledge_graphs.py | 39 +++++++++ .../domain/aggregates/knowledge_graph.py | 18 +++- src/api/management/domain/exceptions.py | 6 ++ src/api/management/domain/value_objects.py | 7 ++ .../infrastructure/models/knowledge_graph.py | 7 ++ .../knowledge_graph_repository.py | 9 +- .../presentation/knowledge_graphs/models.py | 6 ++ .../test_knowledge_graph_repository.py | 27 ++++++ .../test_knowledge_graphs_routes.py | 7 +- .../unit/management/test_knowledge_graph.py | 36 +++++++- 11 files changed, 241 insertions(+), 4 deletions(-) create mode 100644 skills/subagent-delivery/SKILL.md create mode 100644 src/api/infrastructure/migrations/versions/f4a5b6c7d8e9_add_workspace_mode_to_knowledge_graphs.py diff --git a/skills/subagent-delivery/SKILL.md b/skills/subagent-delivery/SKILL.md new file mode 100644 index 000000000..0273dba82 --- /dev/null +++ b/skills/subagent-delivery/SKILL.md @@ -0,0 +1,83 @@ +--- +name: subagent-delivery +description: > + Executes a GitHub issue end-to-end with consistent branch, test, PR, and merge behavior. + Use when implementing units of work with sub-agents, preparing pull requests, resolving merge + conflicts, or when the user asks to run issue-by-issue delivery into feature/manage-knowledge-graph. +--- + +# Subagent Delivery Protocol + +Follow this protocol for every assigned issue. + +## Scope and Inputs + +Before coding, gather: + +1. Issue number and acceptance criteria. +2. Target branch: `feature/manage-knowledge-graph`. +3. Current repository state (`git status`, `git branch -vv`). 
+ +If acceptance criteria are ambiguous, ask one focused question before implementation. + +## Git Workflow + +1. Ensure local target branch is up to date: + - `git checkout feature/manage-knowledge-graph` + - `git pull --ff-only` +2. Create a dedicated branch per issue: + - `feat/issue-<number>-<short-slug>` for features + - `fix/issue-<number>-<short-slug>` for fixes +3. Never mix multiple issues in one branch. +4. Keep commits atomic and conventional (`feat:`, `fix:`, `refactor:`, `test:`). + +## Implementation Workflow (TDD Required) + +1. Read relevant spec(s) and affected bounded context code first. +2. Write/adjust tests for expected behavior before implementation. +3. Implement minimal code to satisfy tests. +4. Run focused tests first, then broader suite for touched context. +5. Run lints/type checks for changed files when applicable. +6. If behavior depends on configuration, use settings/DI instead of hardcoding. + +## PR Workflow + +1. Push branch to origin with upstream tracking. +2. Open PR against `feature/manage-knowledge-graph`. +3. Use this body structure: + +```markdown +## Summary +- <change 1> +- <change 2> + +## Testing +- [x] <test command or check that passed> +- [x] <test command or check that passed> +- [ ] <follow-up check not yet run> + +## Risks +- <risk and mitigation> or <none> +``` + +4. Link the issue in PR body using `Closes #<issue-number>` when appropriate. + +## Merge and Conflict Handling + +1. Before merge, ensure CI checks are green. +2. If branch is stale, rebase or merge target branch cleanly. +3. Resolve conflicts preserving: + - Spec-required behavior + - Existing user changes + - Authorization and tenancy boundaries +4. Re-run tests after conflict resolution. +5. Merge into `feature/manage-knowledge-graph` only after verification. + +## Non-Negotiables + +- Do not use destructive git commands. +- Do not skip tests. +- Do not disable hooks. +- Do not commit secrets or credentials. +- Prefer fakes over mocks in unit tests when testing domain/application behavior. 
+ diff --git a/src/api/infrastructure/migrations/versions/f4a5b6c7d8e9_add_workspace_mode_to_knowledge_graphs.py b/src/api/infrastructure/migrations/versions/f4a5b6c7d8e9_add_workspace_mode_to_knowledge_graphs.py new file mode 100644 index 000000000..98ef99082 --- /dev/null +++ b/src/api/infrastructure/migrations/versions/f4a5b6c7d8e9_add_workspace_mode_to_knowledge_graphs.py @@ -0,0 +1,39 @@ +"""add workspace_mode to knowledge_graphs + +Adds lifecycle mode tracking to KnowledgeGraph records with a non-null +default of ``schema_bootstrap``. + +Revision ID: f4a5b6c7d8e9 +Revises: e2f3a4b5c6d7 +Create Date: 2026-05-14 12:00:00.000000 +""" + +from typing import Sequence, Union + +import sqlalchemy as sa +from alembic import op + + +# revision identifiers, used by Alembic. +revision: str = "f4a5b6c7d8e9" +down_revision: Union[str, Sequence[str], None] = "e2f3a4b5c6d7" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + """Add workspace_mode with bootstrap default for existing rows.""" + op.add_column( + "knowledge_graphs", + sa.Column( + "workspace_mode", + sa.String(length=64), + nullable=False, + server_default="schema_bootstrap", + ), + ) + + +def downgrade() -> None: + """Drop workspace_mode from knowledge_graphs.""" + op.drop_column("knowledge_graphs", "workspace_mode") diff --git a/src/api/management/domain/aggregates/knowledge_graph.py b/src/api/management/domain/aggregates/knowledge_graph.py index 63d10cefb..542d6eebd 100644 --- a/src/api/management/domain/aggregates/knowledge_graph.py +++ b/src/api/management/domain/aggregates/knowledge_graph.py @@ -15,12 +15,17 @@ AggregateDeletedError, InvalidIdentifierError, InvalidKnowledgeGraphNameError, + InvalidWorkspaceModeTransitionError, ) from management.domain.observability import ( DefaultKnowledgeGraphProbe, KnowledgeGraphProbe, ) -from management.domain.value_objects import KnowledgeGraphId, OntologyConfig +from 
management.domain.value_objects import ( + KnowledgeGraphId, + OntologyConfig, + WorkspaceMode, +) if TYPE_CHECKING: from management.domain.events import DomainEvent @@ -51,6 +56,7 @@ class KnowledgeGraph: created_at: datetime updated_at: datetime ontology: OntologyConfig | None = field(default=None) + workspace_mode: WorkspaceMode = field(default=WorkspaceMode.SCHEMA_BOOTSTRAP) _pending_events: list[DomainEvent] = field(default_factory=list, repr=False) _probe: KnowledgeGraphProbe = field( default_factory=DefaultKnowledgeGraphProbe, @@ -63,6 +69,7 @@ def __post_init__(self) -> None: self._validate_name(self.name) self._validate_identifier(self.tenant_id, "tenant_id") self._validate_identifier(self.workspace_id, "workspace_id") + self.workspace_mode = WorkspaceMode(self.workspace_mode) def _validate_name(self, name: str) -> None: """Validate knowledge graph name length. @@ -230,6 +237,15 @@ def clear_ontology(self) -> None: self.ontology = None self.updated_at = datetime.now(UTC) + def transition_to_extraction_operations(self) -> None: + """Transition workspace mode from bootstrap to extraction operations.""" + if self.workspace_mode == WorkspaceMode.EXTRACTION_OPERATIONS: + raise InvalidWorkspaceModeTransitionError( + "Workspace mode is already extraction_operations" + ) + self.workspace_mode = WorkspaceMode.EXTRACTION_OPERATIONS + self.updated_at = datetime.now(UTC) + def mark_for_deletion( self, *, diff --git a/src/api/management/domain/exceptions.py b/src/api/management/domain/exceptions.py index 3f225fdf2..06b4f35a7 100644 --- a/src/api/management/domain/exceptions.py +++ b/src/api/management/domain/exceptions.py @@ -29,3 +29,9 @@ class InvalidIdentifierError(Exception): """Raised when a cross-context identifier (tenant_id, workspace_id, etc.) 
is empty or whitespace.""" pass + + +class InvalidWorkspaceModeTransitionError(Exception): + """Raised when a workspace mode transition is invalid.""" + + pass diff --git a/src/api/management/domain/value_objects.py b/src/api/management/domain/value_objects.py index 0fc20b7ab..eddf2dedb 100644 --- a/src/api/management/domain/value_objects.py +++ b/src/api/management/domain/value_objects.py @@ -94,6 +94,13 @@ class ScheduleType(StrEnum): INTERVAL = "interval" +class WorkspaceMode(StrEnum): + """Lifecycle mode of a knowledge-graph workspace.""" + + SCHEMA_BOOTSTRAP = "schema_bootstrap" + EXTRACTION_OPERATIONS = "extraction_operations" + + @dataclass(frozen=True) class Schedule: """Schedule configuration for data source synchronization. diff --git a/src/api/management/infrastructure/models/knowledge_graph.py b/src/api/management/infrastructure/models/knowledge_graph.py index 36a1d70bd..51125f721 100644 --- a/src/api/management/infrastructure/models/knowledge_graph.py +++ b/src/api/management/infrastructure/models/knowledge_graph.py @@ -10,6 +10,7 @@ from sqlalchemy.orm import Mapped, mapped_column from infrastructure.database.models import Base, TimestampMixin +from management.domain.value_objects import WorkspaceMode class KnowledgeGraphModel(Base, TimestampMixin): @@ -30,6 +31,12 @@ class KnowledgeGraphModel(Base, TimestampMixin): workspace_id: Mapped[str] = mapped_column(String(26), nullable=False) name: Mapped[str] = mapped_column(String(255), nullable=False) description: Mapped[str] = mapped_column(sa.Text, nullable=False) + workspace_mode: Mapped[str] = mapped_column( + String(64), + nullable=False, + default=WorkspaceMode.SCHEMA_BOOTSTRAP.value, + server_default=WorkspaceMode.SCHEMA_BOOTSTRAP.value, + ) ontology: Mapped[dict | None] = mapped_column(JSONB, nullable=True, default=None) __table_args__ = ( diff --git a/src/api/management/infrastructure/repositories/knowledge_graph_repository.py 
b/src/api/management/infrastructure/repositories/knowledge_graph_repository.py index abb5aff83..432ad7699 100644 --- a/src/api/management/infrastructure/repositories/knowledge_graph_repository.py +++ b/src/api/management/infrastructure/repositories/knowledge_graph_repository.py @@ -13,7 +13,11 @@ from sqlalchemy.ext.asyncio import AsyncSession from management.domain.aggregates import KnowledgeGraph -from management.domain.value_objects import KnowledgeGraphId, OntologyConfig +from management.domain.value_objects import ( + KnowledgeGraphId, + OntologyConfig, + WorkspaceMode, +) from management.infrastructure.models import KnowledgeGraphModel from management.infrastructure.observability import ( DefaultKnowledgeGraphRepositoryProbe, @@ -67,6 +71,7 @@ async def save(self, knowledge_graph: KnowledgeGraph) -> None: if model: model.name = knowledge_graph.name model.description = knowledge_graph.description + model.workspace_mode = knowledge_graph.workspace_mode.value model.updated_at = knowledge_graph.updated_at else: model = KnowledgeGraphModel( @@ -75,6 +80,7 @@ async def save(self, knowledge_graph: KnowledgeGraph) -> None: workspace_id=knowledge_graph.workspace_id, name=knowledge_graph.name, description=knowledge_graph.description, + workspace_mode=knowledge_graph.workspace_mode.value, created_at=knowledge_graph.created_at, updated_at=knowledge_graph.updated_at, ) @@ -219,4 +225,5 @@ def _to_domain(self, model: KnowledgeGraphModel) -> KnowledgeGraph: created_at=model.created_at, updated_at=model.updated_at, ontology=ontology, + workspace_mode=WorkspaceMode(model.workspace_mode), ) diff --git a/src/api/management/presentation/knowledge_graphs/models.py b/src/api/management/presentation/knowledge_graphs/models.py index 4594c6427..eccffc525 100644 --- a/src/api/management/presentation/knowledge_graphs/models.py +++ b/src/api/management/presentation/knowledge_graphs/models.py @@ -11,6 +11,7 @@ EdgeTypeDefinition, NodeTypeDefinition, OntologyConfig, + WorkspaceMode, ) @@ 
-71,6 +72,10 @@ class KnowledgeGraphResponse(BaseModel): workspace_id: str = Field(..., description="Workspace ID this KG belongs to") name: str = Field(..., description="Knowledge graph name") description: str = Field(..., description="Knowledge graph description") + workspace_mode: WorkspaceMode = Field( + ..., + description="Workspace lifecycle mode for this knowledge graph", + ) created_at: datetime = Field(..., description="When the KG was created") updated_at: datetime = Field(..., description="When the KG was last updated") @@ -90,6 +95,7 @@ def from_domain(cls, kg: KnowledgeGraph) -> KnowledgeGraphResponse: workspace_id=kg.workspace_id, name=kg.name, description=kg.description, + workspace_mode=kg.workspace_mode, created_at=kg.created_at, updated_at=kg.updated_at, ) diff --git a/src/api/tests/integration/management/test_knowledge_graph_repository.py b/src/api/tests/integration/management/test_knowledge_graph_repository.py index 66cac6197..d7e6b7c56 100644 --- a/src/api/tests/integration/management/test_knowledge_graph_repository.py +++ b/src/api/tests/integration/management/test_knowledge_graph_repository.py @@ -22,6 +22,7 @@ KnowledgeGraphRepository, ) from management.ports.exceptions import DuplicateKnowledgeGraphNameError +from management.domain.value_objects import WorkspaceMode from shared_kernel.datasource_types import DataSourceAdapterType pytestmark = pytest.mark.integration @@ -84,6 +85,32 @@ async def test_saves_and_retrieves_with_description( assert retrieved is not None assert retrieved.description == "" + @pytest.mark.asyncio + async def test_saves_and_retrieves_workspace_mode( + self, + knowledge_graph_repository: KnowledgeGraphRepository, + async_session, + test_tenant: str, + test_workspace: str, + clean_management_data, + ): + """Should persist workspace mode transition state.""" + kg = KnowledgeGraph.create( + tenant_id=test_tenant, + workspace_id=test_workspace, + name="Workspace Mode KG", + description="Tracks mode lifecycle", + ) + 
kg.transition_to_extraction_operations() + + async with async_session.begin(): + await knowledge_graph_repository.save(kg) + + retrieved = await knowledge_graph_repository.get_by_id(kg.id) + + assert retrieved is not None + assert retrieved.workspace_mode == WorkspaceMode.EXTRACTION_OPERATIONS + class TestKnowledgeGraphUpdate: """Tests for updating knowledge graphs.""" diff --git a/src/api/tests/unit/management/presentation/test_knowledge_graphs_routes.py b/src/api/tests/unit/management/presentation/test_knowledge_graphs_routes.py index 4c5e6c009..31806882e 100644 --- a/src/api/tests/unit/management/presentation/test_knowledge_graphs_routes.py +++ b/src/api/tests/unit/management/presentation/test_knowledge_graphs_routes.py @@ -19,7 +19,7 @@ KnowledgeGraphService, ) from management.domain.aggregates import KnowledgeGraph -from management.domain.value_objects import KnowledgeGraphId +from management.domain.value_objects import KnowledgeGraphId, WorkspaceMode from management.ports.exceptions import ( DuplicateKnowledgeGraphNameError, KnowledgeGraphNotFoundError, @@ -100,6 +100,10 @@ def test_list_knowledge_graphs_returns_200( assert len(result["knowledge_graphs"]) == 1 assert result["knowledge_graphs"][0]["id"] == sample_knowledge_graph.id.value assert result["knowledge_graphs"][0]["name"] == sample_knowledge_graph.name + assert ( + result["knowledge_graphs"][0]["workspace_mode"] + == WorkspaceMode.SCHEMA_BOOTSTRAP.value + ) def test_list_knowledge_graphs_calls_list_all_with_view_permission_by_default( self, @@ -254,6 +258,7 @@ def test_get_knowledge_graph_returns_200( assert result["description"] == sample_knowledge_graph.description assert result["tenant_id"] == sample_knowledge_graph.tenant_id assert result["workspace_id"] == sample_knowledge_graph.workspace_id + assert result["workspace_mode"] == WorkspaceMode.SCHEMA_BOOTSTRAP.value def test_get_knowledge_graph_calls_service_with_user_id( self, diff --git a/src/api/tests/unit/management/test_knowledge_graph.py 
b/src/api/tests/unit/management/test_knowledge_graph.py index 01ae468f3..c30d5e0b0 100644 --- a/src/api/tests/unit/management/test_knowledge_graph.py +++ b/src/api/tests/unit/management/test_knowledge_graph.py @@ -17,11 +17,12 @@ AggregateDeletedError, InvalidIdentifierError, InvalidKnowledgeGraphNameError, + InvalidWorkspaceModeTransitionError, ) from management.domain.observability import ( KnowledgeGraphProbe, ) -from management.domain.value_objects import KnowledgeGraphId +from management.domain.value_objects import KnowledgeGraphId, WorkspaceMode class TestKnowledgeGraphCreate: @@ -43,6 +44,7 @@ def test_create_sets_all_fields(self): assert isinstance(kg.created_at, datetime) assert isinstance(kg.updated_at, datetime) assert kg.created_at == kg.updated_at + assert kg.workspace_mode == WorkspaceMode.SCHEMA_BOOTSTRAP def test_create_generates_unique_id(self): """Each create() call should generate a unique ID.""" @@ -219,6 +221,38 @@ def test_update_raises_after_deletion(self): kg.update(name="Should fail", description="") +class TestKnowledgeGraphWorkspaceMode: + """Tests for workspace mode lifecycle transitions.""" + + def _create_kg(self, **kwargs): + defaults = { + "tenant_id": "t", + "workspace_id": "w", + "name": "Original", + "description": "Original desc", + } + defaults.update(kwargs) + kg = KnowledgeGraph.create(**defaults) + kg.collect_events() + return kg + + def test_transition_to_extraction_operations(self): + """Transition should move mode to extraction_operations.""" + kg = self._create_kg() + + kg.transition_to_extraction_operations() + + assert kg.workspace_mode == WorkspaceMode.EXTRACTION_OPERATIONS + + def test_transition_is_irreversible(self): + """Transitioning after extraction_operations should fail.""" + kg = self._create_kg() + kg.transition_to_extraction_operations() + + with pytest.raises(InvalidWorkspaceModeTransitionError): + kg.transition_to_extraction_operations() + + class TestKnowledgeGraphMarkForDeletion: """Tests for 
KnowledgeGraph.mark_for_deletion() method.""" From 75d58e5b8cf5eb3cfecd455e298de8191ecb2abb Mon Sep 17 00:00:00 2001 From: Austin Redenbaugh Date: Wed, 20 May 2026 01:55:35 -0400 Subject: [PATCH 004/153] feat(management): expose knowledge graph workspace status projection (#681) Add a workspace-status API projection with mode, readiness flags, transition eligibility, and session pointers, including service and route authorization coverage for manage workspace rendering. Co-authored-by: Cursor --- .../services/knowledge_graph_service.py | 55 +++++++++++++- src/api/management/domain/value_objects.py | 38 ++++++++++ .../presentation/knowledge_graphs/models.py | 63 ++++++++++++++++ .../presentation/knowledge_graphs/routes.py | 38 ++++++++++ .../test_knowledge_graph_service.py | 75 +++++++++++++++++++ .../test_knowledge_graphs_routes.py | 66 +++++++++++++++- 6 files changed, 333 insertions(+), 2 deletions(-) diff --git a/src/api/management/application/services/knowledge_graph_service.py b/src/api/management/application/services/knowledge_graph_service.py index e32bac3b7..2de278904 100644 --- a/src/api/management/application/services/knowledge_graph_service.py +++ b/src/api/management/application/services/knowledge_graph_service.py @@ -14,7 +14,14 @@ KnowledgeGraphServiceProbe, ) from management.domain.aggregates import KnowledgeGraph -from management.domain.value_objects import KnowledgeGraphId, OntologyConfig +from management.domain.value_objects import ( + KnowledgeGraphId, + KnowledgeGraphWorkspaceStatus, + OntologyConfig, + WorkspaceMode, + WorkspaceReadinessStatus, + WorkspaceSessionPointers, +) from management.ports.exceptions import ( DuplicateKnowledgeGraphNameError, KnowledgeGraphNotFoundError, @@ -580,3 +587,49 @@ async def save_ontology( await self._session.commit() return config + + def _evaluate_workspace_readiness( + self, kg: KnowledgeGraph + ) -> WorkspaceReadinessStatus: + """Evaluate transition readiness flags for workspace status projection.""" + 
node_type_count = len(kg.ontology.node_types) if kg.ontology else 0 + edge_type_count = len(kg.ontology.edge_types) if kg.ontology else 0 + + # Prepopulated-instance validation is delivered by later units of work. + return WorkspaceReadinessStatus( + has_minimum_entity_types=node_type_count >= 1, + has_minimum_relationship_types=edge_type_count >= 1, + prepopulated_types_ready=True, + ) + + async def get_workspace_status( + self, + user_id: str, + kg_id: str, + ) -> KnowledgeGraphWorkspaceStatus | None: + """Get mode/readiness/session projection for a knowledge graph workspace.""" + kg = await self._kg_repo.get_by_id(KnowledgeGraphId(value=kg_id)) + if kg is None or kg.tenant_id != self._scope_to_tenant: + return None + + has_view = await self._check_permission( + user_id=user_id, + resource_type=ResourceType.KNOWLEDGE_GRAPH, + resource_id=kg_id, + permission=Permission.VIEW, + ) + if not has_view: + return None + + readiness = self._evaluate_workspace_readiness(kg) + transition_eligible = ( + kg.workspace_mode == WorkspaceMode.SCHEMA_BOOTSTRAP and readiness.is_ready + ) + + return KnowledgeGraphWorkspaceStatus( + knowledge_graph_id=kg.id.value, + workspace_mode=kg.workspace_mode, + readiness=readiness, + transition_eligible=transition_eligible, + session_pointers=WorkspaceSessionPointers(), + ) diff --git a/src/api/management/domain/value_objects.py b/src/api/management/domain/value_objects.py index eddf2dedb..415d6a40e 100644 --- a/src/api/management/domain/value_objects.py +++ b/src/api/management/domain/value_objects.py @@ -101,6 +101,44 @@ class WorkspaceMode(StrEnum): EXTRACTION_OPERATIONS = "extraction_operations" +@dataclass(frozen=True) +class WorkspaceReadinessStatus: + """Readiness flags used to determine bootstrap transition eligibility.""" + + has_minimum_entity_types: bool + has_minimum_relationship_types: bool + prepopulated_types_ready: bool + + @property + def is_ready(self) -> bool: + """Return true when all readiness checks pass.""" + return ( + 
self.has_minimum_entity_types + and self.has_minimum_relationship_types + and self.prepopulated_types_ready + ) + + +@dataclass(frozen=True) +class WorkspaceSessionPointers: + """Session pointers projected for workspace status UIs.""" + + active_schema_bootstrap_session_id: str | None = None + active_extraction_operations_session_id: str | None = None + most_recent_completed_session_id: str | None = None + + +@dataclass(frozen=True) +class KnowledgeGraphWorkspaceStatus: + """Workspace status projection for a knowledge graph.""" + + knowledge_graph_id: str + workspace_mode: WorkspaceMode + readiness: WorkspaceReadinessStatus + transition_eligible: bool + session_pointers: WorkspaceSessionPointers + + @dataclass(frozen=True) class Schedule: """Schedule configuration for data source synchronization. diff --git a/src/api/management/presentation/knowledge_graphs/models.py b/src/api/management/presentation/knowledge_graphs/models.py index eccffc525..7ca060a1c 100644 --- a/src/api/management/presentation/knowledge_graphs/models.py +++ b/src/api/management/presentation/knowledge_graphs/models.py @@ -9,8 +9,11 @@ from management.domain.aggregates import KnowledgeGraph from management.domain.value_objects import ( EdgeTypeDefinition, + KnowledgeGraphWorkspaceStatus, NodeTypeDefinition, OntologyConfig, + WorkspaceReadinessStatus, + WorkspaceSessionPointers, WorkspaceMode, ) @@ -101,6 +104,66 @@ def from_domain(cls, kg: KnowledgeGraph) -> KnowledgeGraphResponse: ) +class WorkspaceReadinessResponse(BaseModel): + """Workspace readiness flags for bootstrap transition.""" + + has_minimum_entity_types: bool + has_minimum_relationship_types: bool + prepopulated_types_ready: bool + + @classmethod + def from_domain(cls, readiness: WorkspaceReadinessStatus) -> "WorkspaceReadinessResponse": + return cls( + has_minimum_entity_types=readiness.has_minimum_entity_types, + has_minimum_relationship_types=readiness.has_minimum_relationship_types, + 
prepopulated_types_ready=readiness.prepopulated_types_ready, + ) + + +class WorkspaceSessionPointersResponse(BaseModel): + """Session pointer projection for workspace status UI.""" + + active_schema_bootstrap_session_id: str | None = None + active_extraction_operations_session_id: str | None = None + most_recent_completed_session_id: str | None = None + + @classmethod + def from_domain( + cls, pointers: WorkspaceSessionPointers + ) -> "WorkspaceSessionPointersResponse": + return cls( + active_schema_bootstrap_session_id=pointers.active_schema_bootstrap_session_id, + active_extraction_operations_session_id=( + pointers.active_extraction_operations_session_id + ), + most_recent_completed_session_id=pointers.most_recent_completed_session_id, + ) + + +class KnowledgeGraphWorkspaceStatusResponse(BaseModel): + """Mode/readiness/session status projection for a knowledge graph workspace.""" + + knowledge_graph_id: str + workspace_mode: WorkspaceMode + readiness: WorkspaceReadinessResponse + transition_eligible: bool + session_pointers: WorkspaceSessionPointersResponse + + @classmethod + def from_domain( + cls, status: KnowledgeGraphWorkspaceStatus + ) -> "KnowledgeGraphWorkspaceStatusResponse": + return cls( + knowledge_graph_id=status.knowledge_graph_id, + workspace_mode=status.workspace_mode, + readiness=WorkspaceReadinessResponse.from_domain(status.readiness), + transition_eligible=status.transition_eligible, + session_pointers=WorkspaceSessionPointersResponse.from_domain( + status.session_pointers + ), + ) + + # --------------------------------------------------------------------------- # Ontology models # --------------------------------------------------------------------------- diff --git a/src/api/management/presentation/knowledge_graphs/routes.py b/src/api/management/presentation/knowledge_graphs/routes.py index 3f9ca0524..abe645432 100644 --- a/src/api/management/presentation/knowledge_graphs/routes.py +++ 
b/src/api/management/presentation/knowledge_graphs/routes.py @@ -21,6 +21,7 @@ CreateKnowledgeGraphRequest, KnowledgeGraphListResponse, KnowledgeGraphResponse, + KnowledgeGraphWorkspaceStatusResponse, OntologyConfigRequest, OntologyConfigResponse, UpdateKnowledgeGraphRequest, @@ -156,6 +157,43 @@ async def get_knowledge_graph( ) +@router.get( + "/knowledge-graphs/{kg_id}/workspace-status", + response_model=KnowledgeGraphWorkspaceStatusResponse, + summary="Get knowledge graph workspace status projection", + description=""" +Return mode/readiness/session status used by the knowledge graph Manage workspace UI. + +Returns 404 when the knowledge graph does not exist or the caller lacks `view` +permission on the knowledge graph. +""", +) +async def get_knowledge_graph_workspace_status( + kg_id: str, + current_user: Annotated[CurrentUser, Depends(get_current_user)], + service: Annotated[KnowledgeGraphService, Depends(get_knowledge_graph_service)], +) -> KnowledgeGraphWorkspaceStatusResponse: + """Get workspace status projection for a knowledge graph.""" + try: + status_projection = await service.get_workspace_status( + user_id=current_user.user_id.value, + kg_id=kg_id, + ) + if status_projection is None: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail=f"Knowledge graph {kg_id} not found", + ) + return KnowledgeGraphWorkspaceStatusResponse.from_domain(status_projection) + except HTTPException: + raise + except Exception: + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail="Failed to retrieve workspace status", + ) + + @router.post( "/workspaces/{workspace_id}/knowledge-graphs", status_code=status.HTTP_201_CREATED, diff --git a/src/api/tests/unit/management/application/test_knowledge_graph_service.py b/src/api/tests/unit/management/application/test_knowledge_graph_service.py index 423e2e510..8b0264ea0 100644 --- a/src/api/tests/unit/management/application/test_knowledge_graph_service.py +++ 
b/src/api/tests/unit/management/application/test_knowledge_graph_service.py @@ -21,9 +21,14 @@ from management.domain.aggregates import DataSource, KnowledgeGraph from management.domain.value_objects import ( DataSourceId, + EdgeTypeDefinition, + KnowledgeGraphWorkspaceStatus, KnowledgeGraphId, + NodeTypeDefinition, + OntologyConfig, Schedule, ScheduleType, + WorkspaceMode, ) from shared_kernel.datasource_types import DataSourceAdapterType from management.ports.exceptions import ( @@ -410,6 +415,76 @@ async def test_get_returns_aggregate_on_success( assert probe.knowledge_graph_retrieved_calls[0]["kg_id"] == kg.id.value +class TestKnowledgeGraphServiceWorkspaceStatus: + """Tests for KnowledgeGraphService.get_workspace_status.""" + + @pytest.mark.asyncio + async def test_workspace_status_returns_none_when_not_found(self, service, user_id): + """Should return None if KG does not exist.""" + result = await service.get_workspace_status(user_id=user_id, kg_id="missing") + assert result is None + + @pytest.mark.asyncio + async def test_workspace_status_returns_none_when_view_denied( + self, service, kg_repo, user_id + ): + """Should return None if caller lacks VIEW on KG.""" + kg = _make_kg() + kg_repo.seed(kg) + + result = await service.get_workspace_status(user_id=user_id, kg_id=kg.id.value) + assert result is None + + @pytest.mark.asyncio + async def test_workspace_status_includes_mode_readiness_and_session_pointers( + self, service, authz, kg_repo, user_id + ): + """Should project mode/readiness flags and default null session pointers.""" + kg = _make_kg() + kg.set_ontology( + OntologyConfig( + node_types=(NodeTypeDefinition(label="Repository"),), + edge_types=( + EdgeTypeDefinition( + label="CONTAINS", + source_labels=("Repository",), + target_labels=("Repository",), + ), + ), + ) + ) + kg_repo.seed(kg) + await _grant_kg_view(authz, kg.id.value, user_id) + + result = await service.get_workspace_status(user_id=user_id, kg_id=kg.id.value) + + assert isinstance(result, 
KnowledgeGraphWorkspaceStatus) + assert result.workspace_mode == WorkspaceMode.SCHEMA_BOOTSTRAP + assert result.readiness.has_minimum_entity_types is True + assert result.readiness.has_minimum_relationship_types is True + assert result.readiness.prepopulated_types_ready is True + assert result.transition_eligible is True + assert result.session_pointers.active_schema_bootstrap_session_id is None + assert result.session_pointers.active_extraction_operations_session_id is None + assert result.session_pointers.most_recent_completed_session_id is None + + @pytest.mark.asyncio + async def test_workspace_status_transition_not_eligible_without_schema_readiness( + self, service, authz, kg_repo, user_id + ): + """Should report transition_eligible false when readiness checks fail.""" + kg = _make_kg() + kg_repo.seed(kg) + await _grant_kg_view(authz, kg.id.value, user_id) + + result = await service.get_workspace_status(user_id=user_id, kg_id=kg.id.value) + + assert result is not None + assert result.readiness.has_minimum_entity_types is False + assert result.readiness.has_minimum_relationship_types is False + assert result.transition_eligible is False + + # ---- list_for_workspace ---- diff --git a/src/api/tests/unit/management/presentation/test_knowledge_graphs_routes.py b/src/api/tests/unit/management/presentation/test_knowledge_graphs_routes.py index 31806882e..36bec0b54 100644 --- a/src/api/tests/unit/management/presentation/test_knowledge_graphs_routes.py +++ b/src/api/tests/unit/management/presentation/test_knowledge_graphs_routes.py @@ -19,7 +19,13 @@ KnowledgeGraphService, ) from management.domain.aggregates import KnowledgeGraph -from management.domain.value_objects import KnowledgeGraphId, WorkspaceMode +from management.domain.value_objects import ( + KnowledgeGraphId, + KnowledgeGraphWorkspaceStatus, + WorkspaceMode, + WorkspaceReadinessStatus, + WorkspaceSessionPointers, +) from management.ports.exceptions import ( DuplicateKnowledgeGraphNameError, 
KnowledgeGraphNotFoundError, @@ -294,6 +300,64 @@ def test_get_knowledge_graph_returns_404_when_not_found( assert response.status_code == status.HTTP_404_NOT_FOUND +class TestGetKnowledgeGraphWorkspaceStatusRoute: + """Tests for GET /management/knowledge-graphs/{kg_id}/workspace-status.""" + + def test_workspace_status_returns_200_with_projection( + self, + test_client: TestClient, + mock_kg_service: AsyncMock, + sample_knowledge_graph: KnowledgeGraph, + mock_current_user: CurrentUser, + ) -> None: + """Should return mode/readiness/session projection when authorized.""" + mock_kg_service.get_workspace_status.return_value = KnowledgeGraphWorkspaceStatus( + knowledge_graph_id=sample_knowledge_graph.id.value, + workspace_mode=WorkspaceMode.SCHEMA_BOOTSTRAP, + readiness=WorkspaceReadinessStatus( + has_minimum_entity_types=True, + has_minimum_relationship_types=False, + prepopulated_types_ready=True, + ), + transition_eligible=False, + session_pointers=WorkspaceSessionPointers(), + ) + + response = test_client.get( + f"/management/knowledge-graphs/{sample_knowledge_graph.id.value}/workspace-status" + ) + + assert response.status_code == status.HTTP_200_OK + payload = response.json() + assert payload["knowledge_graph_id"] == sample_knowledge_graph.id.value + assert payload["workspace_mode"] == WorkspaceMode.SCHEMA_BOOTSTRAP.value + assert payload["readiness"]["has_minimum_entity_types"] is True + assert payload["readiness"]["has_minimum_relationship_types"] is False + assert payload["readiness"]["prepopulated_types_ready"] is True + assert payload["transition_eligible"] is False + assert payload["session_pointers"]["active_schema_bootstrap_session_id"] is None + + mock_kg_service.get_workspace_status.assert_called_once_with( + user_id=mock_current_user.user_id.value, + kg_id=sample_knowledge_graph.id.value, + ) + + def test_workspace_status_returns_404_when_missing_or_unauthorized( + self, + test_client: TestClient, + mock_kg_service: AsyncMock, + sample_knowledge_graph: 
KnowledgeGraph, + ) -> None: + """Should return 404 when service returns None.""" + mock_kg_service.get_workspace_status.return_value = None + + response = test_client.get( + f"/management/knowledge-graphs/{sample_knowledge_graph.id.value}/workspace-status" + ) + + assert response.status_code == status.HTTP_404_NOT_FOUND + + class TestCreateKnowledgeGraphRoute: """Tests for POST /management/workspaces/{workspace_id}/knowledge-graphs endpoint.""" From 9cee053e75570d74840ea0f21ad24d79ff2a0095 Mon Sep 17 00:00:00 2001 From: Austin Redenbaugh Date: Wed, 20 May 2026 01:58:47 -0400 Subject: [PATCH 005/153] feat(management): implement actionable bootstrap readiness validation (#682) Enforce workspace readiness checks for minimum entity/relationship type coverage and prepopulated type instance presence, and project blocking reasons so validate/transition workflows can render actionable feedback. Co-authored-by: Cursor --- .../services/knowledge_graph_service.py | 31 ++++++++++++-- src/api/management/domain/value_objects.py | 11 +++++ .../presentation/knowledge_graphs/models.py | 19 +++++++++ .../test_knowledge_graph_service.py | 41 +++++++++++++++++++ .../test_knowledge_graphs_routes.py | 2 + .../management/test_ontology_value_objects.py | 13 ++++++ 6 files changed, 113 insertions(+), 4 deletions(-) diff --git a/src/api/management/application/services/knowledge_graph_service.py b/src/api/management/application/services/knowledge_graph_service.py index 2de278904..50746850e 100644 --- a/src/api/management/application/services/knowledge_graph_service.py +++ b/src/api/management/application/services/knowledge_graph_service.py @@ -594,12 +594,35 @@ def _evaluate_workspace_readiness( """Evaluate transition readiness flags for workspace status projection.""" node_type_count = len(kg.ontology.node_types) if kg.ontology else 0 edge_type_count = len(kg.ontology.edge_types) if kg.ontology else 0 + prepopulated_without_instances: tuple[str, ...] 
= () + if kg.ontology is not None: + prepopulated_without_instances = tuple( + node_type.label + for node_type in kg.ontology.node_types + if node_type.prepopulated and node_type.prepopulated_instance_count <= 0 + ) + + has_min_entities = node_type_count >= 1 + has_min_relationships = edge_type_count >= 1 + prepopulated_ready = len(prepopulated_without_instances) == 0 + + blocking_reasons: list[str] = [] + if not has_min_entities: + blocking_reasons.append("At least one entity type is required") + if not has_min_relationships: + blocking_reasons.append("At least one relationship type is required") + if not prepopulated_ready: + labels = ", ".join(prepopulated_without_instances) + blocking_reasons.append( + f"Prepopulated types require instances before transition: {labels}" + ) - # Prepopulated-instance validation is delivered by later units of work. return WorkspaceReadinessStatus( - has_minimum_entity_types=node_type_count >= 1, - has_minimum_relationship_types=edge_type_count >= 1, - prepopulated_types_ready=True, + has_minimum_entity_types=has_min_entities, + has_minimum_relationship_types=has_min_relationships, + prepopulated_types_ready=prepopulated_ready, + prepopulated_types_without_instances=prepopulated_without_instances, + blocking_reasons=tuple(blocking_reasons), ) async def get_workspace_status( diff --git a/src/api/management/domain/value_objects.py b/src/api/management/domain/value_objects.py index 415d6a40e..185c0159e 100644 --- a/src/api/management/domain/value_objects.py +++ b/src/api/management/domain/value_objects.py @@ -108,6 +108,8 @@ class WorkspaceReadinessStatus: has_minimum_entity_types: bool has_minimum_relationship_types: bool prepopulated_types_ready: bool + prepopulated_types_without_instances: tuple[str, ...] = field(default_factory=tuple) + blocking_reasons: tuple[str, ...] 
= field(default_factory=tuple) @property def is_ready(self) -> bool: @@ -116,6 +118,7 @@ def is_ready(self) -> bool: self.has_minimum_entity_types and self.has_minimum_relationship_types and self.prepopulated_types_ready + and not self.prepopulated_types_without_instances ) @@ -321,11 +324,15 @@ class NodeTypeDefinition: description: str = "" required_properties: tuple[str, ...] = field(default_factory=tuple) optional_properties: tuple[str, ...] = field(default_factory=tuple) + prepopulated: bool = False + prepopulated_instance_count: int = 0 def __post_init__(self) -> None: """Validate that label is non-empty.""" if not self.label or not self.label.strip(): raise ValueError("NodeTypeDefinition label must not be empty") + if self.prepopulated_instance_count < 0: + raise ValueError("prepopulated_instance_count must be >= 0") def to_dict(self) -> dict[str, Any]: """Serialize to a plain dict suitable for JSON persistence.""" @@ -334,6 +341,8 @@ def to_dict(self) -> dict[str, Any]: "description": self.description, "required_properties": list(self.required_properties), "optional_properties": list(self.optional_properties), + "prepopulated": self.prepopulated, + "prepopulated_instance_count": self.prepopulated_instance_count, } @classmethod @@ -344,6 +353,8 @@ def from_dict(cls, data: dict[str, Any]) -> NodeTypeDefinition: description=data.get("description", ""), required_properties=tuple(data.get("required_properties", [])), optional_properties=tuple(data.get("optional_properties", [])), + prepopulated=bool(data.get("prepopulated", False)), + prepopulated_instance_count=int(data.get("prepopulated_instance_count", 0)), ) diff --git a/src/api/management/presentation/knowledge_graphs/models.py b/src/api/management/presentation/knowledge_graphs/models.py index 7ca060a1c..78ce70a49 100644 --- a/src/api/management/presentation/knowledge_graphs/models.py +++ b/src/api/management/presentation/knowledge_graphs/models.py @@ -110,6 +110,8 @@ class 
WorkspaceReadinessResponse(BaseModel): has_minimum_entity_types: bool has_minimum_relationship_types: bool prepopulated_types_ready: bool + prepopulated_types_without_instances: list[str] = Field(default_factory=list) + blocking_reasons: list[str] = Field(default_factory=list) @classmethod def from_domain(cls, readiness: WorkspaceReadinessStatus) -> "WorkspaceReadinessResponse": @@ -117,6 +119,10 @@ def from_domain(cls, readiness: WorkspaceReadinessStatus) -> "WorkspaceReadiness has_minimum_entity_types=readiness.has_minimum_entity_types, has_minimum_relationship_types=readiness.has_minimum_relationship_types, prepopulated_types_ready=readiness.prepopulated_types_ready, + prepopulated_types_without_instances=list( + readiness.prepopulated_types_without_instances + ), + blocking_reasons=list(readiness.blocking_reasons), ) @@ -182,6 +188,15 @@ class NodeTypeDefinitionModel(BaseModel): default_factory=list, description="Properties nodes of this type may optionally have", ) + prepopulated: bool = Field( + default=False, + description="Whether this type must have at least one instance before transition", + ) + prepopulated_instance_count: int = Field( + default=0, + ge=0, + description="Current known instance count used for readiness evaluation", + ) def to_domain(self) -> NodeTypeDefinition: """Convert to domain NodeTypeDefinition value object.""" @@ -190,6 +205,8 @@ def to_domain(self) -> NodeTypeDefinition: description=self.description, required_properties=tuple(self.required_properties), optional_properties=tuple(self.optional_properties), + prepopulated=self.prepopulated, + prepopulated_instance_count=self.prepopulated_instance_count, ) @classmethod @@ -200,6 +217,8 @@ def from_domain(cls, nt: NodeTypeDefinition) -> NodeTypeDefinitionModel: description=nt.description, required_properties=list(nt.required_properties), optional_properties=list(nt.optional_properties), + prepopulated=nt.prepopulated, + prepopulated_instance_count=nt.prepopulated_instance_count, ) diff 
--git a/src/api/tests/unit/management/application/test_knowledge_graph_service.py b/src/api/tests/unit/management/application/test_knowledge_graph_service.py index 8b0264ea0..52d300e90 100644 --- a/src/api/tests/unit/management/application/test_knowledge_graph_service.py +++ b/src/api/tests/unit/management/application/test_knowledge_graph_service.py @@ -463,6 +463,8 @@ async def test_workspace_status_includes_mode_readiness_and_session_pointers( assert result.readiness.has_minimum_entity_types is True assert result.readiness.has_minimum_relationship_types is True assert result.readiness.prepopulated_types_ready is True + assert result.readiness.prepopulated_types_without_instances == () + assert result.readiness.blocking_reasons == () assert result.transition_eligible is True assert result.session_pointers.active_schema_bootstrap_session_id is None assert result.session_pointers.active_extraction_operations_session_id is None @@ -482,6 +484,45 @@ async def test_workspace_status_transition_not_eligible_without_schema_readiness assert result is not None assert result.readiness.has_minimum_entity_types is False assert result.readiness.has_minimum_relationship_types is False + assert "At least one entity type is required" in result.readiness.blocking_reasons + assert ( + "At least one relationship type is required" + in result.readiness.blocking_reasons + ) + assert result.transition_eligible is False + + @pytest.mark.asyncio + async def test_workspace_status_fails_for_prepopulated_type_without_instances( + self, service, authz, kg_repo, user_id + ): + """Should block transition when prepopulated type has zero instances.""" + kg = _make_kg() + kg.set_ontology( + OntologyConfig( + node_types=( + NodeTypeDefinition( + label="Repository", + prepopulated=True, + prepopulated_instance_count=0, + ), + ), + edge_types=( + EdgeTypeDefinition( + label="CONTAINS", + source_labels=("Repository",), + target_labels=("Repository",), + ), + ), + ) + ) + kg_repo.seed(kg) + await 
_grant_kg_view(authz, kg.id.value, user_id) + + result = await service.get_workspace_status(user_id=user_id, kg_id=kg.id.value) + + assert result is not None + assert result.readiness.prepopulated_types_ready is False + assert result.readiness.prepopulated_types_without_instances == ("Repository",) assert result.transition_eligible is False diff --git a/src/api/tests/unit/management/presentation/test_knowledge_graphs_routes.py b/src/api/tests/unit/management/presentation/test_knowledge_graphs_routes.py index 36bec0b54..32c68da31 100644 --- a/src/api/tests/unit/management/presentation/test_knowledge_graphs_routes.py +++ b/src/api/tests/unit/management/presentation/test_knowledge_graphs_routes.py @@ -334,6 +334,8 @@ def test_workspace_status_returns_200_with_projection( assert payload["readiness"]["has_minimum_entity_types"] is True assert payload["readiness"]["has_minimum_relationship_types"] is False assert payload["readiness"]["prepopulated_types_ready"] is True + assert payload["readiness"]["prepopulated_types_without_instances"] == [] + assert payload["readiness"]["blocking_reasons"] == [] assert payload["transition_eligible"] is False assert payload["session_pointers"]["active_schema_bootstrap_session_id"] is None diff --git a/src/api/tests/unit/management/test_ontology_value_objects.py b/src/api/tests/unit/management/test_ontology_value_objects.py index 645a66ca8..ebf872a6e 100644 --- a/src/api/tests/unit/management/test_ontology_value_objects.py +++ b/src/api/tests/unit/management/test_ontology_value_objects.py @@ -33,6 +33,8 @@ def test_valid_minimal_node_type(self): assert nt.description == "" assert nt.required_properties == () assert nt.optional_properties == () + assert nt.prepopulated is False + assert nt.prepopulated_instance_count == 0 def test_required_properties_default_empty(self): """required_properties defaults to an empty tuple.""" @@ -94,6 +96,17 @@ def test_to_dict_contains_expected_keys(self): assert "description" in d assert 
"required_properties" in d assert "optional_properties" in d + assert "prepopulated" in d + assert "prepopulated_instance_count" in d + + def test_prepopulated_instance_count_must_be_non_negative(self): + """NodeTypeDefinition should reject negative prepopulated instance counts.""" + with pytest.raises(ValueError, match="prepopulated_instance_count"): + NodeTypeDefinition( + label="Repo", + prepopulated=True, + prepopulated_instance_count=-1, + ) class TestEdgeTypeDefinition: From c305a447bd201eb812bf2154b608278020e3b2ea Mon Sep 17 00:00:00 2001 From: Austin Redenbaugh Date: Wed, 20 May 2026 02:04:31 -0400 Subject: [PATCH 006/153] feat(management): add workspace validate and transition commands (#683) Expose authorized validate and transition commands for knowledge graph workspaces, persist session pointers, and create an extraction-mode session identifier when moving from bootstrap to extraction operations. Co-authored-by: Cursor --- ...0_add_workspace_session_pointer_columns.py | 46 +++++++++ .../services/knowledge_graph_service.py | 89 ++++++++++++++++- .../domain/aggregates/knowledge_graph.py | 9 +- .../infrastructure/models/knowledge_graph.py | 9 ++ .../knowledge_graph_repository.py | 23 +++++ .../presentation/knowledge_graphs/routes.py | 73 ++++++++++++++ .../test_knowledge_graph_repository.py | 1 + .../test_knowledge_graph_service.py | 97 +++++++++++++++++++ .../test_knowledge_graphs_routes.py | 96 ++++++++++++++++++ .../unit/management/test_knowledge_graph.py | 5 +- 10 files changed, 445 insertions(+), 3 deletions(-) create mode 100644 src/api/infrastructure/migrations/versions/f5b6c7d8e9f0_add_workspace_session_pointer_columns.py diff --git a/src/api/infrastructure/migrations/versions/f5b6c7d8e9f0_add_workspace_session_pointer_columns.py b/src/api/infrastructure/migrations/versions/f5b6c7d8e9f0_add_workspace_session_pointer_columns.py new file mode 100644 index 000000000..57d19eca7 --- /dev/null +++ 
b/src/api/infrastructure/migrations/versions/f5b6c7d8e9f0_add_workspace_session_pointer_columns.py @@ -0,0 +1,46 @@ +"""add workspace session pointer columns to knowledge_graphs + +Adds nullable session pointer fields used by workspace status projection +and bootstrap-to-extraction transition commands. + +Revision ID: f5b6c7d8e9f0 +Revises: f4a5b6c7d8e9 +Create Date: 2026-05-14 13:00:00.000000 +""" + +from typing import Sequence, Union + +import sqlalchemy as sa +from alembic import op + + +# revision identifiers, used by Alembic. +revision: str = "f5b6c7d8e9f0" +down_revision: Union[str, Sequence[str], None] = "f4a5b6c7d8e9" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + """Add workspace session pointer columns.""" + op.add_column( + "knowledge_graphs", + sa.Column("active_schema_bootstrap_session_id", sa.String(length=26), nullable=True), + ) + op.add_column( + "knowledge_graphs", + sa.Column( + "active_extraction_operations_session_id", sa.String(length=26), nullable=True + ), + ) + op.add_column( + "knowledge_graphs", + sa.Column("most_recent_completed_session_id", sa.String(length=26), nullable=True), + ) + + +def downgrade() -> None: + """Drop workspace session pointer columns.""" + op.drop_column("knowledge_graphs", "most_recent_completed_session_id") + op.drop_column("knowledge_graphs", "active_extraction_operations_session_id") + op.drop_column("knowledge_graphs", "active_schema_bootstrap_session_id") diff --git a/src/api/management/application/services/knowledge_graph_service.py b/src/api/management/application/services/knowledge_graph_service.py index 50746850e..f4c7d2d48 100644 --- a/src/api/management/application/services/knowledge_graph_service.py +++ b/src/api/management/application/services/knowledge_graph_service.py @@ -654,5 +654,92 @@ async def get_workspace_status( workspace_mode=kg.workspace_mode, readiness=readiness, 
transition_eligible=transition_eligible, - session_pointers=WorkspaceSessionPointers(), + session_pointers=WorkspaceSessionPointers( + active_schema_bootstrap_session_id=kg.active_schema_bootstrap_session_id, + active_extraction_operations_session_id=( + kg.active_extraction_operations_session_id + ), + most_recent_completed_session_id=kg.most_recent_completed_session_id, + ), + ) + + async def validate_workspace( + self, + user_id: str, + kg_id: str, + ) -> KnowledgeGraphWorkspaceStatus: + """Validate bootstrap readiness with KG edit authorization.""" + has_edit = await self._check_permission( + user_id=user_id, + resource_type=ResourceType.KNOWLEDGE_GRAPH, + resource_id=kg_id, + permission=Permission.EDIT, + ) + if not has_edit: + self._probe.permission_denied( + user_id=user_id, + resource_id=kg_id, + permission=Permission.EDIT, + ) + raise UnauthorizedError( + f"User {user_id} lacks edit permission on knowledge graph {kg_id}" + ) + + kg = await self._kg_repo.get_by_id(KnowledgeGraphId(value=kg_id)) + if kg is None or kg.tenant_id != self._scope_to_tenant: + raise KnowledgeGraphNotFoundError(f"Knowledge graph {kg_id} not found") + + readiness = self._evaluate_workspace_readiness(kg) + transition_eligible = ( + kg.workspace_mode == WorkspaceMode.SCHEMA_BOOTSTRAP and readiness.is_ready + ) + return KnowledgeGraphWorkspaceStatus( + knowledge_graph_id=kg.id.value, + workspace_mode=kg.workspace_mode, + readiness=readiness, + transition_eligible=transition_eligible, + session_pointers=WorkspaceSessionPointers( + active_schema_bootstrap_session_id=kg.active_schema_bootstrap_session_id, + active_extraction_operations_session_id=( + kg.active_extraction_operations_session_id + ), + most_recent_completed_session_id=kg.most_recent_completed_session_id, + ), + ) + + async def transition_workspace_to_extraction( + self, + user_id: str, + kg_id: str, + ) -> KnowledgeGraphWorkspaceStatus: + """Transition a knowledge graph workspace to extraction_operations mode.""" + _ = await 
self.validate_workspace(user_id=user_id, kg_id=kg_id) + + kg = await self._kg_repo.get_by_id(KnowledgeGraphId(value=kg_id)) + if kg is None or kg.tenant_id != self._scope_to_tenant: + raise KnowledgeGraphNotFoundError(f"Knowledge graph {kg_id} not found") + + readiness = self._evaluate_workspace_readiness(kg) + if not readiness.is_ready: + joined_reasons = "; ".join(readiness.blocking_reasons) + raise ValueError( + f"Knowledge graph {kg_id} is not ready for transition: {joined_reasons}" + ) + + kg.transition_to_extraction_operations() + await self._kg_repo.save(kg) + await self._session.commit() + + return KnowledgeGraphWorkspaceStatus( + knowledge_graph_id=kg.id.value, + workspace_mode=kg.workspace_mode, + readiness=readiness, + transition_eligible=False, + session_pointers=WorkspaceSessionPointers( + active_schema_bootstrap_session_id=kg.active_schema_bootstrap_session_id, + active_extraction_operations_session_id=( + kg.active_extraction_operations_session_id + ), + most_recent_completed_session_id=kg.most_recent_completed_session_id, + ), ) diff --git a/src/api/management/domain/aggregates/knowledge_graph.py b/src/api/management/domain/aggregates/knowledge_graph.py index 542d6eebd..1067c5c19 100644 --- a/src/api/management/domain/aggregates/knowledge_graph.py +++ b/src/api/management/domain/aggregates/knowledge_graph.py @@ -6,6 +6,8 @@ from datetime import UTC, datetime from typing import TYPE_CHECKING +from ulid import ULID + from management.domain.events import ( KnowledgeGraphCreated, KnowledgeGraphDeleted, @@ -57,6 +59,9 @@ class KnowledgeGraph: updated_at: datetime ontology: OntologyConfig | None = field(default=None) workspace_mode: WorkspaceMode = field(default=WorkspaceMode.SCHEMA_BOOTSTRAP) + active_schema_bootstrap_session_id: str | None = field(default=None) + active_extraction_operations_session_id: str | None = field(default=None) + most_recent_completed_session_id: str | None = field(default=None) _pending_events: list[DomainEvent] = 
field(default_factory=list, repr=False) _probe: KnowledgeGraphProbe = field( default_factory=DefaultKnowledgeGraphProbe, @@ -237,14 +242,16 @@ def clear_ontology(self) -> None: self.ontology = None self.updated_at = datetime.now(UTC) - def transition_to_extraction_operations(self) -> None: + def transition_to_extraction_operations(self) -> str: """Transition workspace mode from bootstrap to extraction operations.""" if self.workspace_mode == WorkspaceMode.EXTRACTION_OPERATIONS: raise InvalidWorkspaceModeTransitionError( "Workspace mode is already extraction_operations" ) self.workspace_mode = WorkspaceMode.EXTRACTION_OPERATIONS + self.active_extraction_operations_session_id = str(ULID()) self.updated_at = datetime.now(UTC) + return self.active_extraction_operations_session_id def mark_for_deletion( self, diff --git a/src/api/management/infrastructure/models/knowledge_graph.py b/src/api/management/infrastructure/models/knowledge_graph.py index 51125f721..b22bf5487 100644 --- a/src/api/management/infrastructure/models/knowledge_graph.py +++ b/src/api/management/infrastructure/models/knowledge_graph.py @@ -37,6 +37,15 @@ class KnowledgeGraphModel(Base, TimestampMixin): default=WorkspaceMode.SCHEMA_BOOTSTRAP.value, server_default=WorkspaceMode.SCHEMA_BOOTSTRAP.value, ) + active_schema_bootstrap_session_id: Mapped[str | None] = mapped_column( + String(26), nullable=True + ) + active_extraction_operations_session_id: Mapped[str | None] = mapped_column( + String(26), nullable=True + ) + most_recent_completed_session_id: Mapped[str | None] = mapped_column( + String(26), nullable=True + ) ontology: Mapped[dict | None] = mapped_column(JSONB, nullable=True, default=None) __table_args__ = ( diff --git a/src/api/management/infrastructure/repositories/knowledge_graph_repository.py b/src/api/management/infrastructure/repositories/knowledge_graph_repository.py index 432ad7699..25de3f8cc 100644 --- a/src/api/management/infrastructure/repositories/knowledge_graph_repository.py +++ 
b/src/api/management/infrastructure/repositories/knowledge_graph_repository.py @@ -72,6 +72,15 @@ async def save(self, knowledge_graph: KnowledgeGraph) -> None: model.name = knowledge_graph.name model.description = knowledge_graph.description model.workspace_mode = knowledge_graph.workspace_mode.value + model.active_schema_bootstrap_session_id = ( + knowledge_graph.active_schema_bootstrap_session_id + ) + model.active_extraction_operations_session_id = ( + knowledge_graph.active_extraction_operations_session_id + ) + model.most_recent_completed_session_id = ( + knowledge_graph.most_recent_completed_session_id + ) model.updated_at = knowledge_graph.updated_at else: model = KnowledgeGraphModel( @@ -81,6 +90,15 @@ async def save(self, knowledge_graph: KnowledgeGraph) -> None: name=knowledge_graph.name, description=knowledge_graph.description, workspace_mode=knowledge_graph.workspace_mode.value, + active_schema_bootstrap_session_id=( + knowledge_graph.active_schema_bootstrap_session_id + ), + active_extraction_operations_session_id=( + knowledge_graph.active_extraction_operations_session_id + ), + most_recent_completed_session_id=( + knowledge_graph.most_recent_completed_session_id + ), created_at=knowledge_graph.created_at, updated_at=knowledge_graph.updated_at, ) @@ -226,4 +244,9 @@ def _to_domain(self, model: KnowledgeGraphModel) -> KnowledgeGraph: updated_at=model.updated_at, ontology=ontology, workspace_mode=WorkspaceMode(model.workspace_mode), + active_schema_bootstrap_session_id=model.active_schema_bootstrap_session_id, + active_extraction_operations_session_id=( + model.active_extraction_operations_session_id + ), + most_recent_completed_session_id=model.most_recent_completed_session_id, ) diff --git a/src/api/management/presentation/knowledge_graphs/routes.py b/src/api/management/presentation/knowledge_graphs/routes.py index abe645432..666997b98 100644 --- a/src/api/management/presentation/knowledge_graphs/routes.py +++ 
b/src/api/management/presentation/knowledge_graphs/routes.py @@ -194,6 +194,79 @@ async def get_knowledge_graph_workspace_status( ) +@router.post( + "/knowledge-graphs/{kg_id}/workspace/validate", + response_model=KnowledgeGraphWorkspaceStatusResponse, + summary="Validate bootstrap readiness for workspace transition", +) +async def validate_knowledge_graph_workspace( + kg_id: str, + current_user: Annotated[CurrentUser, Depends(get_current_user)], + service: Annotated[KnowledgeGraphService, Depends(get_knowledge_graph_service)], +) -> KnowledgeGraphWorkspaceStatusResponse: + """Validate workspace readiness with edit authorization.""" + try: + status_projection = await service.validate_workspace( + user_id=current_user.user_id.value, + kg_id=kg_id, + ) + return KnowledgeGraphWorkspaceStatusResponse.from_domain(status_projection) + except UnauthorizedError: + raise HTTPException( + status_code=status.HTTP_403_FORBIDDEN, + detail="You do not have permission to perform this action", + ) + except KnowledgeGraphNotFoundError as e: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail=str(e), + ) + except Exception: + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail="Failed to validate workspace status", + ) + + +@router.post( + "/knowledge-graphs/{kg_id}/workspace/transition-to-extraction", + response_model=KnowledgeGraphWorkspaceStatusResponse, + summary="Transition workspace from bootstrap to extraction operations", +) +async def transition_workspace_to_extraction( + kg_id: str, + current_user: Annotated[CurrentUser, Depends(get_current_user)], + service: Annotated[KnowledgeGraphService, Depends(get_knowledge_graph_service)], +) -> KnowledgeGraphWorkspaceStatusResponse: + """Transition workspace mode after successful validation.""" + try: + status_projection = await service.transition_workspace_to_extraction( + user_id=current_user.user_id.value, + kg_id=kg_id, + ) + return 
KnowledgeGraphWorkspaceStatusResponse.from_domain(status_projection) + except UnauthorizedError: + raise HTTPException( + status_code=status.HTTP_403_FORBIDDEN, + detail="You do not have permission to perform this action", + ) + except KnowledgeGraphNotFoundError as e: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail=str(e), + ) + except ValueError as e: + raise HTTPException( + status_code=status.HTTP_409_CONFLICT, + detail=str(e), + ) + except Exception: + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail="Failed to transition workspace mode", + ) + + @router.post( "/workspaces/{workspace_id}/knowledge-graphs", status_code=status.HTTP_201_CREATED, diff --git a/src/api/tests/integration/management/test_knowledge_graph_repository.py b/src/api/tests/integration/management/test_knowledge_graph_repository.py index d7e6b7c56..262130e33 100644 --- a/src/api/tests/integration/management/test_knowledge_graph_repository.py +++ b/src/api/tests/integration/management/test_knowledge_graph_repository.py @@ -110,6 +110,7 @@ async def test_saves_and_retrieves_workspace_mode( assert retrieved is not None assert retrieved.workspace_mode == WorkspaceMode.EXTRACTION_OPERATIONS + assert retrieved.active_extraction_operations_session_id is not None class TestKnowledgeGraphUpdate: diff --git a/src/api/tests/unit/management/application/test_knowledge_graph_service.py b/src/api/tests/unit/management/application/test_knowledge_graph_service.py index 52d300e90..1a26b7dae 100644 --- a/src/api/tests/unit/management/application/test_knowledge_graph_service.py +++ b/src/api/tests/unit/management/application/test_knowledge_graph_service.py @@ -526,6 +526,103 @@ async def test_workspace_status_fails_for_prepopulated_type_without_instances( assert result.transition_eligible is False +class TestKnowledgeGraphServiceWorkspaceCommands: + """Tests for validate_workspace and transition_workspace_to_extraction.""" + + @pytest.mark.asyncio + async 
def test_validate_workspace_requires_edit_permission( + self, service, authz, kg_repo, user_id + ): + kg = _make_kg() + kg_repo.seed(kg) + await _grant_kg_view(authz, kg.id.value, user_id) + + with pytest.raises(UnauthorizedError): + await service.validate_workspace(user_id=user_id, kg_id=kg.id.value) + + @pytest.mark.asyncio + async def test_validate_workspace_returns_projection_when_authorized( + self, service, authz, kg_repo, user_id + ): + kg = _make_kg() + kg_repo.seed(kg) + await _grant_kg_edit(authz, kg.id.value, user_id) + + result = await service.validate_workspace(user_id=user_id, kg_id=kg.id.value) + + assert result.knowledge_graph_id == kg.id.value + assert result.workspace_mode == WorkspaceMode.SCHEMA_BOOTSTRAP + + @pytest.mark.asyncio + async def test_transition_workspace_requires_edit_permission( + self, service, authz, kg_repo, user_id + ): + kg = _make_kg() + kg.set_ontology( + OntologyConfig( + node_types=(NodeTypeDefinition(label="Repository"),), + edge_types=( + EdgeTypeDefinition( + label="CONTAINS", + source_labels=("Repository",), + target_labels=("Repository",), + ), + ), + ) + ) + kg_repo.seed(kg) + await _grant_kg_view(authz, kg.id.value, user_id) + + with pytest.raises(UnauthorizedError): + await service.transition_workspace_to_extraction( + user_id=user_id, + kg_id=kg.id.value, + ) + + @pytest.mark.asyncio + async def test_transition_workspace_changes_mode_and_creates_session_pointer( + self, service, authz, kg_repo, user_id + ): + kg = _make_kg() + kg.set_ontology( + OntologyConfig( + node_types=(NodeTypeDefinition(label="Repository"),), + edge_types=( + EdgeTypeDefinition( + label="CONTAINS", + source_labels=("Repository",), + target_labels=("Repository",), + ), + ), + ) + ) + kg_repo.seed(kg) + await _grant_kg_edit(authz, kg.id.value, user_id) + + result = await service.transition_workspace_to_extraction( + user_id=user_id, + kg_id=kg.id.value, + ) + + assert result.workspace_mode == WorkspaceMode.EXTRACTION_OPERATIONS + assert 
result.transition_eligible is False + assert result.session_pointers.active_extraction_operations_session_id is not None + + @pytest.mark.asyncio + async def test_transition_workspace_rejects_when_not_ready( + self, service, authz, kg_repo, user_id + ): + kg = _make_kg() + kg_repo.seed(kg) + await _grant_kg_edit(authz, kg.id.value, user_id) + + with pytest.raises(ValueError, match="not ready for transition"): + await service.transition_workspace_to_extraction( + user_id=user_id, + kg_id=kg.id.value, + ) + + # ---- list_for_workspace ---- diff --git a/src/api/tests/unit/management/presentation/test_knowledge_graphs_routes.py b/src/api/tests/unit/management/presentation/test_knowledge_graphs_routes.py index 32c68da31..e7ab13d48 100644 --- a/src/api/tests/unit/management/presentation/test_knowledge_graphs_routes.py +++ b/src/api/tests/unit/management/presentation/test_knowledge_graphs_routes.py @@ -360,6 +360,102 @@ def test_workspace_status_returns_404_when_missing_or_unauthorized( assert response.status_code == status.HTTP_404_NOT_FOUND +class TestWorkspaceCommandsRoutes: + """Tests for workspace validate/transition command endpoints.""" + + def _status_projection(self, kg_id: str) -> KnowledgeGraphWorkspaceStatus: + return KnowledgeGraphWorkspaceStatus( + knowledge_graph_id=kg_id, + workspace_mode=WorkspaceMode.SCHEMA_BOOTSTRAP, + readiness=WorkspaceReadinessStatus( + has_minimum_entity_types=True, + has_minimum_relationship_types=True, + prepopulated_types_ready=True, + ), + transition_eligible=True, + session_pointers=WorkspaceSessionPointers(), + ) + + def test_validate_workspace_returns_200( + self, + test_client: TestClient, + mock_kg_service: AsyncMock, + sample_knowledge_graph: KnowledgeGraph, + ) -> None: + mock_kg_service.validate_workspace.return_value = self._status_projection( + sample_knowledge_graph.id.value + ) + + response = test_client.post( + f"/management/knowledge-graphs/{sample_knowledge_graph.id.value}/workspace/validate" + ) + + assert 
response.status_code == status.HTTP_200_OK + + def test_validate_workspace_returns_403_when_unauthorized( + self, + test_client: TestClient, + mock_kg_service: AsyncMock, + sample_knowledge_graph: KnowledgeGraph, + ) -> None: + mock_kg_service.validate_workspace.side_effect = UnauthorizedError("forbidden") + + response = test_client.post( + f"/management/knowledge-graphs/{sample_knowledge_graph.id.value}/workspace/validate" + ) + + assert response.status_code == status.HTTP_403_FORBIDDEN + + def test_transition_workspace_returns_200( + self, + test_client: TestClient, + mock_kg_service: AsyncMock, + sample_knowledge_graph: KnowledgeGraph, + ) -> None: + transitioned = KnowledgeGraphWorkspaceStatus( + knowledge_graph_id=sample_knowledge_graph.id.value, + workspace_mode=WorkspaceMode.EXTRACTION_OPERATIONS, + readiness=WorkspaceReadinessStatus( + has_minimum_entity_types=True, + has_minimum_relationship_types=True, + prepopulated_types_ready=True, + ), + transition_eligible=False, + session_pointers=WorkspaceSessionPointers( + active_extraction_operations_session_id="01JPQRST1234567890ABCDEFSE" + ), + ) + mock_kg_service.transition_workspace_to_extraction.return_value = transitioned + + response = test_client.post( + f"/management/knowledge-graphs/{sample_knowledge_graph.id.value}/workspace/transition-to-extraction" + ) + + assert response.status_code == status.HTTP_200_OK + payload = response.json() + assert payload["workspace_mode"] == WorkspaceMode.EXTRACTION_OPERATIONS.value + assert ( + payload["session_pointers"]["active_extraction_operations_session_id"] + == "01JPQRST1234567890ABCDEFSE" + ) + + def test_transition_workspace_returns_409_when_not_ready( + self, + test_client: TestClient, + mock_kg_service: AsyncMock, + sample_knowledge_graph: KnowledgeGraph, + ) -> None: + mock_kg_service.transition_workspace_to_extraction.side_effect = ValueError( + "not ready" + ) + + response = test_client.post( + 
f"/management/knowledge-graphs/{sample_knowledge_graph.id.value}/workspace/transition-to-extraction" + ) + + assert response.status_code == status.HTTP_409_CONFLICT + + class TestCreateKnowledgeGraphRoute: """Tests for POST /management/workspaces/{workspace_id}/knowledge-graphs endpoint.""" diff --git a/src/api/tests/unit/management/test_knowledge_graph.py b/src/api/tests/unit/management/test_knowledge_graph.py index c30d5e0b0..804c76970 100644 --- a/src/api/tests/unit/management/test_knowledge_graph.py +++ b/src/api/tests/unit/management/test_knowledge_graph.py @@ -240,9 +240,12 @@ def test_transition_to_extraction_operations(self): """Transition should move mode to extraction_operations.""" kg = self._create_kg() - kg.transition_to_extraction_operations() + session_id = kg.transition_to_extraction_operations() assert kg.workspace_mode == WorkspaceMode.EXTRACTION_OPERATIONS + assert kg.active_extraction_operations_session_id == session_id + assert isinstance(session_id, str) + assert len(session_id) == 26 def test_transition_is_irreversible(self): """Transitioning after extraction_operations should fail.""" From 584c3effe7500a7cd0174313d07ce7363e7feb88 Mon Sep 17 00:00:00 2001 From: Austin Redenbaugh Date: Wed, 20 May 2026 02:31:05 -0400 Subject: [PATCH 007/153] feat(management): persist mutation log run metadata on sync runs (#684) Add durable run-level mutation metadata storage and lifecycle persistence for session/scope identity, timestamps, token-cost totals, and operation-count summaries linked to each sync run. 
Co-authored-by: Cursor --- ..._mutation_log_run_metadata_to_sync_runs.py | 35 ++++++++ .../management/domain/entities/__init__.py | 7 +- .../domain/entities/data_source_sync_run.py | 60 +++++++++++++ .../models/data_source_sync_run.py | 4 + .../data_source_sync_run_repository.py | 17 +++- .../infrastructure/sync_lifecycle_handler.py | 46 ++++++++++ .../test_data_source_sync_run_repository.py | 66 +++++++++++++- .../test_sync_lifecycle_handler.py | 89 +++++++++++++++++++ 8 files changed, 320 insertions(+), 4 deletions(-) create mode 100644 src/api/infrastructure/migrations/versions/f6c7d8e9f0a1_add_mutation_log_run_metadata_to_sync_runs.py diff --git a/src/api/infrastructure/migrations/versions/f6c7d8e9f0a1_add_mutation_log_run_metadata_to_sync_runs.py b/src/api/infrastructure/migrations/versions/f6c7d8e9f0a1_add_mutation_log_run_metadata_to_sync_runs.py new file mode 100644 index 000000000..c3cd68944 --- /dev/null +++ b/src/api/infrastructure/migrations/versions/f6c7d8e9f0a1_add_mutation_log_run_metadata_to_sync_runs.py @@ -0,0 +1,35 @@ +"""add mutation_log_run metadata column to data_source_sync_runs + +Stores run-level mutation log metadata used by extraction/graph lifecycle +tracking (session, actor, timestamps, token/cost totals, operation counts). + +Revision ID: f6c7d8e9f0a1 +Revises: f5b6c7d8e9f0 +Create Date: 2026-05-14 14:00:00.000000 +""" + +from typing import Sequence, Union + +import sqlalchemy as sa +from alembic import op +from sqlalchemy.dialects import postgresql + + +# revision identifiers, used by Alembic. 
+revision: str = "f6c7d8e9f0a1" +down_revision: Union[str, Sequence[str], None] = "f5b6c7d8e9f0" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + """Add nullable JSONB mutation log run metadata column.""" + op.add_column( + "data_source_sync_runs", + sa.Column("mutation_log_run", postgresql.JSONB(astext_type=sa.Text()), nullable=True), + ) + + +def downgrade() -> None: + """Drop mutation log run metadata column.""" + op.drop_column("data_source_sync_runs", "mutation_log_run") diff --git a/src/api/management/domain/entities/__init__.py b/src/api/management/domain/entities/__init__.py index c665e7309..81e8ce208 100644 --- a/src/api/management/domain/entities/__init__.py +++ b/src/api/management/domain/entities/__init__.py @@ -4,6 +4,9 @@ They don't emit domain events independently. """ -from management.domain.entities.data_source_sync_run import DataSourceSyncRun +from management.domain.entities.data_source_sync_run import ( + DataSourceSyncRun, + MutationLogRunMetadata, +) -__all__ = ["DataSourceSyncRun"] +__all__ = ["DataSourceSyncRun", "MutationLogRunMetadata"] diff --git a/src/api/management/domain/entities/data_source_sync_run.py b/src/api/management/domain/entities/data_source_sync_run.py index 6bb9ca903..9bf466518 100644 --- a/src/api/management/domain/entities/data_source_sync_run.py +++ b/src/api/management/domain/entities/data_source_sync_run.py @@ -4,6 +4,7 @@ from dataclasses import dataclass, field from datetime import datetime +from typing import Any # Valid sync run status values representing the lifecycle state machine. 
TERMINAL_STATUSES = frozenset({"completed", "failed"}) @@ -12,6 +13,64 @@ ) +@dataclass +class MutationLogRunMetadata: + """Run-level metadata captured for a produced/applied mutation log.""" + + mutation_log_id: str + knowledge_graph_id: str + session_id: str | None + actor_id: str | None + started_at: datetime + completed_at: datetime | None = None + token_usage_total: int | None = None + cost_total_usd: float | None = None + operation_counts: dict[str, int] = field(default_factory=dict) + + def to_dict(self) -> dict[str, Any]: + return { + "mutation_log_id": self.mutation_log_id, + "knowledge_graph_id": self.knowledge_graph_id, + "session_id": self.session_id, + "actor_id": self.actor_id, + "started_at": self.started_at.isoformat(), + "completed_at": ( + self.completed_at.isoformat() if self.completed_at is not None else None + ), + "token_usage_total": self.token_usage_total, + "cost_total_usd": self.cost_total_usd, + "operation_counts": self.operation_counts, + } + + @classmethod + def from_dict(cls, raw: dict[str, Any]) -> "MutationLogRunMetadata": + return cls( + mutation_log_id=str(raw["mutation_log_id"]), + knowledge_graph_id=str(raw["knowledge_graph_id"]), + session_id=raw.get("session_id"), + actor_id=raw.get("actor_id"), + started_at=datetime.fromisoformat(str(raw["started_at"])), + completed_at=( + datetime.fromisoformat(str(raw["completed_at"])) + if raw.get("completed_at") + else None + ), + token_usage_total=( + int(raw["token_usage_total"]) + if raw.get("token_usage_total") is not None + else None + ), + cost_total_usd=( + float(raw["cost_total_usd"]) + if raw.get("cost_total_usd") is not None + else None + ), + operation_counts={ + str(k): int(v) for k, v in (raw.get("operation_counts") or {}).items() + }, + ) + + @dataclass class DataSourceSyncRun: """Entity tracking the execution of a data source sync. 
@@ -41,6 +100,7 @@ class DataSourceSyncRun: error: str | None created_at: datetime logs: list[str] = field(default_factory=list) + mutation_log_run: MutationLogRunMetadata | None = None def is_terminal(self) -> bool: """Return True if the sync run is in a terminal state. diff --git a/src/api/management/infrastructure/models/data_source_sync_run.py b/src/api/management/infrastructure/models/data_source_sync_run.py index 4e92dee98..d1401fe96 100644 --- a/src/api/management/infrastructure/models/data_source_sync_run.py +++ b/src/api/management/infrastructure/models/data_source_sync_run.py @@ -15,6 +15,7 @@ String, Text, ) +from sqlalchemy.dialects.postgresql import JSONB from sqlalchemy.orm import Mapped, mapped_column from infrastructure.database.models import Base, _utc_now @@ -69,6 +70,9 @@ class DataSourceSyncRunModel(Base): default=list, server_default="{}", ) + mutation_log_run: Mapped[dict | None] = mapped_column( + JSONB, nullable=True, default=None + ) __table_args__ = ( Index("idx_sync_runs_data_source_id", "data_source_id"), diff --git a/src/api/management/infrastructure/repositories/data_source_sync_run_repository.py b/src/api/management/infrastructure/repositories/data_source_sync_run_repository.py index 57997f868..aa234411f 100644 --- a/src/api/management/infrastructure/repositories/data_source_sync_run_repository.py +++ b/src/api/management/infrastructure/repositories/data_source_sync_run_repository.py @@ -11,7 +11,7 @@ from sqlalchemy import desc from sqlalchemy.ext.asyncio import AsyncSession -from management.domain.entities import DataSourceSyncRun +from management.domain.entities import DataSourceSyncRun, MutationLogRunMetadata from management.infrastructure.models import DataSourceSyncRunModel from management.infrastructure.observability import ( DefaultSyncRunRepositoryProbe, @@ -51,6 +51,11 @@ async def save(self, sync_run: DataSourceSyncRun) -> None: model.completed_at = sync_run.completed_at model.error = sync_run.error model.logs = 
sync_run.logs + model.mutation_log_run = ( + sync_run.mutation_log_run.to_dict() + if sync_run.mutation_log_run is not None + else None + ) else: model = DataSourceSyncRunModel( id=sync_run.id, @@ -61,6 +66,11 @@ async def save(self, sync_run: DataSourceSyncRun) -> None: error=sync_run.error, created_at=sync_run.created_at, logs=sync_run.logs, + mutation_log_run=( + sync_run.mutation_log_run.to_dict() + if sync_run.mutation_log_run is not None + else None + ), ) self._session.add(model) @@ -122,4 +132,9 @@ def _to_domain(self, model: DataSourceSyncRunModel) -> DataSourceSyncRun: error=model.error, created_at=model.created_at, logs=model.logs if model.logs is not None else [], + mutation_log_run=( + MutationLogRunMetadata.from_dict(model.mutation_log_run) + if model.mutation_log_run is not None + else None + ), ) diff --git a/src/api/management/infrastructure/sync_lifecycle_handler.py b/src/api/management/infrastructure/sync_lifecycle_handler.py index 54bcf62c7..5817f6cbb 100644 --- a/src/api/management/infrastructure/sync_lifecycle_handler.py +++ b/src/api/management/infrastructure/sync_lifecycle_handler.py @@ -21,6 +21,7 @@ from datetime import UTC, datetime from typing import TYPE_CHECKING, Any +from management.domain.entities import MutationLogRunMetadata from management.domain.value_objects import DataSourceId if TYPE_CHECKING: @@ -122,6 +123,21 @@ async def handle( sync_run.status = "completed" sync_run.completed_at = now sync_run.logs.append(f"[{now.isoformat()}] Sync completed") + if sync_run.mutation_log_run is not None: + sync_run.mutation_log_run.completed_at = now + if payload.get("token_usage_total") is not None: + sync_run.mutation_log_run.token_usage_total = int( + payload["token_usage_total"] + ) + if payload.get("cost_total_usd") is not None: + sync_run.mutation_log_run.cost_total_usd = float( + payload["cost_total_usd"] + ) + if payload.get("operation_counts") is not None: + sync_run.mutation_log_run.operation_counts = { + str(k): int(v) + for k, v 
in dict(payload["operation_counts"]).items() + } await self._update_data_source_last_sync_at( data_source_id=sync_run.data_source_id, now=now, @@ -135,6 +151,36 @@ async def handle( sync_run.logs.append( f"[{now.isoformat()}] {event_type}: status → {new_status}" ) + if event_type == "MutationLogProduced": + sync_run.mutation_log_run = MutationLogRunMetadata( + mutation_log_id=str(payload["mutation_log_id"]), + knowledge_graph_id=str(payload["knowledge_graph_id"]), + session_id=( + str(payload["session_id"]) + if payload.get("session_id") is not None + else None + ), + actor_id=( + str(payload["actor_id"]) + if payload.get("actor_id") is not None + else None + ), + started_at=now, + token_usage_total=( + int(payload["token_usage_total"]) + if payload.get("token_usage_total") is not None + else None + ), + cost_total_usd=( + float(payload["cost_total_usd"]) + if payload.get("cost_total_usd") is not None + else None + ), + operation_counts={ + str(k): int(v) + for k, v in dict(payload.get("operation_counts") or {}).items() + }, + ) await self._sync_run_repo.save(sync_run) await self._session.commit() diff --git a/src/api/tests/integration/management/test_data_source_sync_run_repository.py b/src/api/tests/integration/management/test_data_source_sync_run_repository.py index 675bf30dc..c441b3b7b 100644 --- a/src/api/tests/integration/management/test_data_source_sync_run_repository.py +++ b/src/api/tests/integration/management/test_data_source_sync_run_repository.py @@ -10,7 +10,7 @@ from sqlalchemy import text from management.domain.aggregates import DataSource, KnowledgeGraph -from management.domain.entities import DataSourceSyncRun +from management.domain.entities import DataSourceSyncRun, MutationLogRunMetadata from management.infrastructure.repositories.data_source_sync_run_repository import ( DataSourceSyncRunRepository, ) @@ -85,6 +85,70 @@ async def test_saves_and_retrieves_sync_run( assert retrieved.error is None assert retrieved.created_at is not None + 
@pytest.mark.asyncio + async def test_saves_and_retrieves_mutation_log_run_metadata( + self, + data_source_sync_run_repository: DataSourceSyncRunRepository, + data_source_repository: DataSourceRepository, + knowledge_graph_repository: KnowledgeGraphRepository, + async_session, + test_tenant: str, + test_workspace: str, + clean_management_data, + ): + """Should persist mutation log run metadata JSONB for sync runs.""" + kg = KnowledgeGraph.create( + tenant_id=test_tenant, + workspace_id=test_workspace, + name="Test KG", + description="For sync run tests", + ) + async with async_session.begin(): + await knowledge_graph_repository.save(kg) + + ds = DataSource.create( + knowledge_graph_id=kg.id.value, + tenant_id=test_tenant, + name="My GitHub Source", + adapter_type=DataSourceAdapterType.GITHUB, + connection_config={"repo": "org/repo", "branch": "main"}, + ) + async with async_session.begin(): + await data_source_repository.save(ds) + + now = datetime.now(UTC) + sync_run = DataSourceSyncRun( + id=str(ULID()), + data_source_id=ds.id.value, + status="applying", + started_at=now, + completed_at=None, + error=None, + created_at=now, + mutation_log_run=MutationLogRunMetadata( + mutation_log_id="log-001", + knowledge_graph_id=kg.id.value, + session_id="sess-001", + actor_id="user-001", + started_at=now, + token_usage_total=1234, + cost_total_usd=1.5, + operation_counts={"create_node": 2}, + ), + ) + + async with async_session.begin(): + await data_source_sync_run_repository.save(sync_run) + + retrieved = await data_source_sync_run_repository.get_by_id(sync_run.id) + + assert retrieved is not None + assert retrieved.mutation_log_run is not None + assert retrieved.mutation_log_run.mutation_log_id == "log-001" + assert retrieved.mutation_log_run.session_id == "sess-001" + assert retrieved.mutation_log_run.token_usage_total == 1234 + assert retrieved.mutation_log_run.operation_counts["create_node"] == 2 + @pytest.mark.asyncio async def test_saves_completed_sync_run( self, diff 
--git a/src/api/tests/unit/management/infrastructure/test_sync_lifecycle_handler.py b/src/api/tests/unit/management/infrastructure/test_sync_lifecycle_handler.py index 974f60b26..edc049ecc 100644 --- a/src/api/tests/unit/management/infrastructure/test_sync_lifecycle_handler.py +++ b/src/api/tests/unit/management/infrastructure/test_sync_lifecycle_handler.py @@ -190,6 +190,39 @@ async def test_mutation_log_produced_sets_applying( saved_run: DataSourceSyncRun = mock_sync_run_repo.save.call_args[0][0] assert saved_run.status == "applying" + async def test_mutation_log_produced_stores_run_metadata( + self, + handler: SyncLifecycleHandler, + mock_sync_run_repo: AsyncMock, + ): + """MutationLogProduced should persist run-level mutation metadata.""" + run = _make_sync_run(status="ai_extracting") + mock_sync_run_repo.get_by_id.return_value = run + + await handler.handle( + "MutationLogProduced", + _payload( + sync_run_id=run.id, + knowledge_graph_id="kg-001", + mutation_log_id="log-001", + session_id="sess-001", + actor_id="user-001", + token_usage_total=1234, + cost_total_usd=1.25, + operation_counts={"create_node": 2, "update_edge": 1}, + ), + ) + + saved_run: DataSourceSyncRun = mock_sync_run_repo.save.call_args[0][0] + assert saved_run.mutation_log_run is not None + assert saved_run.mutation_log_run.mutation_log_id == "log-001" + assert saved_run.mutation_log_run.knowledge_graph_id == "kg-001" + assert saved_run.mutation_log_run.session_id == "sess-001" + assert saved_run.mutation_log_run.actor_id == "user-001" + assert saved_run.mutation_log_run.token_usage_total == 1234 + assert saved_run.mutation_log_run.cost_total_usd == 1.25 + assert saved_run.mutation_log_run.operation_counts["create_node"] == 2 + @pytest.mark.asyncio class TestExtractionFailedTransition: @@ -260,6 +293,62 @@ async def test_mutations_applied_sets_completed( assert saved_run.status == "completed" assert saved_run.completed_at is not None + async def 
test_mutations_applied_finalizes_mutation_log_metadata( + self, + handler: SyncLifecycleHandler, + mock_sync_run_repo: AsyncMock, + mock_ds_repo: AsyncMock, + ): + """MutationsApplied should finalize mutation run metrics and completed_at.""" + from management.domain.aggregates import DataSource + from management.domain.entities import MutationLogRunMetadata + from management.domain.value_objects import DataSourceId, Schedule, ScheduleType + from shared_kernel.datasource_types import DataSourceAdapterType + + run = _make_sync_run(status="applying") + run.mutation_log_run = MutationLogRunMetadata( + mutation_log_id="log-001", + knowledge_graph_id="kg-001", + session_id="sess-001", + actor_id="user-001", + started_at=datetime.now(UTC), + ) + mock_sync_run_repo.get_by_id.return_value = run + + now = datetime.now(UTC) + ds = DataSource( + id=DataSourceId(value="ds-001"), + knowledge_graph_id="kg-001", + tenant_id="tenant-001", + name="My DS", + adapter_type=DataSourceAdapterType.GITHUB, + connection_config={}, + credentials_path=None, + schedule=Schedule(schedule_type=ScheduleType.MANUAL), + last_sync_at=None, + created_at=now, + updated_at=now, + ) + mock_ds_repo.get_by_id.return_value = ds + + await handler.handle( + "MutationsApplied", + _payload( + sync_run_id=run.id, + knowledge_graph_id="kg-001", + token_usage_total=4321, + cost_total_usd=2.5, + operation_counts={"create_node": 9}, + ), + ) + + saved_run: DataSourceSyncRun = mock_sync_run_repo.save.call_args[0][0] + assert saved_run.mutation_log_run is not None + assert saved_run.mutation_log_run.completed_at is not None + assert saved_run.mutation_log_run.token_usage_total == 4321 + assert saved_run.mutation_log_run.cost_total_usd == 2.5 + assert saved_run.mutation_log_run.operation_counts == {"create_node": 9} + async def test_mutations_applied_updates_data_source_last_sync_at( self, handler: SyncLifecycleHandler, From 60ce34f002ab9bd6e16db457a4c908e90bfb79b5 Mon Sep 17 00:00:00 2001 From: Austin Redenbaugh 
Date: Wed, 20 May 2026 02:32:47 -0400 Subject: [PATCH 008/153] feat(graph): wire mutation apply metrics into lifecycle events (#685) Emit operation-class counts and token/cost totals from mutation-log application results into MutationsApplied payloads so downstream sync lifecycle persistence can finalize run-level metadata. Co-authored-by: Cursor --- src/api/graph/infrastructure/event_handler.py | 12 +++++++++--- src/api/graph/ports/mutation_log.py | 15 +++++++++++++-- src/api/main.py | 3 ++- .../test_graph_mutation_event_handler.py | 16 ++++++++++++++-- 4 files changed, 38 insertions(+), 8 deletions(-) diff --git a/src/api/graph/infrastructure/event_handler.py b/src/api/graph/infrastructure/event_handler.py index df4bfebe8..b22b9fe5e 100644 --- a/src/api/graph/infrastructure/event_handler.py +++ b/src/api/graph/infrastructure/event_handler.py @@ -14,7 +14,7 @@ from datetime import UTC, datetime from typing import TYPE_CHECKING, Any -from graph.ports.mutation_log import IMutationLogApplier +from graph.ports.mutation_log import IMutationLogApplier, MutationLogApplyResult if TYPE_CHECKING: from shared_kernel.outbox.ports import IOutboxRepository @@ -77,17 +77,20 @@ async def handle( now = datetime.now(UTC) try: - success = await self._mutation_log_applier.apply_mutation_log( + apply_result = await self._mutation_log_applier.apply_mutation_log( mutation_log_id ) - if success: + if apply_result.success: await self._outbox.append( event_type="MutationsApplied", payload={ "sync_run_id": sync_run_id, "data_source_id": data_source_id, "knowledge_graph_id": knowledge_graph_id, + "operation_counts": apply_result.operation_counts, + "token_usage_total": apply_result.token_usage_total, + "cost_total_usd": apply_result.cost_total_usd, "occurred_at": now.isoformat(), }, occurred_at=now, @@ -101,6 +104,9 @@ async def handle( "sync_run_id": sync_run_id, "data_source_id": data_source_id, "error": "Mutation application returned failure", + "operation_counts": 
apply_result.operation_counts, + "token_usage_total": apply_result.token_usage_total, + "cost_total_usd": apply_result.cost_total_usd, "occurred_at": now.isoformat(), }, occurred_at=now, diff --git a/src/api/graph/ports/mutation_log.py b/src/api/graph/ports/mutation_log.py index dcf6037c6..14e06c85e 100644 --- a/src/api/graph/ports/mutation_log.py +++ b/src/api/graph/ports/mutation_log.py @@ -2,9 +2,20 @@ from __future__ import annotations +from dataclasses import dataclass, field from typing import Protocol +@dataclass(frozen=True) +class MutationLogApplyResult: + """Result metadata produced when applying a mutation log.""" + + success: bool + operation_counts: dict[str, int] = field(default_factory=dict) + token_usage_total: int | None = None + cost_total_usd: float | None = None + + class IMutationLogApplier(Protocol): """Protocol for applying a MutationLog to the graph database. @@ -15,7 +26,7 @@ class IMutationLogApplier(Protocol): infrastructure (AGE connection pools, bulk loading strategies, etc.). """ - async def apply_mutation_log(self, mutation_log_id: str) -> bool: + async def apply_mutation_log(self, mutation_log_id: str) -> MutationLogApplyResult: """Apply all mutations from a MutationLog to the graph database. Args: @@ -24,7 +35,7 @@ async def apply_mutation_log(self, mutation_log_id: str) -> bool: log content from storage (filesystem, object store, etc.). Returns: - True if all mutations were applied successfully. + MutationLogApplyResult with success flag and finalized run metrics. 
Raises: Exception: Any exception signals a failure; callers should diff --git a/src/api/main.py b/src/api/main.py index 074b0c232..ffeeff0a4 100644 --- a/src/api/main.py +++ b/src/api/main.py @@ -48,6 +48,7 @@ ) from infrastructure.mcp_dependencies import dispose_mcp_auth_engine from query.presentation.mcp import mcp_http_app_proxy, query_mcp_app +from graph.ports.mutation_log import MutationLogApplyResult # Default work directory for JobPackage ZIP archives _JOB_PACKAGE_WORK_DIR = Path("/tmp/kartograph/job_packages") # noqa: S108 @@ -238,7 +239,7 @@ class _StubMutationLogApplier: result in MutationApplicationFailed being emitted. """ - async def apply_mutation_log(self, mutation_log_id: str) -> bool: + async def apply_mutation_log(self, mutation_log_id: str) -> MutationLogApplyResult: raise NotImplementedError( "Graph mutation application via outbox is not yet fully implemented. " "Register a real IMutationLogApplier to enable graph writes from the outbox." diff --git a/src/api/tests/unit/graph/infrastructure/test_graph_mutation_event_handler.py b/src/api/tests/unit/graph/infrastructure/test_graph_mutation_event_handler.py index 2a2feda5c..31f5d190b 100644 --- a/src/api/tests/unit/graph/infrastructure/test_graph_mutation_event_handler.py +++ b/src/api/tests/unit/graph/infrastructure/test_graph_mutation_event_handler.py @@ -18,6 +18,7 @@ import pytest from graph.infrastructure.event_handler import GraphMutationEventHandler +from graph.ports.mutation_log import MutationLogApplyResult class _FakeOutboxRepository: @@ -59,11 +60,16 @@ def __init__(self, fail: bool = False, error: str = "DB write error") -> None: self._error = error self.calls: list[str] = [] - async def apply_mutation_log(self, mutation_log_id: str) -> bool: + async def apply_mutation_log(self, mutation_log_id: str) -> MutationLogApplyResult: self.calls.append(mutation_log_id) if self._fail: raise RuntimeError(self._error) - return True + return MutationLogApplyResult( + success=True, + 
operation_counts={"create_node": 2, "update_edge": 1}, + token_usage_total=321, + cost_total_usd=0.42, + ) @pytest.fixture @@ -150,6 +156,12 @@ async def test_emits_mutations_applied_on_success( assert event["payload"]["sync_run_id"] == "run-001" assert event["payload"]["data_source_id"] == "ds-001" assert event["payload"]["knowledge_graph_id"] == "kg-001" + assert event["payload"]["operation_counts"] == { + "create_node": 2, + "update_edge": 1, + } + assert event["payload"]["token_usage_total"] == 321 + assert event["payload"]["cost_total_usd"] == 0.42 async def test_mutations_applied_aggregate_type( self, From d50c238cf8912b9d05cda0340a2fe6bd6e4bd73f Mon Sep 17 00:00:00 2001 From: Austin Redenbaugh Date: Wed, 20 May 2026 02:35:20 -0400 Subject: [PATCH 009/153] test(extraction): add bounded-context architecture guardrails scaffold (#686) Scaffold extraction application/presentation package structure and add pytest-archon rules enforcing DDD layer boundaries plus cross-context isolation so subsequent extraction features stay architecturally clean. Co-authored-by: Cursor --- src/api/extraction/application/__init__.py | 6 + src/api/extraction/application/services.py | 6 + src/api/extraction/infrastructure/__init__.py | 6 + src/api/extraction/ports/__init__.py | 6 + src/api/extraction/presentation/__init__.py | 6 + .../unit/extraction/test_architecture.py | 201 ++++++++++++++++++ 6 files changed, 231 insertions(+) create mode 100644 src/api/extraction/application/__init__.py create mode 100644 src/api/extraction/application/services.py create mode 100644 src/api/extraction/presentation/__init__.py create mode 100644 src/api/tests/unit/extraction/test_architecture.py diff --git a/src/api/extraction/application/__init__.py b/src/api/extraction/application/__init__.py new file mode 100644 index 000000000..13e58a0b4 --- /dev/null +++ b/src/api/extraction/application/__init__.py @@ -0,0 +1,6 @@ +"""Extraction application layer. 
+ +Application services orchestrate extraction workflows using domain logic +and port contracts. They do not directly depend on infrastructure. +""" + diff --git a/src/api/extraction/application/services.py b/src/api/extraction/application/services.py new file mode 100644 index 000000000..12cf7a5aa --- /dev/null +++ b/src/api/extraction/application/services.py @@ -0,0 +1,6 @@ +"""Application service contracts for Extraction workflows. + +Concrete implementations will orchestrate long-running extraction sessions, +agent execution, and mutation-log production. +""" + diff --git a/src/api/extraction/infrastructure/__init__.py b/src/api/extraction/infrastructure/__init__.py index e69de29bb..f7a85405f 100644 --- a/src/api/extraction/infrastructure/__init__.py +++ b/src/api/extraction/infrastructure/__init__.py @@ -0,0 +1,6 @@ +"""Extraction infrastructure adapters and event handlers.""" + +from extraction.infrastructure.event_handler import ExtractionEventHandler + +__all__ = ["ExtractionEventHandler"] + diff --git a/src/api/extraction/ports/__init__.py b/src/api/extraction/ports/__init__.py index e69de29bb..817081a56 100644 --- a/src/api/extraction/ports/__init__.py +++ b/src/api/extraction/ports/__init__.py @@ -0,0 +1,6 @@ +"""Extraction port contracts.""" + +from extraction.ports.services import IExtractionService + +__all__ = ["IExtractionService"] + diff --git a/src/api/extraction/presentation/__init__.py b/src/api/extraction/presentation/__init__.py new file mode 100644 index 000000000..a87e6bef8 --- /dev/null +++ b/src/api/extraction/presentation/__init__.py @@ -0,0 +1,6 @@ +"""Extraction presentation layer. + +HTTP/MCP routes for extraction session and operation workflows are defined +here as the bounded context expands. 
+""" + diff --git a/src/api/tests/unit/extraction/test_architecture.py b/src/api/tests/unit/extraction/test_architecture.py new file mode 100644 index 000000000..f6e2fcb2c --- /dev/null +++ b/src/api/tests/unit/extraction/test_architecture.py @@ -0,0 +1,201 @@ +"""Architecture tests for the Extraction bounded context.""" + +import importlib + +import pytest +from pytest_archon import archrule + + +def _subpackage_exists(name: str) -> bool: + """Return True when package exists, False when missing.""" + try: + importlib.import_module(name) + return True + except ModuleNotFoundError as e: + if e.name == name: + return False + raise + + +_has_domain = _subpackage_exists("extraction.domain") +_has_ports = _subpackage_exists("extraction.ports") +_has_application = _subpackage_exists("extraction.application") +_has_infrastructure = _subpackage_exists("extraction.infrastructure") +_has_presentation = _subpackage_exists("extraction.presentation") + +_skip_no_domain = pytest.mark.skipif( + not _has_domain, + reason="extraction.domain subpackage does not exist yet", +) +_skip_no_ports = pytest.mark.skipif( + not _has_ports, + reason="extraction.ports subpackage does not exist yet", +) +_skip_no_application = pytest.mark.skipif( + not _has_application, + reason="extraction.application subpackage does not exist yet", +) +_skip_no_infrastructure = pytest.mark.skipif( + not _has_infrastructure, + reason="extraction.infrastructure subpackage does not exist yet", +) +_skip_no_presentation = pytest.mark.skipif( + not _has_presentation, + reason="extraction.presentation subpackage does not exist yet", +) + + +@_skip_no_domain +class TestExtractionDomainLayerBoundaries: + def test_domain_does_not_import_infrastructure(self): + ( + archrule("extraction_domain_no_infrastructure") + .match("extraction.domain*") + .should_not_import("extraction.infrastructure*") + .check("extraction") + ) + + def test_domain_does_not_import_application(self): + ( + 
archrule("extraction_domain_no_application") + .match("extraction.domain*") + .should_not_import("extraction.application*") + .check("extraction") + ) + + def test_domain_does_not_import_fastapi(self): + ( + archrule("extraction_domain_no_fastapi") + .match("extraction.domain*") + .should_not_import("fastapi*", "starlette*") + .check("extraction") + ) + + +@_skip_no_ports +class TestExtractionPortsLayerBoundaries: + def test_ports_does_not_import_infrastructure(self): + ( + archrule("extraction_ports_no_infrastructure") + .match("extraction.ports*") + .should_not_import("extraction.infrastructure*") + .check("extraction") + ) + + def test_ports_does_not_import_application(self): + ( + archrule("extraction_ports_no_application") + .match("extraction.ports*") + .should_not_import("extraction.application*") + .check("extraction") + ) + + +@_skip_no_application +class TestExtractionApplicationLayerBoundaries: + def test_application_does_not_import_infrastructure(self): + ( + archrule("extraction_application_no_infrastructure") + .match("extraction.application*") + .should_not_import("extraction.infrastructure*") + .check("extraction") + ) + + def test_application_may_import_domain_and_ports(self): + ( + archrule("extraction_application_may_import_domain_ports") + .match("extraction.application*") + .may_import("extraction.domain*", "extraction.ports*") + .check("extraction") + ) + + +@_skip_no_infrastructure +class TestExtractionInfrastructureLayerBoundaries: + def test_infrastructure_does_not_import_application(self): + ( + archrule("extraction_infrastructure_no_application") + .match("extraction.infrastructure*") + .should_not_import("extraction.application*") + .check("extraction") + ) + + def test_infrastructure_may_import_domain_and_ports(self): + ( + archrule("extraction_infrastructure_may_import_domain_ports") + .match("extraction.infrastructure*") + .may_import("extraction.domain*", "extraction.ports*") + .check("extraction") + ) + + +@_skip_no_presentation 
+class TestExtractionPresentationLayerBoundaries: + def test_presentation_does_not_import_other_contexts(self): + ( + archrule("extraction_presentation_no_cross_context_imports") + .match("extraction.presentation*") + .should_not_import("graph*", "management*", "ingestion*", "query*") + .check("extraction") + ) + + +class TestExtractionBoundedContextIsolation: + def test_extraction_does_not_import_iam(self): + ( + archrule("extraction_no_iam") + .match("extraction*") + .should_not_import("iam*") + .check("extraction") + ) + + def test_extraction_does_not_import_management(self): + ( + archrule("extraction_no_management") + .match("extraction*") + .should_not_import("management*") + .check("extraction") + ) + + def test_extraction_does_not_import_ingestion(self): + ( + archrule("extraction_no_ingestion") + .match("extraction*") + .should_not_import("ingestion*") + .check("extraction") + ) + + def test_extraction_does_not_import_graph(self): + ( + archrule("extraction_no_graph") + .match("extraction*") + .should_not_import("graph*") + .check("extraction") + ) + + def test_extraction_does_not_import_query(self): + ( + archrule("extraction_no_query") + .match("extraction*") + .should_not_import("query*") + .check("extraction") + ) + + +class TestExtractionAllowedDependencies: + def test_extraction_may_import_shared_kernel(self): + ( + archrule("extraction_may_import_shared_kernel") + .match("extraction*") + .may_import("shared_kernel*") + .check("extraction") + ) + + def test_extraction_may_import_infrastructure(self): + ( + archrule("extraction_may_import_infrastructure") + .match("extraction*") + .may_import("infrastructure*") + .check("extraction") + ) + From 9f91318f5dec68e0455fa17df5f4bf448a970d3c Mon Sep 17 00:00:00 2001 From: Austin Redenbaugh Date: Wed, 20 May 2026 02:39:00 -0400 Subject: [PATCH 010/153] feat(extraction): add scoped agent session lifecycle service (#687) Implement per-user/per-knowledge-graph/per-mode extraction session lifecycle behaviors with 
clear-chat reset semantics and archived-session retention backed by repository ports and unit coverage. Co-authored-by: Cursor --- src/api/extraction/application/__init__.py | 4 + .../application/agent_session_service.py | 82 +++++++++ src/api/extraction/domain/__init__.py | 5 + .../extraction/domain/entities/__init__.py | 6 + .../domain/entities/agent_session.py | 34 ++++ src/api/extraction/domain/value_objects.py | 11 ++ src/api/extraction/ports/__init__.py | 3 +- src/api/extraction/ports/repositories.py | 31 ++++ .../application/test_agent_session_service.py | 161 ++++++++++++++++++ 9 files changed, 336 insertions(+), 1 deletion(-) create mode 100644 src/api/extraction/application/agent_session_service.py create mode 100644 src/api/extraction/domain/entities/__init__.py create mode 100644 src/api/extraction/domain/entities/agent_session.py create mode 100644 src/api/extraction/domain/value_objects.py create mode 100644 src/api/extraction/ports/repositories.py create mode 100644 src/api/tests/unit/extraction/application/test_agent_session_service.py diff --git a/src/api/extraction/application/__init__.py b/src/api/extraction/application/__init__.py index 13e58a0b4..27c34129b 100644 --- a/src/api/extraction/application/__init__.py +++ b/src/api/extraction/application/__init__.py @@ -4,3 +4,7 @@ and port contracts. They do not directly depend on infrastructure. 
""" +from extraction.application.agent_session_service import ExtractionAgentSessionService + +__all__ = ["ExtractionAgentSessionService"] + diff --git a/src/api/extraction/application/agent_session_service.py b/src/api/extraction/application/agent_session_service.py new file mode 100644 index 000000000..646489e7c --- /dev/null +++ b/src/api/extraction/application/agent_session_service.py @@ -0,0 +1,82 @@ +"""Application service for extraction agent session lifecycle.""" + +from __future__ import annotations + +from ulid import ULID + +from extraction.domain.entities.agent_session import ExtractionAgentSession +from extraction.domain.value_objects import ExtractionSessionMode +from extraction.ports.repositories import IExtractionAgentSessionRepository + + +class ExtractionAgentSessionService: + """Orchestrates session create/get/list/archive behaviors by scope.""" + + def __init__(self, repository: IExtractionAgentSessionRepository) -> None: + self._repository = repository + + async def get_or_create_active_session( + self, + user_id: str, + knowledge_graph_id: str, + mode: ExtractionSessionMode, + ) -> ExtractionAgentSession: + existing = await self._repository.find_active_by_scope( + user_id=user_id, + knowledge_graph_id=knowledge_graph_id, + mode=mode, + ) + if existing is not None: + return existing + + session = ExtractionAgentSession( + id=str(ULID()), + user_id=user_id, + knowledge_graph_id=knowledge_graph_id, + mode=mode, + ) + await self._repository.save(session) + return session + + async def clear_chat( + self, + user_id: str, + knowledge_graph_id: str, + mode: ExtractionSessionMode, + ) -> ExtractionAgentSession: + active = await self._repository.find_active_by_scope( + user_id=user_id, + knowledge_graph_id=knowledge_graph_id, + mode=mode, + ) + if active is not None: + active.archive() + await self._repository.save(active) + + return await self.get_or_create_active_session( + user_id=user_id, + knowledge_graph_id=knowledge_graph_id, + mode=mode, + ) + 
+ async def list_sessions( + self, + user_id: str, + knowledge_graph_id: str, + mode: ExtractionSessionMode | None = None, + ) -> list[ExtractionAgentSession]: + return await self._repository.list_by_scope( + user_id=user_id, + knowledge_graph_id=knowledge_graph_id, + mode=mode, + ) + + async def archive_session(self, session_id: str) -> ExtractionAgentSession | None: + session = await self._repository.get_by_id(session_id) + if session is None: + return None + if session.is_active: + session.archive() + await self._repository.save(session) + return session + diff --git a/src/api/extraction/domain/__init__.py b/src/api/extraction/domain/__init__.py index 119ee9827..0a6dd4f26 100644 --- a/src/api/extraction/domain/__init__.py +++ b/src/api/extraction/domain/__init__.py @@ -1 +1,6 @@ """Extraction domain layer.""" + +from extraction.domain.entities import ExtractionAgentSession +from extraction.domain.value_objects import ExtractionSessionMode + +__all__ = ["ExtractionAgentSession", "ExtractionSessionMode"] diff --git a/src/api/extraction/domain/entities/__init__.py b/src/api/extraction/domain/entities/__init__.py new file mode 100644 index 000000000..50eafd016 --- /dev/null +++ b/src/api/extraction/domain/entities/__init__.py @@ -0,0 +1,6 @@ +"""Extraction domain entities.""" + +from extraction.domain.entities.agent_session import ExtractionAgentSession + +__all__ = ["ExtractionAgentSession"] + diff --git a/src/api/extraction/domain/entities/agent_session.py b/src/api/extraction/domain/entities/agent_session.py new file mode 100644 index 000000000..50903162e --- /dev/null +++ b/src/api/extraction/domain/entities/agent_session.py @@ -0,0 +1,34 @@ +"""Extraction agent session entity.""" + +from __future__ import annotations + +from dataclasses import dataclass, field +from datetime import UTC, datetime +from typing import Any + +from extraction.domain.value_objects import ExtractionSessionMode + + +@dataclass +class ExtractionAgentSession: + """Long-running 
conversational session scoped to user/KG/mode.""" + + id: str + user_id: str + knowledge_graph_id: str + mode: ExtractionSessionMode + message_history: list[dict[str, Any]] = field(default_factory=list) + runtime_context: dict[str, Any] = field(default_factory=dict) + created_at: datetime = field(default_factory=lambda: datetime.now(UTC)) + updated_at: datetime = field(default_factory=lambda: datetime.now(UTC)) + archived_at: datetime | None = None + + @property + def is_active(self) -> bool: + return self.archived_at is None + + def archive(self, *, when: datetime | None = None) -> None: + now = when or datetime.now(UTC) + self.archived_at = now + self.updated_at = now + diff --git a/src/api/extraction/domain/value_objects.py b/src/api/extraction/domain/value_objects.py new file mode 100644 index 000000000..c12cdfd2b --- /dev/null +++ b/src/api/extraction/domain/value_objects.py @@ -0,0 +1,11 @@ +"""Value objects for Extraction session lifecycle.""" + +from enum import StrEnum + + +class ExtractionSessionMode(StrEnum): + """Workspace mode for extraction agent sessions.""" + + SCHEMA_BOOTSTRAP = "schema_bootstrap" + EXTRACTION_OPERATIONS = "extraction_operations" + diff --git a/src/api/extraction/ports/__init__.py b/src/api/extraction/ports/__init__.py index 817081a56..ad1aff4f9 100644 --- a/src/api/extraction/ports/__init__.py +++ b/src/api/extraction/ports/__init__.py @@ -1,6 +1,7 @@ """Extraction port contracts.""" +from extraction.ports.repositories import IExtractionAgentSessionRepository from extraction.ports.services import IExtractionService -__all__ = ["IExtractionService"] +__all__ = ["IExtractionService", "IExtractionAgentSessionRepository"] diff --git a/src/api/extraction/ports/repositories.py b/src/api/extraction/ports/repositories.py new file mode 100644 index 000000000..129a2056e --- /dev/null +++ b/src/api/extraction/ports/repositories.py @@ -0,0 +1,31 @@ +"""Repository ports for Extraction sessions.""" + +from __future__ import annotations + +from 
class IExtractionAgentSessionRepository(Protocol):
    """Persistence contract for extraction agent sessions."""

    # Persist the given session. The adapters in this patch key the record on
    # session.id and overwrite an existing record with the same id.
    async def save(self, session: ExtractionAgentSession) -> None: ...

    # Look up a single session by id; None when no such session exists.
    async def get_by_id(self, session_id: str) -> ExtractionAgentSession | None: ...

    # Return the non-archived session for the exact (user, KG, mode) scope,
    # or None when the scope has no active session.
    async def find_active_by_scope(
        self,
        user_id: str,
        knowledge_graph_id: str,
        mode: ExtractionSessionMode,
    ) -> ExtractionAgentSession | None: ...

    # Return active and archived sessions for a user/KG. mode=None means
    # "all modes". The adapters in this patch order by updated_at, newest first.
    async def list_by_scope(
        self,
        user_id: str,
        knowledge_graph_id: str,
        mode: ExtractionSessionMode | None = None,
    ) -> list[ExtractionAgentSession]: ...
mode: ExtractionSessionMode, + ) -> ExtractionAgentSession | None: + for session in self._by_id.values(): + if ( + session.user_id == user_id + and session.knowledge_graph_id == knowledge_graph_id + and session.mode == mode + and session.archived_at is None + ): + return replace(session) + return None + + async def list_by_scope( + self, + user_id: str, + knowledge_graph_id: str, + mode: ExtractionSessionMode | None = None, + ) -> list[ExtractionAgentSession]: + sessions = [ + replace(session) + for session in self._by_id.values() + if session.user_id == user_id + and session.knowledge_graph_id == knowledge_graph_id + and (mode is None or session.mode == mode) + ] + return sorted(sessions, key=lambda s: s.updated_at, reverse=True) + + +@pytest.mark.asyncio +class TestExtractionAgentSessionService: + async def test_reuses_active_session_for_same_scope(self): + repo = _InMemoryAgentSessionRepository() + service = ExtractionAgentSessionService(repository=repo) + + first = await service.get_or_create_active_session( + user_id="user-1", + knowledge_graph_id="kg-1", + mode=ExtractionSessionMode.SCHEMA_BOOTSTRAP, + ) + second = await service.get_or_create_active_session( + user_id="user-1", + knowledge_graph_id="kg-1", + mode=ExtractionSessionMode.SCHEMA_BOOTSTRAP, + ) + + assert first.id == second.id + + async def test_scope_isolated_by_user(self): + repo = _InMemoryAgentSessionRepository() + service = ExtractionAgentSessionService(repository=repo) + + first = await service.get_or_create_active_session( + user_id="alice", + knowledge_graph_id="kg-1", + mode=ExtractionSessionMode.EXTRACTION_OPERATIONS, + ) + second = await service.get_or_create_active_session( + user_id="bob", + knowledge_graph_id="kg-1", + mode=ExtractionSessionMode.EXTRACTION_OPERATIONS, + ) + + assert first.id != second.id + + async def test_scope_isolated_by_mode(self): + repo = _InMemoryAgentSessionRepository() + service = ExtractionAgentSessionService(repository=repo) + + bootstrap = await 
@pytest.mark.asyncio
class TestExtractionAgentSessionService:
    """Reuse, isolation, clear-chat and history behaviour of the session service."""

    @staticmethod
    def _wire() -> tuple[_InMemoryAgentSessionRepository, ExtractionAgentSessionService]:
        """Return a fresh in-memory store and a service bound to it."""
        store = _InMemoryAgentSessionRepository()
        return store, ExtractionAgentSessionService(repository=store)

    async def test_reuses_active_session_for_same_scope(self):
        _, svc = self._wire()
        scope = {
            "user_id": "user-1",
            "knowledge_graph_id": "kg-1",
            "mode": ExtractionSessionMode.SCHEMA_BOOTSTRAP,
        }

        opened = [await svc.get_or_create_active_session(**scope) for _ in range(2)]

        assert opened[0].id == opened[1].id

    async def test_scope_isolated_by_user(self):
        _, svc = self._wire()

        per_user = {}
        for user in ("alice", "bob"):
            per_user[user] = await svc.get_or_create_active_session(
                user_id=user,
                knowledge_graph_id="kg-1",
                mode=ExtractionSessionMode.EXTRACTION_OPERATIONS,
            )

        assert per_user["alice"].id != per_user["bob"].id

    async def test_scope_isolated_by_mode(self):
        _, svc = self._wire()

        bootstrap = await svc.get_or_create_active_session(
            user_id="user-1",
            knowledge_graph_id="kg-1",
            mode=ExtractionSessionMode.SCHEMA_BOOTSTRAP,
        )
        operations = await svc.get_or_create_active_session(
            user_id="user-1",
            knowledge_graph_id="kg-1",
            mode=ExtractionSessionMode.EXTRACTION_OPERATIONS,
        )

        assert bootstrap.id != operations.id

    async def test_clear_chat_archives_old_session_and_creates_new_one(self):
        store, svc = self._wire()
        scope = {
            "user_id": "user-1",
            "knowledge_graph_id": "kg-1",
            "mode": ExtractionSessionMode.EXTRACTION_OPERATIONS,
        }

        # Give the original session some state so the reset is observable.
        old_session = await svc.get_or_create_active_session(**scope)
        old_session.message_history = [{"role": "user", "content": "hello"}]
        old_session.runtime_context = {"draft": "x"}
        old_session.updated_at = datetime.now(UTC)
        await store.save(old_session)

        new_session = await svc.clear_chat(**scope)

        archived = await store.get_by_id(old_session.id)
        assert archived is not None
        assert archived.archived_at is not None
        assert new_session.id != old_session.id
        assert new_session.message_history == []
        assert new_session.runtime_context == {}

    async def test_list_sessions_includes_archived_history(self):
        _, svc = self._wire()
        scope = {
            "user_id": "user-1",
            "knowledge_graph_id": "kg-1",
            "mode": ExtractionSessionMode.EXTRACTION_OPERATIONS,
        }

        first = await svc.get_or_create_active_session(**scope)
        await svc.clear_chat(**scope)

        sessions = await svc.list_sessions(**scope)

        assert len(sessions) == 2
        archived_first = [
            item for item in sessions if item.id == first.id and item.archived_at is not None
        ]
        assert archived_first
# Built-in skill prompt templates, keyed first by session mode and then by
# skill name. These are the defaults that knowledge-graph overrides are merged
# onto; the insertion order of the skill names is the order callers receive.
_GLOBAL_SKILL_TEMPLATES: dict[ExtractionSessionMode, dict[str, str]] = {
    # Skills offered while a schema is being bootstrapped.
    ExtractionSessionMode.SCHEMA_BOOTSTRAP: {
        "schema_modeling": (
            "Guide the user to define complete entity and relationship types "
            "with clear labels, constraints, and required properties."
        ),
        "prepopulation_validation": (
            "Prioritize prepopulated type coverage and highlight any missing "
            "instances required before extraction-mode transition."
        ),
    },
    # Skills offered once the workspace is in extraction/maintenance mode.
    ExtractionSessionMode.EXTRACTION_OPERATIONS: {
        "job_setup": (
            "Prioritize extraction job setup, file-targeting strategy, and "
            "safe incremental mutation planning."
        ),
        "minor_edits": (
            "Allow focused direct graph edits while preserving mutation-log "
            "auditability and schema consistency."
        ),
        "schema_edits_secondary": (
            "Keep schema edits available but framed as secondary to "
            "extraction and maintenance operations."
        ),
    },
}
class ExtractionSkillResolutionService:
    """Resolve session skills from global templates + KG overrides."""

    def __init__(self, override_repository: IExtractionSkillOverrideRepository) -> None:
        self._override_repository = override_repository

    async def resolve_for_session(
        self,
        knowledge_graph_id: str,
        mode: ExtractionSessionMode,
    ) -> dict[str, str]:
        """Return the skill templates for one session.

        The result starts as a copy of the global templates for ``mode`` (the
        shared defaults are never mutated). Overrides that reuse a default
        skill name replace its text in place; overrides that introduce new
        skill names are appended after the defaults in sorted-name order, so
        the key order is the same on every run.

        Raises:
            KeyError: if ``mode`` has no entry in the global templates.
        """
        resolved = dict(_GLOBAL_SKILL_TEMPLATES[mode])
        overrides = await self._override_repository.get_overrides_for_knowledge_graph(
            knowledge_graph_id=knowledge_graph_id,
            mode=mode,
        )

        # dict.update keeps the position of keys that already exist and appends
        # unseen keys in iteration order, so one sorted pass is enough.
        resolved.update((name, overrides[name]) for name in sorted(overrides))
        return resolved
+ diff --git a/src/api/tests/unit/extraction/application/test_skill_resolution_service.py b/src/api/tests/unit/extraction/application/test_skill_resolution_service.py new file mode 100644 index 000000000..90bb67082 --- /dev/null +++ b/src/api/tests/unit/extraction/application/test_skill_resolution_service.py @@ -0,0 +1,96 @@ +"""Unit tests for ExtractionSkillResolutionService.""" + +from __future__ import annotations + +import pytest + +from extraction.application.skill_resolution_service import ( + ExtractionSkillResolutionService, +) +from extraction.domain.value_objects import ExtractionSessionMode + + +class _InMemorySkillOverrideRepository: + def __init__(self, overrides: dict[tuple[str, ExtractionSessionMode], dict[str, str]] | None = None) -> None: + self._overrides = overrides or {} + + async def get_overrides_for_knowledge_graph( + self, + knowledge_graph_id: str, + mode: ExtractionSessionMode, + ) -> dict[str, str]: + return dict(self._overrides.get((knowledge_graph_id, mode), {})) + + +@pytest.mark.asyncio +class TestExtractionSkillResolutionService: + async def test_bootstrap_mode_uses_bootstrap_defaults(self): + service = ExtractionSkillResolutionService( + override_repository=_InMemorySkillOverrideRepository() + ) + + resolved = await service.resolve_for_session( + knowledge_graph_id="kg-1", + mode=ExtractionSessionMode.SCHEMA_BOOTSTRAP, + ) + + assert "schema_modeling" in resolved + assert "prepopulation_validation" in resolved + + async def test_extraction_mode_uses_extraction_defaults(self): + service = ExtractionSkillResolutionService( + override_repository=_InMemorySkillOverrideRepository() + ) + + resolved = await service.resolve_for_session( + knowledge_graph_id="kg-1", + mode=ExtractionSessionMode.EXTRACTION_OPERATIONS, + ) + + assert "job_setup" in resolved + assert "minor_edits" in resolved + + async def test_kg_overrides_replace_matching_template_and_append_new(self): + repo = _InMemorySkillOverrideRepository( + overrides={ + ( + "kg-1", + 
@pytest.mark.asyncio
class TestExtractionSkillResolutionService:
    """Default selection per mode and deterministic merging of KG overrides."""

    @staticmethod
    async def _resolve(mode, kg_overrides=None):
        """Resolve skills for "kg-1" in ``mode``, optionally seeding overrides for it."""
        if kg_overrides is None:
            repo = _InMemorySkillOverrideRepository()
        else:
            repo = _InMemorySkillOverrideRepository(overrides={("kg-1", mode): kg_overrides})
        service = ExtractionSkillResolutionService(override_repository=repo)
        return await service.resolve_for_session(knowledge_graph_id="kg-1", mode=mode)

    async def test_bootstrap_mode_uses_bootstrap_defaults(self):
        resolved = await self._resolve(ExtractionSessionMode.SCHEMA_BOOTSTRAP)

        assert "schema_modeling" in resolved
        assert "prepopulation_validation" in resolved

    async def test_extraction_mode_uses_extraction_defaults(self):
        resolved = await self._resolve(ExtractionSessionMode.EXTRACTION_OPERATIONS)

        assert "job_setup" in resolved
        assert "minor_edits" in resolved

    async def test_kg_overrides_replace_matching_template_and_append_new(self):
        resolved = await self._resolve(
            ExtractionSessionMode.EXTRACTION_OPERATIONS,
            {
                "job_setup": "KG-specific job setup instructions",
                "custom_review": "Custom review flow",
            },
        )

        assert resolved["job_setup"] == "KG-specific job setup instructions"
        assert resolved["custom_review"] == "Custom review flow"

    async def test_override_merge_is_deterministic(self):
        resolved = await self._resolve(
            ExtractionSessionMode.SCHEMA_BOOTSTRAP,
            {
                "z_last": "z",
                "a_first": "a",
            },
        )

        # Additional override keys are merged in sorted order for determinism.
        trailing = list(resolved.keys())[-2:]
        assert trailing == ["a_first", "z_last"]
Co-authored-by: Cursor --- src/api/extraction/dependencies.py | 20 ++ src/api/extraction/infrastructure/__init__.py | 3 +- .../infrastructure/models/__init__.py | 6 + .../infrastructure/models/agent_session.py | 62 ++++++ .../infrastructure/repositories/__init__.py | 8 + .../repositories/agent_session_repository.py | 107 +++++++++++ src/api/extraction/presentation/__init__.py | 9 + src/api/extraction/presentation/models.py | 47 +++++ src/api/extraction/presentation/routes.py | 123 ++++++++++++ ..._create_extraction_agent_sessions_table.py | 75 ++++++++ src/api/main.py | 4 + .../extraction/presentation/test_routes.py | 178 ++++++++++++++++++ .../unit/extraction/test_architecture.py | 9 +- 13 files changed, 648 insertions(+), 3 deletions(-) create mode 100644 src/api/extraction/dependencies.py create mode 100644 src/api/extraction/infrastructure/models/__init__.py create mode 100644 src/api/extraction/infrastructure/models/agent_session.py create mode 100644 src/api/extraction/infrastructure/repositories/__init__.py create mode 100644 src/api/extraction/infrastructure/repositories/agent_session_repository.py create mode 100644 src/api/extraction/presentation/models.py create mode 100644 src/api/extraction/presentation/routes.py create mode 100644 src/api/infrastructure/migrations/versions/f7d8e9f0a1b2_create_extraction_agent_sessions_table.py create mode 100644 src/api/tests/unit/extraction/presentation/test_routes.py diff --git a/src/api/extraction/dependencies.py b/src/api/extraction/dependencies.py new file mode 100644 index 000000000..6ded27903 --- /dev/null +++ b/src/api/extraction/dependencies.py @@ -0,0 +1,20 @@ +"""FastAPI dependencies for Extraction services.""" + +from typing import Annotated + +from fastapi import Depends +from sqlalchemy.ext.asyncio import AsyncSession + +from extraction.application import ExtractionAgentSessionService +from extraction.infrastructure.repositories import ExtractionAgentSessionRepository +from 
def get_extraction_agent_session_service(
    session: Annotated[AsyncSession, Depends(get_write_session)],
) -> ExtractionAgentSessionService:
    """Build the session service on top of the session from ``get_write_session``."""
    session_repository = ExtractionAgentSessionRepository(session=session)
    return ExtractionAgentSessionService(repository=session_repository)
class ExtractionAgentSessionModel(Base):
    """Persistence model for long-running extraction sessions."""

    __tablename__ = "extraction_agent_sessions"

    # Scope columns. The 26-character ids are presumably sized for ULID
    # strings (the service creates ids with str(ULID())) — TODO confirm that
    # knowledge graph ids follow the same format.
    id: Mapped[str] = mapped_column(String(26), primary_key=True)
    user_id: Mapped[str] = mapped_column(String(255), nullable=False)
    knowledge_graph_id: Mapped[str] = mapped_column(String(26), nullable=False)
    # Stored as the string value of the session mode; restricted by the check
    # constraint declared in __table_args__.
    mode: Mapped[str] = mapped_column(String(64), nullable=False)
    # Conversation state, stored as JSONB; defaults to an empty list / dict.
    message_history: Mapped[list[dict]] = mapped_column(
        JSONB, nullable=False, default=list
    )
    runtime_context: Mapped[dict] = mapped_column(JSONB, nullable=False, default=dict)
    # Timezone-aware timestamps; _utc_now supplies the value on insert when
    # none is given.
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        insert_default=_utc_now,
        nullable=False,
    )
    # onupdate applies only when an UPDATE does not set this column itself.
    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        insert_default=_utc_now,
        onupdate=_utc_now,
        nullable=False,
    )
    # NULL while the session is active; set when it is archived.
    archived_at: Mapped[datetime | None] = mapped_column(
        DateTime(timezone=True),
        nullable=True,
    )

    __table_args__ = (
        # Supports the "active session for a scope" lookup.
        # NOTE(review): this index is not UNIQUE, so the database itself does
        # not prevent two active rows for the same user/KG/mode.
        Index(
            "idx_extract_sessions_scope_active",
            "user_id",
            "knowledge_graph_id",
            "mode",
            "archived_at",
        ),
        # Supports listing a user's sessions for a KG ordered by recency.
        Index(
            "idx_extract_sessions_scope_updated",
            "user_id",
            "knowledge_graph_id",
            "updated_at",
        ),
        # Must list exactly the values of the session-mode enum.
        CheckConstraint(
            "mode IN ('schema_bootstrap', 'extraction_operations')",
            name="ck_extract_sessions_mode",
        ),
    )
class ExtractionAgentSessionRepository(IExtractionAgentSessionRepository):
    """SQLAlchemy-backed store for extraction agent sessions."""

    def __init__(self, session: AsyncSession) -> None:
        self._session = session

    async def _fetch_row(self, session_id: str) -> ExtractionAgentSessionModel | None:
        """Load the ORM row with the given id, or None when it does not exist."""
        query = select(ExtractionAgentSessionModel).where(
            ExtractionAgentSessionModel.id == session_id
        )
        return (await self._session.execute(query)).scalar_one_or_none()

    async def save(self, session: ExtractionAgentSession) -> None:
        """Insert the session, or update the mutable columns of an existing row.

        Only message history, runtime context, updated_at and archived_at are
        written on update. Changes are flushed, not committed.
        """
        row = await self._fetch_row(session.id)
        if row is not None:
            row.message_history = session.message_history
            row.runtime_context = session.runtime_context
            row.updated_at = session.updated_at
            row.archived_at = session.archived_at
        else:
            self._session.add(
                ExtractionAgentSessionModel(
                    id=session.id,
                    user_id=session.user_id,
                    knowledge_graph_id=session.knowledge_graph_id,
                    mode=session.mode.value,
                    message_history=session.message_history,
                    runtime_context=session.runtime_context,
                    created_at=session.created_at,
                    updated_at=session.updated_at,
                    archived_at=session.archived_at,
                )
            )
        await self._session.flush()

    async def get_by_id(self, session_id: str) -> ExtractionAgentSession | None:
        """Return the session with this id as a domain entity, or None."""
        row = await self._fetch_row(session_id)
        return None if row is None else self._to_domain(row)

    async def find_active_by_scope(
        self,
        user_id: str,
        knowledge_graph_id: str,
        mode: ExtractionSessionMode,
    ) -> ExtractionAgentSession | None:
        """Return the most recently updated non-archived session in the scope."""
        table = ExtractionAgentSessionModel
        query = (
            select(table)
            .where(
                table.user_id == user_id,
                table.knowledge_graph_id == knowledge_graph_id,
                table.mode == mode.value,
                table.archived_at.is_(None),
            )
            .order_by(desc(table.updated_at))
            .limit(1)
        )
        row = (await self._session.execute(query)).scalar_one_or_none()
        return None if row is None else self._to_domain(row)

    async def list_by_scope(
        self,
        user_id: str,
        knowledge_graph_id: str,
        mode: ExtractionSessionMode | None = None,
    ) -> list[ExtractionAgentSession]:
        """List sessions for a user/KG (optionally one mode), newest first."""
        table = ExtractionAgentSessionModel
        criteria = [
            table.user_id == user_id,
            table.knowledge_graph_id == knowledge_graph_id,
        ]
        if mode is not None:
            criteria.append(table.mode == mode.value)
        query = select(table).where(*criteria).order_by(desc(table.updated_at))
        rows = (await self._session.execute(query)).scalars().all()
        return [self._to_domain(row) for row in rows]

    def _to_domain(self, model: ExtractionAgentSessionModel) -> ExtractionAgentSession:
        """Map an ORM row to a domain entity, copying the JSON containers."""
        history = list(model.message_history or [])
        context = dict(model.runtime_context or {})
        return ExtractionAgentSession(
            id=model.id,
            user_id=model.user_id,
            knowledge_graph_id=model.knowledge_graph_id,
            mode=ExtractionSessionMode(model.mode),
            message_history=history,
            runtime_context=context,
            created_at=model.created_at,
            updated_at=model.updated_at,
            archived_at=model.archived_at,
        )
class ExtractionSessionResponse(BaseModel):
    """API model for extraction session state."""

    id: str
    user_id: str
    knowledge_graph_id: str
    mode: ExtractionSessionMode
    message_history: list[dict[str, Any]] = Field(default_factory=list)
    runtime_context: dict[str, Any] = Field(default_factory=dict)
    created_at: datetime
    updated_at: datetime
    archived_at: datetime | None = None

    @classmethod
    def from_domain(cls, session: ExtractionAgentSession) -> "ExtractionSessionResponse":
        # The response mirrors the domain entity field for field, so each
        # attribute is copied across under the same name.
        mirrored = (
            "id",
            "user_id",
            "knowledge_graph_id",
            "mode",
            "message_history",
            "runtime_context",
            "created_at",
            "updated_at",
            "archived_at",
        )
        return cls(**{name: getattr(session, name) for name in mirrored})


class ExtractionSessionListResponse(BaseModel):
    """List response for scoped extraction sessions."""

    sessions: list[ExtractionSessionResponse]
    count: int
# Router for the session endpoints defined below.
router = APIRouter(tags=["extraction-sessions"])


# Shared guard: raises HTTP 403 unless the current user holds the EDIT
# permission on the knowledge graph. Returns None when access is allowed.
async def _assert_kg_edit_permission(
    *,
    authz: AuthorizationProvider,
    current_user: CurrentUser,
    knowledge_graph_id: str,
) -> None:
    subject = format_subject(ResourceType.USER, current_user.user_id.value)
    resource = format_resource(ResourceType.KNOWLEDGE_GRAPH, knowledge_graph_id)
    allowed = await authz.check_permission(resource, Permission.EDIT, subject)
    if not allowed:
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail="You do not have permission to perform this action",
        )


# Returns the caller's active session for the KG/mode.
# NOTE(review): this GET delegates to get_or_create_active_session, so it
# creates a session when none exists — confirm that a state-creating GET is
# intended.
@router.get(
    "/knowledge-graphs/{knowledge_graph_id}/sessions/{mode}/active",
    response_model=ExtractionSessionResponse,
)
async def get_active_session(
    knowledge_graph_id: str,
    mode: ExtractionSessionMode,
    current_user: Annotated[CurrentUser, Depends(get_current_user)],
    service: Annotated[
        ExtractionAgentSessionService, Depends(get_extraction_agent_session_service)
    ],
    authz: Annotated[AuthorizationProvider, Depends(get_spicedb_client)],
) -> ExtractionSessionResponse:
    # Authorize before touching session state.
    await _assert_kg_edit_permission(
        authz=authz,
        current_user=current_user,
        knowledge_graph_id=knowledge_graph_id,
    )
    # Sessions are always scoped to the authenticated user's own id.
    session = await service.get_or_create_active_session(
        user_id=current_user.user_id.value,
        knowledge_graph_id=knowledge_graph_id,
        mode=mode,
    )
    return ExtractionSessionResponse.from_domain(session)


# Lists the caller's sessions (active and archived) for the KG/mode.
# NOTE(review): this read-only listing is gated on the EDIT permission —
# confirm whether a read-level permission was intended.
@router.get(
    "/knowledge-graphs/{knowledge_graph_id}/sessions/{mode}",
    response_model=ExtractionSessionListResponse,
)
async def list_sessions(
    knowledge_graph_id: str,
    mode: ExtractionSessionMode,
    current_user: Annotated[CurrentUser, Depends(get_current_user)],
    service: Annotated[
        ExtractionAgentSessionService, Depends(get_extraction_agent_session_service)
    ],
    authz: Annotated[AuthorizationProvider, Depends(get_spicedb_client)],
) -> ExtractionSessionListResponse:
    await _assert_kg_edit_permission(
        authz=authz,
        current_user=current_user,
        knowledge_graph_id=knowledge_graph_id,
    )
    sessions = await service.list_sessions(
        user_id=current_user.user_id.value,
        knowledge_graph_id=knowledge_graph_id,
        mode=mode,
    )
    payload = [ExtractionSessionResponse.from_domain(session) for session in sessions]
    # count is derived from the returned list.
    return ExtractionSessionListResponse(sessions=payload, count=len(payload))


# "Clear chat": delegates to the service's clear_chat for the caller's scope
# and returns the session it hands back.
@router.post(
    "/knowledge-graphs/{knowledge_graph_id}/sessions/{mode}/clear-chat",
    response_model=ExtractionSessionResponse,
)
async def clear_chat(
    knowledge_graph_id: str,
    mode: ExtractionSessionMode,
    current_user: Annotated[CurrentUser, Depends(get_current_user)],
    service: Annotated[
        ExtractionAgentSessionService, Depends(get_extraction_agent_session_service)
    ],
    authz: Annotated[AuthorizationProvider, Depends(get_spicedb_client)],
) -> ExtractionSessionResponse:
    await _assert_kg_edit_permission(
        authz=authz,
        current_user=current_user,
        knowledge_graph_id=knowledge_graph_id,
    )
    session = await service.clear_chat(
        user_id=current_user.user_id.value,
        knowledge_graph_id=knowledge_graph_id,
        mode=mode,
    )
    return ExtractionSessionResponse.from_domain(session)
+revision: str = "f7d8e9f0a1b2" +down_revision: Union[str, Sequence[str], None] = "f6c7d8e9f0a1" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + """Create extraction session table and scope indexes.""" + op.create_table( + "extraction_agent_sessions", + sa.Column("id", sa.String(length=26), nullable=False), + sa.Column("user_id", sa.String(length=255), nullable=False), + sa.Column("knowledge_graph_id", sa.String(length=26), nullable=False), + sa.Column("mode", sa.String(length=64), nullable=False), + sa.Column( + "message_history", + postgresql.JSONB(astext_type=sa.Text()), + nullable=False, + server_default=sa.text("'[]'::jsonb"), + ), + sa.Column( + "runtime_context", + postgresql.JSONB(astext_type=sa.Text()), + nullable=False, + server_default=sa.text("'{}'::jsonb"), + ), + sa.Column("created_at", sa.DateTime(timezone=True), nullable=False), + sa.Column("updated_at", sa.DateTime(timezone=True), nullable=False), + sa.Column("archived_at", sa.DateTime(timezone=True), nullable=True), + sa.CheckConstraint( + "mode IN ('schema_bootstrap', 'extraction_operations')", + name="ck_extract_sessions_mode", + ), + sa.PrimaryKeyConstraint("id"), + ) + op.create_index( + "idx_extract_sessions_scope_active", + "extraction_agent_sessions", + ["user_id", "knowledge_graph_id", "mode", "archived_at"], + ) + op.create_index( + "idx_extract_sessions_scope_updated", + "extraction_agent_sessions", + ["user_id", "knowledge_graph_id", "updated_at"], + ) + + +def downgrade() -> None: + """Drop extraction session table and indexes.""" + op.drop_index( + "idx_extract_sessions_scope_updated", table_name="extraction_agent_sessions" + ) + op.drop_index( + "idx_extract_sessions_scope_active", table_name="extraction_agent_sessions" + ) + op.drop_table("extraction_agent_sessions") + diff --git a/src/api/main.py b/src/api/main.py index ffeeff0a4..d4deaa401 100644 --- a/src/api/main.py +++ b/src/api/main.py @@ 
-13,6 +13,7 @@ from graph.presentation import routes as graph_routes from iam.presentation import router as iam_router from management.presentation import router as management_router +from extraction.presentation import router as extraction_router from infrastructure.database.dependencies import ( close_database_engines, init_database_engines, @@ -563,6 +564,9 @@ async def kartograph_lifespan(app: FastAPI): # Include Management bounded context routes app.include_router(management_router) +# Include Extraction bounded context routes +app.include_router(extraction_router) + # Include dev utility routes (easy to remove for production) app.include_router(dev_routes.router) diff --git a/src/api/tests/unit/extraction/presentation/test_routes.py b/src/api/tests/unit/extraction/presentation/test_routes.py new file mode 100644 index 000000000..a59923026 --- /dev/null +++ b/src/api/tests/unit/extraction/presentation/test_routes.py @@ -0,0 +1,178 @@ +"""Unit tests for extraction session routes.""" + +from __future__ import annotations + +from dataclasses import replace +import pytest +from fastapi import FastAPI, status +from fastapi.testclient import TestClient + +from extraction.application.agent_session_service import ExtractionAgentSessionService +from extraction.domain.entities.agent_session import ExtractionAgentSession +from iam.application.value_objects import CurrentUser +from iam.domain.value_objects import TenantId, UserId + + +class _InMemoryAgentSessionRepository: + def __init__(self) -> None: + self._sessions: dict[str, ExtractionAgentSession] = {} + + async def save(self, session: ExtractionAgentSession) -> None: + self._sessions[session.id] = replace(session) + + async def get_by_id(self, session_id: str) -> ExtractionAgentSession | None: + session = self._sessions.get(session_id) + return replace(session) if session else None + + async def find_active_by_scope( + self, + user_id: str, + knowledge_graph_id: str, + mode: ExtractionSessionMode, + ) -> 
ExtractionAgentSession | None: + for session in self._sessions.values(): + if ( + session.user_id == user_id + and session.knowledge_graph_id == knowledge_graph_id + and session.mode == mode + and session.archived_at is None + ): + return replace(session) + return None + + async def list_by_scope( + self, + user_id: str, + knowledge_graph_id: str, + mode: ExtractionSessionMode | None = None, + ) -> list[ExtractionAgentSession]: + rows = [ + replace(session) + for session in self._sessions.values() + if session.user_id == user_id + and session.knowledge_graph_id == knowledge_graph_id + and (mode is None or session.mode == mode) + ] + return sorted(rows, key=lambda s: s.updated_at, reverse=True) + + +class _AllowAllAuthz: + async def check_permission(self, resource: str, permission: str, subject: str) -> bool: + return True + + async def write_relationship(self, resource: str, relation: str, subject: str) -> None: + return None + + async def write_relationships(self, relationships: list) -> None: + return None + + async def delete_relationship(self, resource: str, relation: str, subject: str) -> None: + return None + + async def delete_relationships(self, relationships: list) -> None: + return None + + async def delete_relationships_by_filter( + self, + resource_type: str, + resource_id: str | None = None, + relation: str | None = None, + subject_type: str | None = None, + subject_id: str | None = None, + ) -> None: + return None + + async def bulk_check_permission(self, requests: list) -> set[str]: + return set() + + async def lookup_subjects( + self, + resource: str, + relation: str, + subject_type: str, + optional_subject_relation: str | None = None, + ) -> list: + return [] + + async def lookup_resources( + self, + resource_type: str, + permission: str, + subject: str, + ) -> list[str]: + return [] + + async def read_relationships( + self, + resource_type: str, + resource_id: str | None = None, + relation: str | None = None, + subject_type: str | None = None, + 
subject_id: str | None = None, + ) -> list: + return [] + + +@pytest.fixture +def extraction_client(): + from extraction.dependencies import get_extraction_agent_session_service + from extraction.presentation import router + from iam.dependencies.user import get_current_user + from infrastructure.authorization_dependencies import get_spicedb_client + + app = FastAPI() + repo = _InMemoryAgentSessionRepository() + service = ExtractionAgentSessionService(repository=repo) + app.dependency_overrides[get_extraction_agent_session_service] = lambda: service + app.dependency_overrides[get_current_user] = lambda: CurrentUser( + user_id=UserId(value="user-123"), + username="alice", + tenant_id=TenantId(value="t1"), + ) + app.dependency_overrides[get_spicedb_client] = lambda: _AllowAllAuthz() + app.include_router(router) + return TestClient(app), service + + +class TestExtractionSessionRoutes: + def test_clear_chat_archives_old_session_and_returns_fresh_session( + self, extraction_client + ): + client, _ = extraction_client + active = client.get( + "/extraction/knowledge-graphs/kg-123/sessions/extraction_operations/active" + ) + assert active.status_code == status.HTTP_200_OK + old_id = active.json()["id"] + + response = client.post( + "/extraction/knowledge-graphs/kg-123/sessions/extraction_operations/clear-chat" + ) + assert response.status_code == status.HTTP_200_OK + payload = response.json() + assert payload["id"] != old_id + assert payload["message_history"] == [] + assert payload["runtime_context"] == {} + + history_resp = client.get( + "/extraction/knowledge-graphs/kg-123/sessions/extraction_operations" + ) + assert history_resp.status_code == status.HTTP_200_OK + history = history_resp.json()["sessions"] + assert len(history) == 2 + assert any(row["id"] == old_id and row["archived_at"] is not None for row in history) + + def test_active_session_endpoint_returns_existing_active_session( + self, extraction_client + ): + client, _ = extraction_client + first = 
client.get( + "/extraction/knowledge-graphs/kg-999/sessions/schema_bootstrap/active" + ) + second = client.get( + "/extraction/knowledge-graphs/kg-999/sessions/schema_bootstrap/active" + ) + assert first.status_code == status.HTTP_200_OK + assert second.status_code == status.HTTP_200_OK + assert first.json()["id"] == second.json()["id"] + diff --git a/src/api/tests/unit/extraction/test_architecture.py b/src/api/tests/unit/extraction/test_architecture.py index f6e2fcb2c..6a58ac544 100644 --- a/src/api/tests/unit/extraction/test_architecture.py +++ b/src/api/tests/unit/extraction/test_architecture.py @@ -143,8 +143,13 @@ def test_presentation_does_not_import_other_contexts(self): class TestExtractionBoundedContextIsolation: def test_extraction_does_not_import_iam(self): ( - archrule("extraction_no_iam") - .match("extraction*") + archrule("extraction_inner_no_iam") + .match( + "extraction.domain*", + "extraction.ports*", + "extraction.application*", + "extraction.infrastructure*", + ) .should_not_import("iam*") .check("extraction") ) From a73fa36fdbf849cc3e70d39b6f4e4b2cdf84f4ee Mon Sep 17 00:00:00 2001 From: Austin Redenbaugh Date: Wed, 20 May 2026 03:02:13 -0400 Subject: [PATCH 013/153] feat(management): add data-source commit reference projection (#690) Persist clone-head, last-extraction baseline, and tracked-branch head commit references for data sources and expose them in management API responses for downstream ingestion and UI commit-status workflows. 
Co-authored-by: Cursor --- ...ommit_reference_columns_to_data_sources.py | 47 +++++++++++++++++++ .../domain/aggregates/data_source.py | 3 ++ .../infrastructure/models/data_source.py | 7 +++ .../repositories/data_source_repository.py | 13 +++++ .../presentation/data_sources/models.py | 24 ++++++++++ .../management/test_data_source_repository.py | 43 +++++++++++++++++ .../presentation/test_data_sources_routes.py | 12 +++++ .../tests/unit/management/test_data_source.py | 13 +++++ 8 files changed, 162 insertions(+) create mode 100644 src/api/infrastructure/migrations/versions/f8e9f0a1b2c3_add_commit_reference_columns_to_data_sources.py diff --git a/src/api/infrastructure/migrations/versions/f8e9f0a1b2c3_add_commit_reference_columns_to_data_sources.py b/src/api/infrastructure/migrations/versions/f8e9f0a1b2c3_add_commit_reference_columns_to_data_sources.py new file mode 100644 index 000000000..a3da811ac --- /dev/null +++ b/src/api/infrastructure/migrations/versions/f8e9f0a1b2c3_add_commit_reference_columns_to_data_sources.py @@ -0,0 +1,47 @@ +"""add commit reference columns to data_sources + +Adds commit reference tracking fields for Git-backed data sources: +- clone_head_commit +- last_extraction_baseline_commit +- tracked_branch_head_commit + +Revision ID: f8e9f0a1b2c3 +Revises: f7d8e9f0a1b2 +Create Date: 2026-05-14 16:00:00.000000 +""" + +from typing import Sequence, Union + +import sqlalchemy as sa +from alembic import op + + +# revision identifiers, used by Alembic. 
+revision: str = "f8e9f0a1b2c3" +down_revision: Union[str, Sequence[str], None] = "f7d8e9f0a1b2" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + """Add nullable commit-reference columns to data_sources.""" + op.add_column( + "data_sources", + sa.Column("clone_head_commit", sa.String(length=64), nullable=True), + ) + op.add_column( + "data_sources", + sa.Column("last_extraction_baseline_commit", sa.String(length=64), nullable=True), + ) + op.add_column( + "data_sources", + sa.Column("tracked_branch_head_commit", sa.String(length=64), nullable=True), + ) + + +def downgrade() -> None: + """Drop commit-reference columns from data_sources.""" + op.drop_column("data_sources", "tracked_branch_head_commit") + op.drop_column("data_sources", "last_extraction_baseline_commit") + op.drop_column("data_sources", "clone_head_commit") + diff --git a/src/api/management/domain/aggregates/data_source.py b/src/api/management/domain/aggregates/data_source.py index 431eb4e12..af30f166e 100644 --- a/src/api/management/domain/aggregates/data_source.py +++ b/src/api/management/domain/aggregates/data_source.py @@ -63,6 +63,9 @@ class DataSource: last_sync_at: datetime | None created_at: datetime updated_at: datetime + clone_head_commit: str | None = None + last_extraction_baseline_commit: str | None = None + tracked_branch_head_commit: str | None = None ontology: Ontology | None = None _pending_events: list[DomainEvent] = field(default_factory=list, repr=False) _probe: DataSourceProbe = field( diff --git a/src/api/management/infrastructure/models/data_source.py b/src/api/management/infrastructure/models/data_source.py index bbbc32e4d..c8b5da737 100644 --- a/src/api/management/infrastructure/models/data_source.py +++ b/src/api/management/infrastructure/models/data_source.py @@ -42,6 +42,13 @@ class DataSourceModel(Base, TimestampMixin): last_sync_at: Mapped[datetime | None] = mapped_column( 
DateTime(timezone=True), nullable=True ) + clone_head_commit: Mapped[str | None] = mapped_column(String(64), nullable=True) + last_extraction_baseline_commit: Mapped[str | None] = mapped_column( + String(64), nullable=True + ) + tracked_branch_head_commit: Mapped[str | None] = mapped_column( + String(64), nullable=True + ) ontology_json: Mapped[dict | None] = mapped_column(JSONB, nullable=True) __table_args__ = ( diff --git a/src/api/management/infrastructure/repositories/data_source_repository.py b/src/api/management/infrastructure/repositories/data_source_repository.py index 925623c95..de2f23ff6 100644 --- a/src/api/management/infrastructure/repositories/data_source_repository.py +++ b/src/api/management/infrastructure/repositories/data_source_repository.py @@ -80,6 +80,11 @@ async def save(self, data_source: DataSource) -> None: model.schedule_type = data_source.schedule.schedule_type.value model.schedule_value = data_source.schedule.value model.last_sync_at = data_source.last_sync_at + model.clone_head_commit = data_source.clone_head_commit + model.last_extraction_baseline_commit = ( + data_source.last_extraction_baseline_commit + ) + model.tracked_branch_head_commit = data_source.tracked_branch_head_commit model.updated_at = data_source.updated_at model.ontology_json = ontology_json else: @@ -94,6 +99,11 @@ async def save(self, data_source: DataSource) -> None: schedule_type=data_source.schedule.schedule_type.value, schedule_value=data_source.schedule.value, last_sync_at=data_source.last_sync_at, + clone_head_commit=data_source.clone_head_commit, + last_extraction_baseline_commit=( + data_source.last_extraction_baseline_commit + ), + tracked_branch_head_commit=data_source.tracked_branch_head_commit, ontology_json=ontology_json, created_at=data_source.created_at, updated_at=data_source.updated_at, @@ -207,5 +217,8 @@ def _to_domain(self, model: DataSourceModel) -> DataSource: last_sync_at=model.last_sync_at, created_at=model.created_at, 
updated_at=model.updated_at, + clone_head_commit=model.clone_head_commit, + last_extraction_baseline_commit=model.last_extraction_baseline_commit, + tracked_branch_head_commit=model.tracked_branch_head_commit, ontology=ontology, ) diff --git a/src/api/management/presentation/data_sources/models.py b/src/api/management/presentation/data_sources/models.py index 192f52c41..30a9a57e5 100644 --- a/src/api/management/presentation/data_sources/models.py +++ b/src/api/management/presentation/data_sources/models.py @@ -189,6 +189,15 @@ class DataSourceResponse(BaseModel): last_sync_at: datetime | None = Field( None, description="When the last sync completed" ) + clone_head_commit: str | None = Field( + None, description="Latest known commit in the local/ingested clone" + ) + last_extraction_baseline_commit: str | None = Field( + None, description="Commit used as baseline during the last extraction run" + ) + tracked_branch_head_commit: str | None = Field( + None, description="Latest known commit at the tracked source branch head" + ) created_at: datetime = Field(..., description="When the DS was created") updated_at: datetime = Field(..., description="When the DS was last updated") ontology: OntologyModel | None = Field( @@ -214,6 +223,9 @@ def from_domain(cls, ds: DataSource) -> DataSourceResponse: adapter_type=ds.adapter_type.value, schedule_type=ds.schedule.schedule_type.value, last_sync_at=ds.last_sync_at, + clone_head_commit=ds.clone_head_commit, + last_extraction_baseline_commit=ds.last_extraction_baseline_commit, + tracked_branch_head_commit=ds.tracked_branch_head_commit, created_at=ds.created_at, updated_at=ds.updated_at, ontology=( @@ -293,6 +305,15 @@ class DataSourceWithSyncResponse(BaseModel): last_sync_at: datetime | None = Field( None, description="When the last sync completed" ) + clone_head_commit: str | None = Field( + None, description="Latest known commit in the local/ingested clone" + ) + last_extraction_baseline_commit: str | None = Field( + None, 
description="Commit used as baseline during the last extraction run" + ) + tracked_branch_head_commit: str | None = Field( + None, description="Latest known commit at the tracked source branch head" + ) created_at: datetime = Field(..., description="When the DS was created") updated_at: datetime = Field(..., description="When the DS was last updated") ontology: OntologyModel | None = Field( @@ -325,6 +346,9 @@ def from_domain_pair( adapter_type=ds.adapter_type.value, schedule_type=ds.schedule.schedule_type.value, last_sync_at=ds.last_sync_at, + clone_head_commit=ds.clone_head_commit, + last_extraction_baseline_commit=ds.last_extraction_baseline_commit, + tracked_branch_head_commit=ds.tracked_branch_head_commit, created_at=ds.created_at, updated_at=ds.updated_at, ontology=( diff --git a/src/api/tests/integration/management/test_data_source_repository.py b/src/api/tests/integration/management/test_data_source_repository.py index 6699d192f..94815fa1c 100644 --- a/src/api/tests/integration/management/test_data_source_repository.py +++ b/src/api/tests/integration/management/test_data_source_repository.py @@ -112,6 +112,49 @@ async def test_saves_with_credentials_path( assert retrieved is not None assert retrieved.credentials_path == "vault://secrets/github" + @pytest.mark.asyncio + async def test_saves_and_retrieves_commit_references( + self, + data_source_repository: DataSourceRepository, + knowledge_graph_repository: KnowledgeGraphRepository, + async_session, + test_tenant: str, + test_workspace: str, + clean_management_data, + ): + """Should roundtrip Git commit reference tracking fields.""" + kg = KnowledgeGraph.create( + tenant_id=test_tenant, + workspace_id=test_workspace, + name="Test KG", + description="For DS commit reference tests", + ) + async with async_session.begin(): + await knowledge_graph_repository.save(kg) + + ds = DataSource.create( + knowledge_graph_id=kg.id.value, + tenant_id=test_tenant, + name="GitHub With Commits", + 
adapter_type=DataSourceAdapterType.GITHUB, + connection_config={"repo": "org/repo"}, + ) + ds.clone_head_commit = "1111111111111111111111111111111111111111" + ds.last_extraction_baseline_commit = "2222222222222222222222222222222222222222" + ds.tracked_branch_head_commit = "3333333333333333333333333333333333333333" + + async with async_session.begin(): + await data_source_repository.save(ds) + + retrieved = await data_source_repository.get_by_id(ds.id) + assert retrieved is not None + assert retrieved.clone_head_commit == ds.clone_head_commit + assert ( + retrieved.last_extraction_baseline_commit + == ds.last_extraction_baseline_commit + ) + assert retrieved.tracked_branch_head_commit == ds.tracked_branch_head_commit + class TestDataSourceUpdate: """Tests for updating data sources.""" diff --git a/src/api/tests/unit/management/presentation/test_data_sources_routes.py b/src/api/tests/unit/management/presentation/test_data_sources_routes.py index fe2ad4ab0..7859ff453 100644 --- a/src/api/tests/unit/management/presentation/test_data_sources_routes.py +++ b/src/api/tests/unit/management/presentation/test_data_sources_routes.py @@ -69,6 +69,9 @@ def sample_data_source(mock_current_user: CurrentUser) -> DataSource: last_sync_at=None, created_at=now, updated_at=now, + clone_head_commit="1111111111111111111111111111111111111111", + last_extraction_baseline_commit="2222222222222222222222222222222222222222", + tracked_branch_head_commit="3333333333333333333333333333333333333333", ) @@ -134,6 +137,15 @@ def test_list_data_sources_returns_200( assert result[0]["id"] == sample_data_source.id.value assert result[0]["name"] == sample_data_source.name assert result[0]["adapter_type"] == sample_data_source.adapter_type.value + assert result[0]["clone_head_commit"] == sample_data_source.clone_head_commit + assert ( + result[0]["last_extraction_baseline_commit"] + == sample_data_source.last_extraction_baseline_commit + ) + assert ( + result[0]["tracked_branch_head_commit"] + == 
sample_data_source.tracked_branch_head_commit + ) def test_list_data_sources_returns_empty_list( self, diff --git a/src/api/tests/unit/management/test_data_source.py b/src/api/tests/unit/management/test_data_source.py index aa709e4b9..4912c364c 100644 --- a/src/api/tests/unit/management/test_data_source.py +++ b/src/api/tests/unit/management/test_data_source.py @@ -145,6 +145,19 @@ def test_create_sets_last_sync_at_to_none(self): ) assert ds.last_sync_at is None + def test_create_sets_commit_references_to_none(self): + """create() should default commit-reference tracking fields to None.""" + ds = DataSource.create( + knowledge_graph_id="kg-1", + tenant_id="t", + name="Source", + adapter_type=DataSourceAdapterType.GITHUB, + connection_config={}, + ) + assert ds.clone_head_commit is None + assert ds.last_extraction_baseline_commit is None + assert ds.tracked_branch_head_commit is None + def test_create_with_credentials_path(self): """create() should store optional credentials_path.""" ds = DataSource.create( From 296aff38a6b006bc4145e28999e699539a7509fb Mon Sep 17 00:00:00 2001 From: aredenba-rh Date: Tue, 26 May 2026 13:54:28 -0400 Subject: [PATCH 014/153] feat(ingestion): resolve baseline and tracked head at sync start (#691) Prepare Git-backed ingestion context by loading data-source commit references, refreshing tracked branch head, and passing baseline commit plus resolved credentials into the ingestion pipeline before packaging begins. 
Co-authored-by: Cursor # Conflicts: # src/api/ingestion/application/services/ingestion_service.py # src/api/ingestion/infrastructure/event_handler.py # src/api/ingestion/ports/services.py # src/api/tests/unit/ingestion/infrastructure/test_ingestion_event_handler.py --- .../application/services/ingestion_service.py | 28 +++-- .../ingestion/infrastructure/event_handler.py | 2 + src/api/ingestion/ports/services.py | 4 + src/api/main.py | 106 ++++++++++++++++- .../application/test_ingestion_service.py | 27 +++++ .../test_ingestion_event_handler.py | 22 ++++ .../unit/test_sessioned_ingestion_handler.py | 107 ++++++++++++++++++ 7 files changed, 287 insertions(+), 9 deletions(-) create mode 100644 src/api/tests/unit/test_sessioned_ingestion_handler.py diff --git a/src/api/ingestion/application/services/ingestion_service.py b/src/api/ingestion/application/services/ingestion_service.py index d8fa626d0..489a10ef4 100644 --- a/src/api/ingestion/application/services/ingestion_service.py +++ b/src/api/ingestion/application/services/ingestion_service.py @@ -15,6 +15,7 @@ from shared_kernel.credential_reader import ICredentialReader from shared_kernel.job_package.builder import JobPackageBuilder from shared_kernel.job_package.value_objects import ( + AdapterCheckpoint, JobPackageId, SyncMode, ) @@ -58,6 +59,8 @@ async def run( connection_config: dict[str, str], credentials_path: str | None, tenant_id: str | None = None, + credentials: dict[str, str] | None = None, + baseline_commit: str | None = None, ) -> JobPackageId: """Run the ingestion pipeline for a data source sync. 
@@ -69,6 +72,9 @@ async def run( connection_config: Key-value adapter configuration credentials_path: Path for encrypted credentials tenant_id: Tenant ID for credential decryption scoping + credentials: Optional decrypted credentials prepared by caller + baseline_commit: Optional baseline commit SHA used to seed + incremental extraction checkpoint state Returns: The JobPackageId of the produced ZIP archive @@ -85,23 +91,29 @@ async def run( f"Registered adapters: {list(self._adapter_registry.keys())}" ) - credentials: dict[str, str] = {} - if credentials_path: + # Credentials are usually provided by the session-aware event wrapper. + resolved_credentials: dict[str, str] = dict(credentials or {}) + if not resolved_credentials and credentials_path: if not tenant_id: - raise ValueError( - "tenant_id is required when credentials_path is provided" - ) + raise ValueError("tenant_id is required when credentials_path is provided") if self._credential_reader is None: raise RuntimeError("credential_reader is not configured") - credentials = await self._credential_reader.retrieve( + resolved_credentials = await self._credential_reader.retrieve( credentials_path, tenant_id ) + checkpoint = None + if baseline_commit: + checkpoint = AdapterCheckpoint( + schema_version="1.0.0", + data={"commit_sha": baseline_commit}, + ) + # Extract raw items from the adapter using the new ExtractionResult API result = await adapter.extract( connection_config=connection_config, - credentials=credentials, - checkpoint=None, # no checkpoint support yet; always full refresh + credentials=resolved_credentials, + checkpoint=checkpoint, sync_mode=SyncMode.INCREMENTAL, ) diff --git a/src/api/ingestion/infrastructure/event_handler.py b/src/api/ingestion/infrastructure/event_handler.py index 0a9d02b63..e11aeaf2b 100644 --- a/src/api/ingestion/infrastructure/event_handler.py +++ b/src/api/ingestion/infrastructure/event_handler.py @@ -83,6 +83,8 @@ async def handle( 
connection_config=payload.get("connection_config", {}), credentials_path=payload.get("credentials_path"), tenant_id=payload.get("tenant_id"), + credentials=payload.get("credentials"), + baseline_commit=payload.get("baseline_commit"), ) except asyncio.CancelledError: # Propagate task cancellation so the event loop can shut down diff --git a/src/api/ingestion/ports/services.py b/src/api/ingestion/ports/services.py index c6306087f..6aee85417 100644 --- a/src/api/ingestion/ports/services.py +++ b/src/api/ingestion/ports/services.py @@ -23,6 +23,8 @@ async def run( connection_config: dict[str, str], credentials_path: str | None, tenant_id: str | None = None, + credentials: dict[str, str] | None = None, + baseline_commit: str | None = None, ) -> JobPackageId: """Run the ingestion pipeline. @@ -33,6 +35,8 @@ async def run( adapter_type: Which adapter to use (e.g. "github") connection_config: Adapter-specific connection configuration credentials_path: Optional Vault path for credentials + credentials: Optional decrypted credentials prepared upstream + baseline_commit: Optional commit SHA used as incremental baseline Returns: JobPackageId for the produced archive diff --git a/src/api/main.py b/src/api/main.py index d4deaa401..5fd81c50c 100644 --- a/src/api/main.py +++ b/src/api/main.py @@ -4,9 +4,11 @@ from contextlib import asynccontextmanager from pathlib import Path from typing import Any +from urllib.parse import urlparse from fastapi import FastAPI from fastapi.middleware.cors import CORSMiddleware +import httpx from util import dev_routes import health_routes @@ -25,6 +27,7 @@ get_cors_settings, get_database_settings, get_iam_settings, + get_management_settings, get_oidc_settings, get_outbox_worker_settings, get_spicedb_settings, @@ -136,13 +139,80 @@ def __init__(self, session_factory: Any) -> None: def supported_event_types(self) -> frozenset[str]: return self._SUPPORTED + @staticmethod + def _parse_github_connection_config( + config: dict[str, str], + ) -> 
tuple[str, str, str]: + """Parse GitHub config into owner/repo/branch.""" + if "repo_url" in config: + parsed = urlparse(config["repo_url"]) + path_parts = [part for part in parsed.path.split("/") if part] + if len(path_parts) < 2: + raise ValueError("repo_url must include owner and repo") + owner = path_parts[0] + repo = path_parts[1].removesuffix(".git") + branch = config.get("branch", "main") + if len(path_parts) >= 4 and path_parts[2] == "tree": + branch = path_parts[3] + return owner, repo, branch + + if "owner" in config and "repo" in config: + return config["owner"], config["repo"], config.get("branch", "main") + + raise ValueError( + "connection_config must include either 'repo_url' or 'owner'+'repo' keys" + ) + + async def _resolve_github_tracked_head_commit( + self, + connection_config: dict[str, str], + credentials: dict[str, str], + ) -> str | None: + """Resolve latest tracked branch head commit for GitHub sources.""" + try: + owner, repo, branch = self._parse_github_connection_config(connection_config) + except ValueError: + return None + + headers = { + "Accept": "application/vnd.github+json", + "X-GitHub-Api-Version": "2022-11-28", + } + token = credentials.get("token") or credentials.get("access_token") + if token: + headers["Authorization"] = f"Bearer {token}" + + url = f"https://api.github.com/repos/{owner}/{repo}/branches/{branch}" + async with httpx.AsyncClient(timeout=20.0) as client: + response = await client.get(url, headers=headers) + response.raise_for_status() + payload = response.json() + sha = payload.get("commit", {}).get("sha") + return str(sha) if sha else None + async def handle(self, event_type: str, payload: dict[str, Any]) -> None: from infrastructure.outbox.repository import OutboxRepository from ingestion.application.services.ingestion_service import IngestionService from ingestion.infrastructure.event_handler import IngestionEventHandler + from management.domain.value_objects import DataSourceId + from 
management.infrastructure.repositories.data_source_repository import ( + DataSourceRepository, + ) + from management.infrastructure.repositories.fernet_secret_store import ( + FernetSecretStore, + ) async with self._session_factory() as session: outbox = OutboxRepository(session=session) + ds_repo = DataSourceRepository(session=session, outbox=outbox) + management_settings = get_management_settings() + encryption_keys = management_settings.encryption_key.get_secret_value().split( + "," + ) + credential_reader = FernetSecretStore( + session=session, + encryption_keys=encryption_keys, + ) from ingestion.infrastructure.adapters.github import GitHubAdapter from infrastructure.settings import get_management_settings from management.infrastructure.repositories.fernet_secret_store import ( @@ -177,7 +247,41 @@ async def handle(self, event_type: str, payload: dict[str, Any]) -> None: ingestion_service=ingestion_service, outbox=outbox, ) - await ingestion_handler.handle(event_type, payload) + enriched_payload = dict(payload) + + data_source_id = str(payload.get("data_source_id", "")) + tenant_id = str(payload.get("tenant_id", "")) if payload.get("tenant_id") else "" + adapter_type = str(payload.get("adapter_type", "")) + if data_source_id and adapter_type == "github": + ds = await ds_repo.get_by_id(DataSourceId(value=data_source_id)) + if ds is not None: + if ds.last_extraction_baseline_commit: + enriched_payload["baseline_commit"] = ( + ds.last_extraction_baseline_commit + ) + + credentials: dict[str, str] = {} + if ds.credentials_path and tenant_id: + try: + credentials = await credential_reader.retrieve( + path=ds.credentials_path, + tenant_id=tenant_id, + ) + except KeyError: + credentials = {} + if credentials: + enriched_payload["credentials"] = credentials + + tracked_head = await self._resolve_github_tracked_head_commit( + connection_config=ds.connection_config, + credentials=credentials, + ) + if tracked_head: + enriched_payload["tracked_branch_head_commit"] = 
tracked_head + ds.tracked_branch_head_commit = tracked_head + await ds_repo.save(ds) + + await ingestion_handler.handle(event_type, enriched_payload) await session.commit() diff --git a/src/api/tests/unit/ingestion/application/test_ingestion_service.py b/src/api/tests/unit/ingestion/application/test_ingestion_service.py index 5329e0e26..9d5be9cd8 100644 --- a/src/api/tests/unit/ingestion/application/test_ingestion_service.py +++ b/src/api/tests/unit/ingestion/application/test_ingestion_service.py @@ -57,6 +57,8 @@ def __init__( ) -> None: self._result = result self._fail = fail + self.last_checkpoint: AdapterCheckpoint | None = None + self.last_credentials: dict[str, str] | None = None async def extract( self, @@ -65,6 +67,8 @@ async def extract( checkpoint: AdapterCheckpoint | None, sync_mode: SyncMode, ) -> ExtractionResult: + self.last_checkpoint = checkpoint + self.last_credentials = credentials if self._fail: raise RuntimeError("credentials expired") if self._result is not None: @@ -178,3 +182,26 @@ async def test_run_handles_empty_changeset(self): credentials_path=None, ) assert isinstance(job_id, JobPackageId) + + async def test_run_uses_baseline_commit_as_checkpoint(self): + """run() should convert baseline_commit into an adapter checkpoint.""" + result = _make_extraction_result() + adapter = _FakeAdapter(result=result) + registry: dict[str, IDatasourceAdapter] = {"github": adapter} + with tempfile.TemporaryDirectory() as tmpdir: + service = IngestionService( + adapter_registry=registry, + work_dir=Path(tmpdir), + ) + await service.run( + sync_run_id="run-001", + data_source_id="ds-001", + knowledge_graph_id="kg-001", + adapter_type="github", + connection_config={"repo": "org/repo"}, + credentials_path=None, + baseline_commit="abc123", + ) + + assert adapter.last_checkpoint is not None + assert adapter.last_checkpoint.data == {"commit_sha": "abc123"} diff --git a/src/api/tests/unit/ingestion/infrastructure/test_ingestion_event_handler.py 
b/src/api/tests/unit/ingestion/infrastructure/test_ingestion_event_handler.py index 83716bbcf..37560027c 100644 --- a/src/api/tests/unit/ingestion/infrastructure/test_ingestion_event_handler.py +++ b/src/api/tests/unit/ingestion/infrastructure/test_ingestion_event_handler.py @@ -67,6 +67,8 @@ async def run( connection_config: dict[str, str], credentials_path: str | None, tenant_id: str | None = None, + credentials: dict[str, str] | None = None, + baseline_commit: str | None = None, ) -> JobPackageId: self.calls.append( { @@ -74,6 +76,8 @@ async def run( "data_source_id": data_source_id, "knowledge_graph_id": knowledge_graph_id, "adapter_type": adapter_type, + "credentials": credentials, + "baseline_commit": baseline_commit, } ) if self._fail: @@ -150,6 +154,22 @@ async def test_runs_ingestion_on_sync_started( assert call["sync_run_id"] == "run-001" assert call["adapter_type"] == "github" + async def test_passes_baseline_and_credentials_through_payload( + self, + handler: IngestionEventHandler, + ingestion_service: _FakeIngestionService, + ): + """SyncStarted payload baseline/credentials should pass to service.run().""" + payload = _sync_started_payload() + payload["baseline_commit"] = "abc123" + payload["credentials"] = {"token": "secret"} + + await handler.handle("SyncStarted", payload) + + call = ingestion_service.calls[0] + assert call["baseline_commit"] == "abc123" + assert call["credentials"] == {"token": "secret"} + async def test_emits_job_package_produced_on_success( self, handler: IngestionEventHandler, @@ -296,6 +316,8 @@ async def run( # type: ignore[override] connection_config: dict[str, str], credentials_path: str | None, tenant_id: str | None = None, + credentials: dict[str, str] | None = None, + baseline_commit: str | None = None, ) -> JobPackageId: raise asyncio.CancelledError() diff --git a/src/api/tests/unit/test_sessioned_ingestion_handler.py b/src/api/tests/unit/test_sessioned_ingestion_handler.py new file mode 100644 index 000000000..962873c94 
--- /dev/null +++ b/src/api/tests/unit/test_sessioned_ingestion_handler.py @@ -0,0 +1,107 @@ +"""Unit tests for session-aware ingestion event context preparation.""" + +from __future__ import annotations + +from datetime import UTC, datetime +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest + +from management.domain.aggregates import DataSource +from management.domain.value_objects import DataSourceId, Schedule, ScheduleType +from shared_kernel.datasource_types import DataSourceAdapterType + + +def _make_session_factory(session): + ctx = MagicMock() + ctx.__aenter__ = AsyncMock(return_value=session) + ctx.__aexit__ = AsyncMock(return_value=False) + factory = MagicMock(return_value=ctx) + return factory + + +def _make_data_source() -> DataSource: + now = datetime.now(UTC) + return DataSource( + id=DataSourceId(value="01JTESTSESSIONHANDLERDATA00"), + knowledge_graph_id="01JTESTSESSIONHANDLERKG0000", + tenant_id="tenant-001", + name="GitHub Source", + adapter_type=DataSourceAdapterType.GITHUB, + connection_config={"owner": "org", "repo": "repo", "branch": "main"}, + credentials_path="datasource/01JTESTSESSIONHANDLERDATA00/credentials", + schedule=Schedule(schedule_type=ScheduleType.MANUAL), + last_sync_at=None, + created_at=now, + updated_at=now, + last_extraction_baseline_commit="baseline123", + ) + + +@pytest.mark.asyncio +async def test_sessioned_ingestion_handler_prepares_commit_context(): + """Wrapper should inject baseline/credentials and refresh tracked head.""" + from main import _SessionedIngestionEventHandler + + session = AsyncMock() + session_factory = _make_session_factory(session) + handler = _SessionedIngestionEventHandler(session_factory=session_factory) + handler._resolve_github_tracked_head_commit = AsyncMock(return_value="head456") # type: ignore[attr-defined] + + outbox_repo = MagicMock() + ds_repo = MagicMock() + secret_store = MagicMock() + ingestion_handler = MagicMock() + ingestion_handler.handle = AsyncMock() + 
ingestion_service = MagicMock() + + data_source = _make_data_source() + ds_repo.get_by_id = AsyncMock(return_value=data_source) + ds_repo.save = AsyncMock() + secret_store.retrieve = AsyncMock(return_value={"token": "tok"}) + + payload = { + "sync_run_id": "run-001", + "data_source_id": data_source.id.value, + "knowledge_graph_id": data_source.knowledge_graph_id, + "tenant_id": data_source.tenant_id, + "adapter_type": "github", + "connection_config": data_source.connection_config, + "credentials_path": data_source.credentials_path, + } + + management_settings = MagicMock() + management_settings.encryption_key.get_secret_value.return_value = ( + "WlAwWU83a2hSODl2SVY4MHBzQWpwaDBSUHhOU3NfQ3R6aXpvNTJfNE5odz0=" + ) + + with ( + patch("infrastructure.outbox.repository.OutboxRepository", return_value=outbox_repo), + patch( + "management.infrastructure.repositories.data_source_repository.DataSourceRepository", + return_value=ds_repo, + ), + patch( + "management.infrastructure.repositories.fernet_secret_store.FernetSecretStore", + return_value=secret_store, + ), + patch( + "ingestion.application.services.ingestion_service.IngestionService", + return_value=ingestion_service, + ), + patch( + "ingestion.infrastructure.event_handler.IngestionEventHandler", + return_value=ingestion_handler, + ), + patch("main.get_management_settings", return_value=management_settings), + ): + await handler.handle("SyncStarted", payload) + + ingestion_handler.handle.assert_called_once() + call_payload = ingestion_handler.handle.call_args.args[1] + assert call_payload["baseline_commit"] == "baseline123" + assert call_payload["tracked_branch_head_commit"] == "head456" + assert call_payload["credentials"] == {"token": "tok"} + ds_repo.save.assert_awaited_once() + assert data_source.tracked_branch_head_commit == "head456" + From 8ebe2044eaaf8504a5325e0ca1fd03890f4de660 Mon Sep 17 00:00:00 2001 From: Austin Redenbaugh Date: Wed, 20 May 2026 10:18:36 -0400 Subject: [PATCH 015/153] feat(ingestion): 
short-circuit sync when no new commit exists (#692) Skip heavy extraction when tracked branch head equals the last extraction baseline by emitting a completed lifecycle event and recording an explicit no-change audit log entry on the sync run. Co-authored-by: Cursor --- .../ingestion/infrastructure/event_handler.py | 16 +++++ src/api/main.py | 7 ++ .../infrastructure/sync_lifecycle_handler.py | 5 ++ .../test_ingestion_event_handler.py | 21 ++++++ .../test_sync_lifecycle_handler.py | 42 ++++++++++++ .../unit/test_sessioned_ingestion_handler.py | 65 +++++++++++++++++++ 6 files changed, 156 insertions(+) diff --git a/src/api/ingestion/infrastructure/event_handler.py b/src/api/ingestion/infrastructure/event_handler.py index e11aeaf2b..27ea29e5f 100644 --- a/src/api/ingestion/infrastructure/event_handler.py +++ b/src/api/ingestion/infrastructure/event_handler.py @@ -74,6 +74,22 @@ async def handle( knowledge_graph_id = payload["knowledge_graph_id"] now = datetime.now(UTC) + if payload.get("no_changes_detected") is True: + await self._outbox.append( + event_type="MutationsApplied", + payload={ + "sync_run_id": sync_run_id, + "data_source_id": data_source_id, + "knowledge_graph_id": knowledge_graph_id, + "no_changes_detected": True, + "occurred_at": now.isoformat(), + }, + occurred_at=now, + aggregate_type="sync_run", + aggregate_id=sync_run_id, + ) + return + try: job_package_id = await self._ingestion_service.run( sync_run_id=sync_run_id, diff --git a/src/api/main.py b/src/api/main.py index 5fd81c50c..47629ace1 100644 --- a/src/api/main.py +++ b/src/api/main.py @@ -280,6 +280,13 @@ async def handle(self, event_type: str, payload: dict[str, Any]) -> None: enriched_payload["tracked_branch_head_commit"] = tracked_head ds.tracked_branch_head_commit = tracked_head await ds_repo.save(ds) + baseline_commit = enriched_payload.get("baseline_commit") + if ( + isinstance(baseline_commit, str) + and baseline_commit + and baseline_commit == tracked_head + ): + 
enriched_payload["no_changes_detected"] = True await ingestion_handler.handle(event_type, enriched_payload) await session.commit() diff --git a/src/api/management/infrastructure/sync_lifecycle_handler.py b/src/api/management/infrastructure/sync_lifecycle_handler.py index 5817f6cbb..c33ee1d65 100644 --- a/src/api/management/infrastructure/sync_lifecycle_handler.py +++ b/src/api/management/infrastructure/sync_lifecycle_handler.py @@ -123,6 +123,11 @@ async def handle( sync_run.status = "completed" sync_run.completed_at = now sync_run.logs.append(f"[{now.isoformat()}] Sync completed") + if payload.get("no_changes_detected") is True: + sync_run.logs.append( + f"[{now.isoformat()}] No source changes were detected; " + "heavy extraction was short-circuited." + ) if sync_run.mutation_log_run is not None: sync_run.mutation_log_run.completed_at = now if payload.get("token_usage_total") is not None: diff --git a/src/api/tests/unit/ingestion/infrastructure/test_ingestion_event_handler.py b/src/api/tests/unit/ingestion/infrastructure/test_ingestion_event_handler.py index 37560027c..408d02bb6 100644 --- a/src/api/tests/unit/ingestion/infrastructure/test_ingestion_event_handler.py +++ b/src/api/tests/unit/ingestion/infrastructure/test_ingestion_event_handler.py @@ -199,6 +199,27 @@ async def test_job_package_produced_aggregate_type( assert event["aggregate_type"] == "sync_run" assert event["aggregate_id"] == "run-001" + async def test_short_circuits_when_no_changes_detected( + self, + handler: IngestionEventHandler, + ingestion_service: _FakeIngestionService, + outbox: _FakeOutboxRepository, + ): + """When no_changes_detected is true, heavy ingestion is skipped.""" + payload = _sync_started_payload(sync_run_id="run-004") + payload["no_changes_detected"] = True + payload["tracked_branch_head_commit"] = "abc123" + payload["baseline_commit"] = "abc123" + + await handler.handle("SyncStarted", payload) + + assert ingestion_service.calls == [] + assert len(outbox.appended) == 1 + 
event = outbox.appended[0] + assert event["event_type"] == "MutationsApplied" + assert event["payload"]["sync_run_id"] == "run-004" + assert event["payload"]["no_changes_detected"] is True + @pytest.mark.asyncio class TestIngestionEventHandlerFailure: diff --git a/src/api/tests/unit/management/infrastructure/test_sync_lifecycle_handler.py b/src/api/tests/unit/management/infrastructure/test_sync_lifecycle_handler.py index edc049ecc..035afd82e 100644 --- a/src/api/tests/unit/management/infrastructure/test_sync_lifecycle_handler.py +++ b/src/api/tests/unit/management/infrastructure/test_sync_lifecycle_handler.py @@ -392,6 +392,48 @@ async def test_mutations_applied_updates_data_source_last_sync_at( saved_ds = mock_ds_repo.save.call_args[0][0] assert saved_ds.last_sync_at is not None + async def test_mutations_applied_logs_no_changes_short_circuit( + self, + handler: SyncLifecycleHandler, + mock_sync_run_repo: AsyncMock, + mock_ds_repo: AsyncMock, + ): + """No-change short-circuit should leave an explicit audit log entry.""" + from management.domain.aggregates import DataSource + from management.domain.value_objects import DataSourceId, Schedule, ScheduleType + from shared_kernel.datasource_types import DataSourceAdapterType + + run = _make_sync_run(status="ingesting") + mock_sync_run_repo.get_by_id.return_value = run + + now = datetime.now(UTC) + ds = DataSource( + id=DataSourceId(value="ds-001"), + knowledge_graph_id="kg-001", + tenant_id="tenant-001", + name="My DS", + adapter_type=DataSourceAdapterType.GITHUB, + connection_config={}, + credentials_path=None, + schedule=Schedule(schedule_type=ScheduleType.MANUAL), + last_sync_at=None, + created_at=now, + updated_at=now, + ) + mock_ds_repo.get_by_id.return_value = ds + + await handler.handle( + "MutationsApplied", + _payload( + sync_run_id=run.id, + knowledge_graph_id="kg-001", + no_changes_detected=True, + ), + ) + + saved_run: DataSourceSyncRun = mock_sync_run_repo.save.call_args[0][0] + assert any("No source 
changes were detected" in line for line in saved_run.logs) + @pytest.mark.asyncio class TestMutationApplicationFailedTransition: diff --git a/src/api/tests/unit/test_sessioned_ingestion_handler.py b/src/api/tests/unit/test_sessioned_ingestion_handler.py index 962873c94..53817275e 100644 --- a/src/api/tests/unit/test_sessioned_ingestion_handler.py +++ b/src/api/tests/unit/test_sessioned_ingestion_handler.py @@ -105,3 +105,68 @@ async def test_sessioned_ingestion_handler_prepares_commit_context(): ds_repo.save.assert_awaited_once() assert data_source.tracked_branch_head_commit == "head456" + +@pytest.mark.asyncio +async def test_sessioned_ingestion_handler_sets_no_changes_flag_when_heads_match(): + """Wrapper should short-circuit when tracked head equals baseline.""" + from main import _SessionedIngestionEventHandler + + session = AsyncMock() + session_factory = _make_session_factory(session) + handler = _SessionedIngestionEventHandler(session_factory=session_factory) + handler._resolve_github_tracked_head_commit = AsyncMock(return_value="baseline123") # type: ignore[attr-defined] + + outbox_repo = MagicMock() + ds_repo = MagicMock() + secret_store = MagicMock() + ingestion_handler = MagicMock() + ingestion_handler.handle = AsyncMock() + ingestion_service = MagicMock() + + data_source = _make_data_source() + ds_repo.get_by_id = AsyncMock(return_value=data_source) + ds_repo.save = AsyncMock() + secret_store.retrieve = AsyncMock(return_value={"token": "tok"}) + + payload = { + "sync_run_id": "run-002", + "data_source_id": data_source.id.value, + "knowledge_graph_id": data_source.knowledge_graph_id, + "tenant_id": data_source.tenant_id, + "adapter_type": "github", + "connection_config": data_source.connection_config, + "credentials_path": data_source.credentials_path, + } + + management_settings = MagicMock() + management_settings.encryption_key.get_secret_value.return_value = ( + "WlAwWU83a2hSODl2SVY4MHBzQWpwaDBSUHhOU3NfQ3R6aXpvNTJfNE5odz0=" + ) + + with ( + 
patch("infrastructure.outbox.repository.OutboxRepository", return_value=outbox_repo), + patch( + "management.infrastructure.repositories.data_source_repository.DataSourceRepository", + return_value=ds_repo, + ), + patch( + "management.infrastructure.repositories.fernet_secret_store.FernetSecretStore", + return_value=secret_store, + ), + patch( + "ingestion.application.services.ingestion_service.IngestionService", + return_value=ingestion_service, + ), + patch( + "ingestion.infrastructure.event_handler.IngestionEventHandler", + return_value=ingestion_handler, + ), + patch("main.get_management_settings", return_value=management_settings), + ): + await handler.handle("SyncStarted", payload) + + call_payload = ingestion_handler.handle.call_args.args[1] + assert call_payload["baseline_commit"] == "baseline123" + assert call_payload["tracked_branch_head_commit"] == "baseline123" + assert call_payload["no_changes_detected"] is True + From 3c71c939411150c677e63e8e699d58bf99687739 Mon Sep 17 00:00:00 2001 From: Austin Redenbaugh Date: Wed, 20 May 2026 10:25:27 -0400 Subject: [PATCH 016/153] feat(management): add baseline-vs-head diff summary endpoint (#693) Expose a data-source diff summary API that compares the last extraction baseline to tracked branch head and returns aggregate counts plus a large-list-safe changed-file preview for maintenance decisions. 
Co-authored-by: Cursor --- .../management/dependencies/data_source.py | 18 +++ .../git_diff_summary_service.py | 143 ++++++++++++++++++ .../presentation/data_sources/models.py | 36 +++++ .../presentation/data_sources/routes.py | 55 ++++++- .../test_git_diff_summary_service.py | 95 ++++++++++++ .../presentation/test_data_sources_routes.py | 68 +++++++++ 6 files changed, 414 insertions(+), 1 deletion(-) create mode 100644 src/api/management/infrastructure/git_diff_summary_service.py create mode 100644 src/api/tests/unit/management/infrastructure/test_git_diff_summary_service.py diff --git a/src/api/management/dependencies/data_source.py b/src/api/management/dependencies/data_source.py index c0d6a2765..6133ed673 100644 --- a/src/api/management/dependencies/data_source.py +++ b/src/api/management/dependencies/data_source.py @@ -17,6 +17,7 @@ from infrastructure.settings import get_management_settings from management.application.observability import DefaultDataSourceServiceProbe from management.application.services.data_source_service import DataSourceService +from management.infrastructure.git_diff_summary_service import GitDiffSummaryService from management.infrastructure.repositories import ( DataSourceRepository, DataSourceSyncRunRepository, @@ -78,3 +79,20 @@ def get_data_source_service( scope_to_tenant=current_user.tenant_id.value, probe=DefaultDataSourceServiceProbe(), ) + + +def get_git_diff_summary_service( + session: Annotated[AsyncSession, Depends(get_write_session)], + current_user: Annotated[CurrentUser, Depends(get_current_user)], +) -> GitDiffSummaryService: + """Get GitDiffSummaryService for commit-baseline file diff summaries.""" + settings = get_management_settings() + encryption_keys = settings.encryption_key.get_secret_value().split(",") + secret_store = FernetSecretStore( + session=session, + encryption_keys=encryption_keys, + ) + return GitDiffSummaryService( + credential_reader=secret_store, + tenant_id=current_user.tenant_id.value, + ) diff --git 
a/src/api/management/infrastructure/git_diff_summary_service.py b/src/api/management/infrastructure/git_diff_summary_service.py new file mode 100644 index 000000000..2a270bfaa --- /dev/null +++ b/src/api/management/infrastructure/git_diff_summary_service.py @@ -0,0 +1,143 @@ +"""Git-backed diff summary service for data-source maintenance cues.""" + +from __future__ import annotations + +from dataclasses import dataclass +from urllib.parse import urlparse + +import httpx + +from management.domain.aggregates import DataSource +from shared_kernel.credential_reader import ICredentialReader +from shared_kernel.datasource_types import DataSourceAdapterType + + +@dataclass(frozen=True) +class DiffSummaryResult: + """Aggregate + file-level diff summary between baseline and tracked head.""" + + baseline_commit: str | None + tracked_head_commit: str | None + total_changed_files: int + added_count: int + modified_count: int + removed_count: int + renamed_count: int + files_truncated: bool + changed_files: tuple[dict[str, str], ...] 
+ + +class GitDiffSummaryService: + """Build a Git commit diff summary for a data source.""" + + def __init__( + self, + credential_reader: ICredentialReader, + tenant_id: str, + http_client: httpx.AsyncClient | None = None, + ) -> None: + self._credential_reader = credential_reader + self._tenant_id = tenant_id + self._http_client = http_client + + @staticmethod + def _parse_github_connection_config(config: dict[str, str]) -> tuple[str, str]: + if "repo_url" in config: + parsed = urlparse(config["repo_url"]) + path_parts = [part for part in parsed.path.split("/") if part] + if len(path_parts) < 2: + raise ValueError("repo_url must include owner and repo") + owner = path_parts[0] + repo = path_parts[1].removesuffix(".git") + return owner, repo + + if "owner" in config and "repo" in config: + return config["owner"], config["repo"] + + raise ValueError( + "connection_config must include either 'repo_url' or 'owner'+'repo' keys" + ) + + async def build_summary( + self, + *, + data_source: DataSource, + max_files: int, + ) -> DiffSummaryResult: + """Compute changed-file summary from baseline commit to tracked head.""" + baseline = data_source.last_extraction_baseline_commit + tracked = data_source.tracked_branch_head_commit + if ( + data_source.adapter_type != DataSourceAdapterType.GITHUB + or not baseline + or not tracked + or baseline == tracked + ): + return DiffSummaryResult( + baseline_commit=baseline, + tracked_head_commit=tracked, + total_changed_files=0, + added_count=0, + modified_count=0, + removed_count=0, + renamed_count=0, + files_truncated=False, + changed_files=(), + ) + + owner, repo = self._parse_github_connection_config(data_source.connection_config) + credentials: dict[str, str] = {} + if data_source.credentials_path: + try: + credentials = await self._credential_reader.retrieve( + path=data_source.credentials_path, + tenant_id=self._tenant_id, + ) + except KeyError: + credentials = {} + + headers = { + "Accept": "application/vnd.github+json", + 
"X-GitHub-Api-Version": "2022-11-28", + } + token = credentials.get("token") or credentials.get("access_token") + if token: + headers["Authorization"] = f"Bearer {token}" + + url = f"https://api.github.com/repos/{owner}/{repo}/compare/{baseline}...{tracked}" + client = self._http_client or httpx.AsyncClient(timeout=30.0) + try: + response = await client.get(url, headers=headers) + response.raise_for_status() + payload = response.json() + finally: + if self._http_client is None: + await client.aclose() + + files: list[dict[str, str]] = [] + counts = {"added": 0, "modified": 0, "removed": 0, "renamed": 0} + for file in payload.get("files", []): + status = str(file.get("status", "modified")) + if status in counts: + counts[status] += 1 + files.append( + { + "path": str(file.get("filename", "")), + "status": status, + } + ) + + files_truncated = len(files) > max_files + visible_files = tuple(files[:max_files]) + return DiffSummaryResult( + baseline_commit=baseline, + tracked_head_commit=tracked, + total_changed_files=len(files), + added_count=counts["added"], + modified_count=counts["modified"], + removed_count=counts["removed"], + renamed_count=counts["renamed"], + files_truncated=files_truncated, + changed_files=visible_files, + ) + diff --git a/src/api/management/presentation/data_sources/models.py b/src/api/management/presentation/data_sources/models.py index 30a9a57e5..c9c385811 100644 --- a/src/api/management/presentation/data_sources/models.py +++ b/src/api/management/presentation/data_sources/models.py @@ -245,6 +245,42 @@ class SyncRunLogsResponse(BaseModel): ) +class DiffChangedFileResponse(BaseModel): + """Single changed file entry in a commit diff summary.""" + + path: str = Field(..., description="Repository-relative file path") + status: str = Field( + ..., + description="GitHub compare status (added, modified, removed, renamed, ...)", + ) + + +class DataSourceDiffSummaryResponse(BaseModel): + """Response model for baseline-vs-tracked commit diff 
summary.""" + + baseline_commit: str | None = Field( + None, + description="Commit baseline used for the previous extraction", + ) + tracked_head_commit: str | None = Field( + None, + description="Latest tracked branch head commit used for comparison", + ) + total_changed_files: int = Field(..., description="Total changed files in compare") + added_count: int = Field(..., description="Number of files added") + modified_count: int = Field(..., description="Number of files modified") + removed_count: int = Field(..., description="Number of files removed") + renamed_count: int = Field(..., description="Number of files renamed") + files_truncated: bool = Field( + ..., + description="True when changed_files is truncated by max_files", + ) + changed_files: list[DiffChangedFileResponse] = Field( + default_factory=list, + description="Changed-file entries, bounded by max_files", + ) + + class SyncRunResponse(BaseModel): """Response model for a data source sync run.""" diff --git a/src/api/management/presentation/data_sources/routes.py b/src/api/management/presentation/data_sources/routes.py index c268f9a7c..e969a5cf2 100644 --- a/src/api/management/presentation/data_sources/routes.py +++ b/src/api/management/presentation/data_sources/routes.py @@ -4,19 +4,22 @@ from typing import Annotated -from fastapi import APIRouter, Depends, HTTPException, status +from fastapi import APIRouter, Depends, HTTPException, Query, status from iam.application.value_objects import CurrentUser from iam.dependencies.user import get_current_user from management.application.services.data_source_service import DataSourceService from management.dependencies.data_source import ( get_data_source_service, + get_git_diff_summary_service, get_sync_run_repository, ) +from management.infrastructure.git_diff_summary_service import GitDiffSummaryService from management.ports.exceptions import UnauthorizedError from management.ports.repositories import IDataSourceSyncRunRepository from 
management.presentation.data_sources.models import ( CreateDataSourceRequest, + DataSourceDiffSummaryResponse, DataSourceListResponse, DataSourceResponse, DataSourceWithSyncResponse, @@ -29,6 +32,56 @@ router = APIRouter(tags=["data-sources"]) +@router.get( + "/data-sources/{ds_id}/diff-summary", + status_code=status.HTTP_200_OK, + summary="Get commit diff summary for a data source", +) +async def get_diff_summary( + ds_id: str, + current_user: Annotated[CurrentUser, Depends(get_current_user)], + service: Annotated[DataSourceService, Depends(get_data_source_service)], + diff_service: Annotated[ + GitDiffSummaryService, Depends(get_git_diff_summary_service) + ], + max_files: int = Query(default=200, ge=1, le=2000), +) -> DataSourceDiffSummaryResponse: + """Return baseline-vs-tracked diff summary for maintenance readiness cues.""" + try: + ds = await service.get( + user_id=current_user.user_id.value, + ds_id=ds_id, + ) + if ds is None: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail="Data source not found", + ) + + summary = await diff_service.build_summary( + data_source=ds, + max_files=max_files, + ) + return DataSourceDiffSummaryResponse( + baseline_commit=summary.baseline_commit, + tracked_head_commit=summary.tracked_head_commit, + total_changed_files=summary.total_changed_files, + added_count=summary.added_count, + modified_count=summary.modified_count, + removed_count=summary.removed_count, + renamed_count=summary.renamed_count, + files_truncated=summary.files_truncated, + changed_files=list(summary.changed_files), + ) + except HTTPException: + raise + except Exception: + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail="Failed to build diff summary", + ) + + @router.get( "/data-sources", status_code=status.HTTP_200_OK, diff --git a/src/api/tests/unit/management/infrastructure/test_git_diff_summary_service.py b/src/api/tests/unit/management/infrastructure/test_git_diff_summary_service.py new file mode 
100644 index 000000000..3e871fd3d --- /dev/null +++ b/src/api/tests/unit/management/infrastructure/test_git_diff_summary_service.py @@ -0,0 +1,95 @@ +"""Unit tests for GitDiffSummaryService.""" + +from __future__ import annotations + +from datetime import UTC, datetime + +import httpx +import pytest + +from management.domain.aggregates import DataSource +from management.domain.value_objects import DataSourceId, Schedule, ScheduleType +from management.infrastructure.git_diff_summary_service import GitDiffSummaryService +from shared_kernel.datasource_types import DataSourceAdapterType + + +class _FakeCredentialReader: + def __init__(self, credentials: dict[str, str] | None = None) -> None: + self._credentials = credentials or {} + + async def retrieve(self, path: str, tenant_id: str) -> dict[str, str]: + return dict(self._credentials) + + +def _make_data_source( + *, + baseline: str | None = "aaaa", + tracked: str | None = "bbbb", +) -> DataSource: + now = datetime.now(UTC) + return DataSource( + id=DataSourceId(value="01JTESTDIFFSUMMARYSOURCE000"), + knowledge_graph_id="01JTESTDIFFSUMMARYKG0000000", + tenant_id="tenant-001", + name="GitHub DS", + adapter_type=DataSourceAdapterType.GITHUB, + connection_config={"owner": "org", "repo": "repo", "branch": "main"}, + credentials_path=None, + schedule=Schedule(schedule_type=ScheduleType.MANUAL), + last_sync_at=None, + created_at=now, + updated_at=now, + last_extraction_baseline_commit=baseline, + tracked_branch_head_commit=tracked, + ) + + +@pytest.mark.asyncio +async def test_returns_empty_summary_when_commits_missing(): + """Missing baseline/tracked refs should produce an empty summary.""" + service = GitDiffSummaryService( + credential_reader=_FakeCredentialReader(), + tenant_id="tenant-001", + ) + ds = _make_data_source(baseline=None, tracked="bbbb") + + result = await service.build_summary(data_source=ds, max_files=50) + + assert result.total_changed_files == 0 + assert result.changed_files == () + + 
+@pytest.mark.asyncio +async def test_truncates_changed_files_when_max_exceeded(): + """Changed-file list should truncate safely for large diffs.""" + + def handler(request: httpx.Request) -> httpx.Response: + assert "compare" in str(request.url) + return httpx.Response( + status_code=200, + json={ + "files": [ + {"filename": "a.py", "status": "added"}, + {"filename": "b.py", "status": "modified"}, + {"filename": "c.py", "status": "removed"}, + ] + }, + ) + + client = httpx.AsyncClient(transport=httpx.MockTransport(handler)) + service = GitDiffSummaryService( + credential_reader=_FakeCredentialReader(), + tenant_id="tenant-001", + http_client=client, + ) + + result = await service.build_summary(data_source=_make_data_source(), max_files=2) + await client.aclose() + + assert result.total_changed_files == 3 + assert result.files_truncated is True + assert len(result.changed_files) == 2 + assert result.added_count == 1 + assert result.modified_count == 1 + assert result.removed_count == 1 + diff --git a/src/api/tests/unit/management/presentation/test_data_sources_routes.py b/src/api/tests/unit/management/presentation/test_data_sources_routes.py index 7859ff453..71adcd6d1 100644 --- a/src/api/tests/unit/management/presentation/test_data_sources_routes.py +++ b/src/api/tests/unit/management/presentation/test_data_sources_routes.py @@ -18,6 +18,7 @@ from management.application.services.data_source_service import DataSourceService from management.domain.aggregates import DataSource from management.domain.entities import DataSourceSyncRun +from management.infrastructure.git_diff_summary_service import DiffSummaryResult from management.domain.value_objects import ( DataSourceId, Ontology, @@ -43,6 +44,12 @@ def mock_sync_run_repo() -> AsyncMock: return AsyncMock(spec=IDataSourceSyncRunRepository) +@pytest.fixture +def mock_diff_summary_service() -> AsyncMock: + """Mock GitDiffSummaryService for diff-summary route testing.""" + return AsyncMock() + + @pytest.fixture def 
mock_current_user() -> CurrentUser: """Mock CurrentUser for authentication.""" @@ -94,12 +101,14 @@ def sample_sync_run(sample_data_source: DataSource) -> DataSourceSyncRun: def test_client( mock_ds_service: AsyncMock, mock_sync_run_repo: AsyncMock, + mock_diff_summary_service: AsyncMock, mock_current_user: CurrentUser, ) -> TestClient: """Create TestClient with mocked dependencies.""" from iam.dependencies.user import get_current_user from management.dependencies.data_source import ( get_data_source_service, + get_git_diff_summary_service, get_sync_run_repository, ) from management.presentation import router @@ -108,6 +117,9 @@ def test_client( app.dependency_overrides[get_data_source_service] = lambda: mock_ds_service app.dependency_overrides[get_sync_run_repository] = lambda: mock_sync_run_repo + app.dependency_overrides[get_git_diff_summary_service] = ( + lambda: mock_diff_summary_service + ) app.dependency_overrides[get_current_user] = lambda: mock_current_user app.include_router(router) @@ -728,6 +740,62 @@ def test_list_all_calls_service_with_current_user_id( ) +class TestDataSourceDiffSummaryRoute: + """Tests for GET /management/data-sources/{ds_id}/diff-summary endpoint.""" + + def test_diff_summary_returns_counts_and_changed_files( + self, + test_client: TestClient, + mock_ds_service: AsyncMock, + mock_diff_summary_service: AsyncMock, + sample_data_source: DataSource, + ) -> None: + """Diff summary should include aggregate counts + changed file list.""" + mock_ds_service.get.return_value = sample_data_source + mock_diff_summary_service.build_summary.return_value = DiffSummaryResult( + baseline_commit="abc", + tracked_head_commit="def", + total_changed_files=2, + added_count=1, + modified_count=1, + removed_count=0, + renamed_count=0, + files_truncated=False, + changed_files=( + {"path": "src/a.py", "status": "added"}, + {"path": "src/b.py", "status": "modified"}, + ), + ) + + response = test_client.get( + 
f"/management/data-sources/{sample_data_source.id.value}/diff-summary" + ) + + assert response.status_code == status.HTTP_200_OK + payload = response.json() + assert payload["total_changed_files"] == 2 + assert payload["added_count"] == 1 + assert payload["modified_count"] == 1 + assert payload["files_truncated"] is False + assert payload["changed_files"][0]["path"] == "src/a.py" + + def test_diff_summary_returns_404_when_data_source_inaccessible( + self, + test_client: TestClient, + mock_ds_service: AsyncMock, + mock_diff_summary_service: AsyncMock, + ) -> None: + """Diff summary route should return 404 when DS is not found/authorized.""" + mock_ds_service.get.return_value = None + + response = test_client.get( + "/management/data-sources/01JPQRST1234567890ABCDEFDS/diff-summary" + ) + + assert response.status_code == status.HTTP_404_NOT_FOUND + mock_diff_summary_service.build_summary.assert_not_called() + + class TestUpdateDataSourceRoute: """Tests for PATCH /management/data-sources/{ds_id} endpoint. From 2dc62cac30191cd5865115f00e81d31c7d75343a Mon Sep 17 00:00:00 2001 From: Austin Redenbaugh Date: Wed, 20 May 2026 10:31:59 -0400 Subject: [PATCH 017/153] feat(dev-ui): add collapsed diff-summary panel for data sources (#694) Show commit-based diff counts immediately on each data source card and render the changed-file list as collapsed-by-default with explicit expand/collapse controls for large-diff safe browsing. 
Co-authored-by: Cursor --- src/dev-ui/app/pages/data-sources/index.vue | 112 ++++++++++++++++++++ src/dev-ui/app/tests/data-sources.test.ts | 42 ++++++++ 2 files changed, 154 insertions(+) diff --git a/src/dev-ui/app/pages/data-sources/index.vue b/src/dev-ui/app/pages/data-sources/index.vue index 573c7b6bf..50f702cb3 100644 --- a/src/dev-ui/app/pages/data-sources/index.vue +++ b/src/dev-ui/app/pages/data-sources/index.vue @@ -95,7 +95,28 @@ interface DataSourceItem { knowledge_graph_id: string last_sync_at: string | null created_at: string + clone_head_commit?: string | null + last_extraction_baseline_commit?: string | null + tracked_branch_head_commit?: string | null sync_runs?: SyncRun[] + diff_summary?: DataSourceDiffSummary | null +} + +interface DiffChangedFile { + path: string + status: string +} + +interface DataSourceDiffSummary { + baseline_commit: string | null + tracked_head_commit: string | null + total_changed_files: number + added_count: number + modified_count: number + removed_count: number + renamed_count: number + files_truncated: boolean + changed_files: DiffChangedFile[] } interface AdapterType { @@ -595,6 +616,20 @@ async function approveOntology() { const dataSources = ref([]) const loadingDataSources = ref(false) +const expandedDiffLists = ref>({}) + +function isMaintenanceReady(ds: DataSourceItem): boolean { + if (!ds.last_extraction_baseline_commit || !ds.tracked_branch_head_commit) return false + return ds.last_extraction_baseline_commit !== ds.tracked_branch_head_commit +} + +function isDiffExpanded(dsId: string): boolean { + return expandedDiffLists.value[dsId] === true +} + +function toggleDiffExpanded(dsId: string) { + expandedDiffLists.value[dsId] = !isDiffExpanded(dsId) +} async function loadDataSources() { if (!hasTenant.value) return @@ -623,6 +658,13 @@ async function loadDataSources() { } catch { ds.sync_runs = [] } + try { + ds.diff_summary = await apiFetch( + `/management/data-sources/${ds.id}/diff-summary` + ) + } catch { + 
ds.diff_summary = null + } all.push(ds) } } catch { @@ -1141,6 +1183,76 @@ async function handleDeleteDs() { + +
+

Commit Status

+
+
+

Local clone commit

+

{{ ds.clone_head_commit ?? '—' }}

+
+
+

Commit during last extraction

+

{{ ds.last_extraction_baseline_commit ?? '—' }}

+
+
+

Tracked branch head commit

+

{{ ds.tracked_branch_head_commit ?? '—' }}

+
+
+ +
+
+
+ {{ ds.diff_summary.total_changed_files }} + changed files + (+{{ ds.diff_summary.added_count }}, + ~{{ ds.diff_summary.modified_count }}, + -{{ ds.diff_summary.removed_count }}, + r{{ ds.diff_summary.renamed_count }}) +
+ + {{ isMaintenanceReady(ds) ? 'New commits available' : 'Up to date' }} + +
+ +
+

+ Changed-file list is collapsed by default for large diffs. +

+ +
+ +
+
+ {{ file.path }} + {{ file.status }} +
+

+ Showing first {{ ds.diff_summary.changed_files.length }} files. Refine or page for full list. +

+
+
+

Sync History

diff --git a/src/dev-ui/app/tests/data-sources.test.ts b/src/dev-ui/app/tests/data-sources.test.ts index 03c1a8d33..224e00ebb 100644 --- a/src/dev-ui/app/tests/data-sources.test.ts +++ b/src/dev-ui/app/tests/data-sources.test.ts @@ -2023,6 +2023,48 @@ describe('Backend API Alignment — Scenario: Resource operations succeed end-to }) }) +describe('Diff summary panel behavior', () => { + it('detects maintenance readiness when tracked head differs from baseline', () => { + function isMaintenanceReady(ds: { + last_extraction_baseline_commit?: string | null + tracked_branch_head_commit?: string | null + }): boolean { + if (!ds.last_extraction_baseline_commit || !ds.tracked_branch_head_commit) return false + return ds.last_extraction_baseline_commit !== ds.tracked_branch_head_commit + } + + expect( + isMaintenanceReady({ + last_extraction_baseline_commit: 'aaa', + tracked_branch_head_commit: 'bbb', + }), + ).toBe(true) + expect( + isMaintenanceReady({ + last_extraction_baseline_commit: 'aaa', + tracked_branch_head_commit: 'aaa', + }), + ).toBe(false) + }) + + it('keeps changed-file list collapsed by default and toggles on demand', () => { + const expanded: Record = {} + + function isDiffExpanded(dsId: string): boolean { + return expanded[dsId] === true + } + function toggleDiffExpanded(dsId: string) { + expanded[dsId] = !isDiffExpanded(dsId) + } + + expect(isDiffExpanded('ds-1')).toBe(false) + toggleDiffExpanded('ds-1') + expect(isDiffExpanded('ds-1')).toBe(true) + toggleDiffExpanded('ds-1') + expect(isDiffExpanded('ds-1')).toBe(false) + }) +}) + // ── task-082: Ontology Editor — save to backend after post-extraction edit ─── // Spec: "GIVEN a knowledge graph with completed extraction // WHEN the user modifies the ontology From 5e7a5fdbebfeb3aceed08156ea303885a3f2d900 Mon Sep 17 00:00:00 2001 From: Austin Redenbaugh Date: Wed, 20 May 2026 13:57:18 -0400 Subject: [PATCH 018/153] feat(management): add commit-refresh actions for data sources (#675) (#695) Add explicit 
data-source actions to refresh tracked/clone commit references and adopt tracked head as the current extraction baseline. This lets the UI surface per-source changed-file counts with user-controlled commit context updates for maintenance decisioning. Co-authored-by: Cursor --- src/api/main.py | 2 +- .../services/data_source_service.py | 80 +++++++++++++ .../management/dependencies/data_source.py | 20 ++++ .../git_commit_reference_service.py | 89 ++++++++++++++ .../presentation/data_sources/routes.py | 99 ++++++++++++++++ .../application/test_data_source_service.py | 101 ++++++++++++++++ .../test_git_commit_reference_service.py | 109 ++++++++++++++++++ .../presentation/test_data_sources_routes.py | 95 +++++++++++++++ src/dev-ui/app/pages/data-sources/index.vue | 60 ++++++++++ src/dev-ui/app/tests/data-sources.test.ts | 53 +++++++++ 10 files changed, 707 insertions(+), 1 deletion(-) create mode 100644 src/api/management/infrastructure/git_commit_reference_service.py create mode 100644 src/api/tests/unit/management/infrastructure/test_git_commit_reference_service.py diff --git a/src/api/main.py b/src/api/main.py index 47629ace1..2061f1808 100644 --- a/src/api/main.py +++ b/src/api/main.py @@ -153,7 +153,7 @@ def _parse_github_connection_config( repo = path_parts[1].removesuffix(".git") branch = config.get("branch", "main") if len(path_parts) >= 4 and path_parts[2] == "tree": - branch = path_parts[3] + branch = "/".join(path_parts[3:]) return owner, repo, branch if "owner" in config and "repo" in config: diff --git a/src/api/management/application/services/data_source_service.py b/src/api/management/application/services/data_source_service.py index a64490357..d17b4f7ff 100644 --- a/src/api/management/application/services/data_source_service.py +++ b/src/api/management/application/services/data_source_service.py @@ -455,6 +455,86 @@ async def update_ontology( return ds + async def refresh_commit_references( + self, + user_id: str, + ds_id: str, + 
tracked_branch_head_commit: str, + clone_head_commit: str | None = None, + ) -> DataSource: + """Persist refreshed source commit references for a data source. + + Requires MANAGE permission on the data source. This action updates + tracked and clone commit references and initializes extraction baseline + on first refresh so per-source diff counts can be computed immediately. + """ + has_manage = await self._check_permission( + user_id=user_id, + resource_type=ResourceType.DATA_SOURCE, + resource_id=ds_id, + permission=Permission.MANAGE, + ) + if not has_manage: + self._probe.permission_denied( + user_id=user_id, + resource_id=ds_id, + permission=Permission.MANAGE, + ) + raise UnauthorizedError( + f"User {user_id} lacks manage permission on data source {ds_id}" + ) + + ds = await self._ds_repo.get_by_id(DataSourceId(value=ds_id)) + if ds is None or ds.tenant_id != self._scope_to_tenant: + raise ValueError(f"Data source {ds_id} not found") + + resolved_clone_head = clone_head_commit or tracked_branch_head_commit + ds.tracked_branch_head_commit = tracked_branch_head_commit + ds.clone_head_commit = resolved_clone_head + if ds.last_extraction_baseline_commit is None: + ds.last_extraction_baseline_commit = tracked_branch_head_commit + + await self._ds_repo.save(ds) + await self._session.commit() + self._probe.data_source_updated(ds_id=ds_id, name=ds.name) + return ds + + async def adopt_tracked_head_as_baseline( + self, + user_id: str, + ds_id: str, + ) -> DataSource: + """Move extraction baseline to the currently tracked branch head.""" + has_manage = await self._check_permission( + user_id=user_id, + resource_type=ResourceType.DATA_SOURCE, + resource_id=ds_id, + permission=Permission.MANAGE, + ) + if not has_manage: + self._probe.permission_denied( + user_id=user_id, + resource_id=ds_id, + permission=Permission.MANAGE, + ) + raise UnauthorizedError( + f"User {user_id} lacks manage permission on data source {ds_id}" + ) + + ds = await 
self._ds_repo.get_by_id(DataSourceId(value=ds_id)) + if ds is None or ds.tenant_id != self._scope_to_tenant: + raise ValueError(f"Data source {ds_id} not found") + if not ds.tracked_branch_head_commit: + raise ValueError( + "Cannot adopt tracked branch head as baseline before refs are refreshed" + ) + + ds.last_extraction_baseline_commit = ds.tracked_branch_head_commit + await self._ds_repo.save(ds) + await self._session.commit() + self._probe.data_source_updated(ds_id=ds_id, name=ds.name) + return ds + async def delete( self, user_id: str, diff --git a/src/api/management/dependencies/data_source.py b/src/api/management/dependencies/data_source.py index 6133ed673..911703851 100644 --- a/src/api/management/dependencies/data_source.py +++ b/src/api/management/dependencies/data_source.py @@ -17,6 +17,9 @@ from infrastructure.settings import get_management_settings from management.application.observability import DefaultDataSourceServiceProbe from management.application.services.data_source_service import DataSourceService +from management.infrastructure.git_commit_reference_service import ( + GitCommitReferenceService, +) from management.infrastructure.git_diff_summary_service import GitDiffSummaryService from management.infrastructure.repositories import ( DataSourceRepository, @@ -96,3 +99,20 @@ def get_git_diff_summary_service( credential_reader=secret_store, tenant_id=current_user.tenant_id.value, ) + + +def get_git_commit_reference_service( + session: Annotated[AsyncSession, Depends(get_write_session)], + current_user: Annotated[CurrentUser, Depends(get_current_user)], +) -> GitCommitReferenceService: + """Get GitCommitReferenceService for tracked-head refresh actions.""" + settings = get_management_settings() + encryption_keys = settings.encryption_key.get_secret_value().split(",") + secret_store = FernetSecretStore( + session=session, + encryption_keys=encryption_keys, + ) + return GitCommitReferenceService( + credential_reader=secret_store, + 
tenant_id=current_user.tenant_id.value, + ) diff --git a/src/api/management/infrastructure/git_commit_reference_service.py b/src/api/management/infrastructure/git_commit_reference_service.py new file mode 100644 index 000000000..b2ddcab8b --- /dev/null +++ b/src/api/management/infrastructure/git_commit_reference_service.py @@ -0,0 +1,89 @@ +"""Resolve remote commit references for Git-backed data sources.""" + +from __future__ import annotations + +from urllib.parse import urlparse + +import httpx + +from management.domain.aggregates import DataSource +from shared_kernel.credential_reader import ICredentialReader +from shared_kernel.datasource_types import DataSourceAdapterType + + +class GitCommitReferenceService: + """Fetch tracked branch HEAD commit metadata from remote Git providers.""" + + def __init__( + self, + credential_reader: ICredentialReader, + tenant_id: str, + http_client: httpx.AsyncClient | None = None, + ) -> None: + self._credential_reader = credential_reader + self._tenant_id = tenant_id + self._http_client = http_client + + @staticmethod + def _parse_github_connection_config( + config: dict[str, str], + ) -> tuple[str, str, str]: + """Parse GitHub connection settings into owner/repo/branch.""" + if "repo_url" in config: + parsed = urlparse(config["repo_url"]) + path_parts = [part for part in parsed.path.split("/") if part] + if len(path_parts) < 2: + raise ValueError("repo_url must include owner and repo") + owner = path_parts[0] + repo = path_parts[1].removesuffix(".git") + branch = config.get("branch", "main") + if len(path_parts) >= 4 and path_parts[2] == "tree": + branch = "/".join(path_parts[3:]) + return owner, repo, branch + + if "owner" in config and "repo" in config: + return config["owner"], config["repo"], config.get("branch", "main") + + raise ValueError( + "connection_config must include either 'repo_url' or 'owner'+'repo' keys" + ) + + async def resolve_tracked_head_commit(self, data_source: DataSource) -> str | None: + """Resolve 
tracked branch HEAD commit for GitHub data sources.""" + if data_source.adapter_type != DataSourceAdapterType.GITHUB: + return None + + owner, repo, branch = self._parse_github_connection_config( + data_source.connection_config + ) + + credentials: dict[str, str] = {} + if data_source.credentials_path: + try: + credentials = await self._credential_reader.retrieve( + path=data_source.credentials_path, + tenant_id=self._tenant_id, + ) + except KeyError: + credentials = {} + + headers = { + "Accept": "application/vnd.github+json", + "X-GitHub-Api-Version": "2022-11-28", + } + token = credentials.get("token") or credentials.get("access_token") + if token: + headers["Authorization"] = f"Bearer {token}" + + url = f"https://api.github.com/repos/{owner}/{repo}/branches/{branch}" + client = self._http_client or httpx.AsyncClient(timeout=20.0) + try: + response = await client.get(url, headers=headers) + response.raise_for_status() + payload = response.json() + finally: + if self._http_client is None: + await client.aclose() + + sha = payload.get("commit", {}).get("sha") + return str(sha) if sha else None diff --git a/src/api/management/presentation/data_sources/routes.py b/src/api/management/presentation/data_sources/routes.py index e969a5cf2..99c99e419 100644 --- a/src/api/management/presentation/data_sources/routes.py +++ b/src/api/management/presentation/data_sources/routes.py @@ -11,9 +11,13 @@ from management.application.services.data_source_service import DataSourceService from management.dependencies.data_source import ( get_data_source_service, + get_git_commit_reference_service, get_git_diff_summary_service, get_sync_run_repository, ) +from management.infrastructure.git_commit_reference_service import ( + GitCommitReferenceService, +) from management.infrastructure.git_diff_summary_service import GitDiffSummaryService from management.ports.exceptions import UnauthorizedError from management.ports.repositories import IDataSourceSyncRunRepository @@ -32,6 +36,101 @@ 
router = APIRouter(tags=["data-sources"]) +@router.post( + "/data-sources/{ds_id}/commit-refs/refresh", + status_code=status.HTTP_200_OK, + summary="Refresh source commit references for a data source", +) +async def refresh_commit_references( + ds_id: str, + current_user: Annotated[CurrentUser, Depends(get_current_user)], + service: Annotated[DataSourceService, Depends(get_data_source_service)], + commit_ref_service: Annotated[ + GitCommitReferenceService, Depends(get_git_commit_reference_service) + ], +) -> DataSourceResponse: + """Refresh tracked/cloned commit references for a Git-backed data source.""" + try: + ds = await service.get( + user_id=current_user.user_id.value, + ds_id=ds_id, + ) + if ds is None: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail="Data source not found", + ) + + tracked_head = await commit_ref_service.resolve_tracked_head_commit(ds) + if tracked_head is None: + raise HTTPException( + status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, + detail="Unable to resolve tracked branch head commit for this data source", + ) + + updated = await service.refresh_commit_references( + user_id=current_user.user_id.value, + ds_id=ds_id, + tracked_branch_head_commit=tracked_head, + clone_head_commit=tracked_head, + ) + return DataSourceResponse.from_domain(updated) + except UnauthorizedError: + raise HTTPException( + status_code=status.HTTP_403_FORBIDDEN, + detail="You do not have permission to perform this action", + ) + except ValueError as e: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail=str(e), + ) + except HTTPException: + raise + except Exception: + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail="Failed to refresh commit references", + ) + + +@router.post( + "/data-sources/{ds_id}/commit-refs/adopt-tracked-head", + status_code=status.HTTP_200_OK, + summary="Adopt tracked branch head as extraction baseline", +) +async def adopt_tracked_head_as_baseline( + ds_id: 
str, + current_user: Annotated[CurrentUser, Depends(get_current_user)], + service: Annotated[DataSourceService, Depends(get_data_source_service)], +) -> DataSourceResponse: + """Set extraction baseline commit to the current tracked branch head.""" + try: + updated = await service.adopt_tracked_head_as_baseline( + user_id=current_user.user_id.value, + ds_id=ds_id, + ) + return DataSourceResponse.from_domain(updated) + except UnauthorizedError: + raise HTTPException( + status_code=status.HTTP_403_FORBIDDEN, + detail="You do not have permission to perform this action", + ) + except ValueError as e: + detail = str(e) + status_code = ( + status.HTTP_422_UNPROCESSABLE_ENTITY + if "tracked branch head" in detail + else status.HTTP_404_NOT_FOUND + ) + raise HTTPException(status_code=status_code, detail=detail) + except Exception: + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail="Failed to adopt tracked head as baseline", + ) + + @router.get( "/data-sources/{ds_id}/diff-summary", status_code=status.HTTP_200_OK, diff --git a/src/api/tests/unit/management/application/test_data_source_service.py b/src/api/tests/unit/management/application/test_data_source_service.py index 960f49cab..3ebdc3220 100644 --- a/src/api/tests/unit/management/application/test_data_source_service.py +++ b/src/api/tests/unit/management/application/test_data_source_service.py @@ -1030,6 +1030,107 @@ async def test_trigger_sync_creates_sync_run_and_saves_ds( assert ds_probe.sync_requested_calls[0]["ds_id"] == ds.id.value +class TestDataSourceServiceCommitReferenceActions: + """Tests for commit reference refresh/baseline actions.""" + + @pytest.mark.asyncio + async def test_refresh_commit_references_requires_manage_permission( + self, service, authz, ds_repo, user_id + ) -> None: + """refresh_commit_references() must check MANAGE permission.""" + ds = _make_ds() + ds_repo.seed(ds) + authz.grant_all() + + await service.refresh_commit_references( + user_id=user_id, + 
ds_id=ds.id.value, + tracked_branch_head_commit="abc123", + clone_head_commit="abc123", + ) + + authz.assert_check_called_once( + resource=f"data_source:{ds.id.value}", + permission=Permission.MANAGE, + subject=f"user:{user_id}", + ) + + @pytest.mark.asyncio + async def test_refresh_commit_references_initializes_baseline_when_empty( + self, service, authz, ds_repo, user_id + ) -> None: + """First commit-refresh should initialize extraction baseline.""" + ds = _make_ds() + ds.last_extraction_baseline_commit = None + ds_repo.seed(ds) + authz.grant_all() + + updated = await service.refresh_commit_references( + user_id=user_id, + ds_id=ds.id.value, + tracked_branch_head_commit="abc123", + clone_head_commit="abc123", + ) + + assert updated.tracked_branch_head_commit == "abc123" + assert updated.clone_head_commit == "abc123" + assert updated.last_extraction_baseline_commit == "abc123" + + @pytest.mark.asyncio + async def test_refresh_commit_references_preserves_existing_baseline( + self, service, authz, ds_repo, user_id + ) -> None: + """Refresh should not overwrite an existing extraction baseline.""" + ds = _make_ds() + ds.last_extraction_baseline_commit = "baseline000" + ds_repo.seed(ds) + authz.grant_all() + + updated = await service.refresh_commit_references( + user_id=user_id, + ds_id=ds.id.value, + tracked_branch_head_commit="tracked999", + clone_head_commit="tracked999", + ) + + assert updated.last_extraction_baseline_commit == "baseline000" + assert updated.tracked_branch_head_commit == "tracked999" + + @pytest.mark.asyncio + async def test_adopt_tracked_head_as_baseline_updates_baseline( + self, service, authz, ds_repo, user_id + ) -> None: + """adopt_tracked_head_as_baseline() should copy tracked head to baseline.""" + ds = _make_ds() + ds.last_extraction_baseline_commit = "old-base" + ds.tracked_branch_head_commit = "new-head" + ds_repo.seed(ds) + authz.grant_all() + + updated = await service.adopt_tracked_head_as_baseline( + user_id=user_id, + 
ds_id=ds.id.value, + ) + + assert updated.last_extraction_baseline_commit == "new-head" + + @pytest.mark.asyncio + async def test_adopt_tracked_head_as_baseline_requires_tracked_head( + self, service, authz, ds_repo, user_id + ) -> None: + """adopt_tracked_head_as_baseline() should reject when tracked head missing.""" + ds = _make_ds() + ds.tracked_branch_head_commit = None + ds_repo.seed(ds) + authz.grant_all() + + with pytest.raises(ValueError, match="tracked branch head"): + await service.adopt_tracked_head_as_baseline( + user_id=user_id, + ds_id=ds.id.value, + ) + + class TestDataSourceServiceListAllForUser: """Unit tests for DataSourceService.list_all_for_user.""" diff --git a/src/api/tests/unit/management/infrastructure/test_git_commit_reference_service.py b/src/api/tests/unit/management/infrastructure/test_git_commit_reference_service.py new file mode 100644 index 000000000..91a0cd85e --- /dev/null +++ b/src/api/tests/unit/management/infrastructure/test_git_commit_reference_service.py @@ -0,0 +1,109 @@ +"""Unit tests for GitCommitReferenceService.""" + +from __future__ import annotations + +from datetime import UTC, datetime + +import httpx +import pytest + +from management.domain.aggregates import DataSource +from management.domain.value_objects import DataSourceId, Schedule, ScheduleType +from management.infrastructure.git_commit_reference_service import ( + GitCommitReferenceService, +) +from shared_kernel.datasource_types import DataSourceAdapterType + + +class _FakeCredentialReader: + def __init__(self, credentials: dict[str, str] | None = None) -> None: + self._credentials = credentials or {} + + async def retrieve(self, path: str, tenant_id: str) -> dict[str, str]: + return dict(self._credentials) + + +def _make_data_source( + *, + adapter_type: DataSourceAdapterType = DataSourceAdapterType.GITHUB, + connection_config: dict[str, str] | None = None, + credentials_path: str | None = None, +) -> DataSource: + now = datetime.now(UTC) + return DataSource( 
+ id=DataSourceId(value="01JTESTCOMMITREFSERVICE0000"), + knowledge_graph_id="01JTESTCOMMITREFKG0000000", + tenant_id="tenant-001", + name="GitHub DS", + adapter_type=adapter_type, + connection_config=connection_config + or {"owner": "org", "repo": "repo", "branch": "main"}, + credentials_path=credentials_path, + schedule=Schedule(schedule_type=ScheduleType.MANUAL), + last_sync_at=None, + created_at=now, + updated_at=now, + ) + + +def test_parse_github_config_rejects_invalid_repo_url() -> None: + """Malformed GitHub repo URL should raise a clear error.""" + with pytest.raises(ValueError, match="owner and repo"): + GitCommitReferenceService._parse_github_connection_config( + {"repo_url": "https://github.com/owner-only"} + ) + + +@pytest.mark.asyncio +async def test_resolve_tracked_head_uses_branches_endpoint_with_token() -> None: + """Service should call GitHub branches API with PAT when available.""" + + def handler(request: httpx.Request) -> httpx.Response: + assert str(request.url) == "https://api.github.com/repos/org/repo/branches/main" + assert request.headers.get("Authorization") == "Bearer secret-token" + return httpx.Response( + status_code=200, + json={"commit": {"sha": "abc123"}}, + ) + + client = httpx.AsyncClient(transport=httpx.MockTransport(handler)) + service = GitCommitReferenceService( + credential_reader=_FakeCredentialReader({"access_token": "secret-token"}), + tenant_id="tenant-001", + http_client=client, + ) + ds = _make_data_source(credentials_path="datasource/ds-1/credentials") + + tracked = await service.resolve_tracked_head_commit(ds) + await client.aclose() + + assert tracked == "abc123" + + +@pytest.mark.asyncio +async def test_resolve_tracked_head_parses_repo_url_branch() -> None: + """repo_url tree syntax should map to owner/repo/branch correctly.""" + + def handler(request: httpx.Request) -> httpx.Response: + assert ( + str(request.url) + == "https://api.github.com/repos/openshift-hyperfleet/kartograph/branches/feature/test" + ) + 
return httpx.Response(status_code=200, json={"commit": {"sha": "head987"}}) + + client = httpx.AsyncClient(transport=httpx.MockTransport(handler)) + service = GitCommitReferenceService( + credential_reader=_FakeCredentialReader(), + tenant_id="tenant-001", + http_client=client, + ) + ds = _make_data_source( + connection_config={ + "repo_url": "https://github.com/openshift-hyperfleet/kartograph/tree/feature/test" + } + ) + + tracked = await service.resolve_tracked_head_commit(ds) + await client.aclose() + + assert tracked == "head987" diff --git a/src/api/tests/unit/management/presentation/test_data_sources_routes.py b/src/api/tests/unit/management/presentation/test_data_sources_routes.py index 71adcd6d1..2e64d01bf 100644 --- a/src/api/tests/unit/management/presentation/test_data_sources_routes.py +++ b/src/api/tests/unit/management/presentation/test_data_sources_routes.py @@ -50,6 +50,12 @@ def mock_diff_summary_service() -> AsyncMock: return AsyncMock() +@pytest.fixture +def mock_commit_reference_service() -> AsyncMock: + """Mock GitCommitReferenceService for commit-ref route testing.""" + return AsyncMock() + + @pytest.fixture def mock_current_user() -> CurrentUser: """Mock CurrentUser for authentication.""" @@ -102,12 +108,14 @@ def test_client( mock_ds_service: AsyncMock, mock_sync_run_repo: AsyncMock, mock_diff_summary_service: AsyncMock, + mock_commit_reference_service: AsyncMock, mock_current_user: CurrentUser, ) -> TestClient: """Create TestClient with mocked dependencies.""" from iam.dependencies.user import get_current_user from management.dependencies.data_source import ( get_data_source_service, + get_git_commit_reference_service, get_git_diff_summary_service, get_sync_run_repository, ) @@ -120,6 +128,9 @@ def test_client( app.dependency_overrides[get_git_diff_summary_service] = ( lambda: mock_diff_summary_service ) + app.dependency_overrides[get_git_commit_reference_service] = ( + lambda: mock_commit_reference_service + ) 
app.dependency_overrides[get_current_user] = lambda: mock_current_user app.include_router(router) @@ -796,6 +807,90 @@ def test_diff_summary_returns_404_when_data_source_inaccessible( mock_diff_summary_service.build_summary.assert_not_called() +class TestDataSourceCommitReferenceRoutes: + """Tests for commit-reference refresh/baseline endpoints.""" + + def test_refresh_commit_references_returns_updated_data_source( + self, + test_client: TestClient, + mock_ds_service: AsyncMock, + mock_commit_reference_service: AsyncMock, + sample_data_source: DataSource, + ) -> None: + """Refresh endpoint should return updated commit references.""" + refreshed = sample_data_source + refreshed.clone_head_commit = "aaa" + refreshed.tracked_branch_head_commit = "aaa" + refreshed.last_extraction_baseline_commit = "aaa" + mock_ds_service.get.return_value = sample_data_source + mock_commit_reference_service.resolve_tracked_head_commit.return_value = "aaa" + mock_ds_service.refresh_commit_references.return_value = refreshed + + response = test_client.post( + f"/management/data-sources/{sample_data_source.id.value}/commit-refs/refresh" + ) + + assert response.status_code == status.HTTP_200_OK + payload = response.json() + assert payload["clone_head_commit"] == "aaa" + assert payload["tracked_branch_head_commit"] == "aaa" + assert payload["last_extraction_baseline_commit"] == "aaa" + + def test_refresh_commit_references_returns_404_when_inaccessible( + self, + test_client: TestClient, + mock_ds_service: AsyncMock, + mock_commit_reference_service: AsyncMock, + ) -> None: + """Refresh endpoint should return 404 if DS not found/authorized.""" + mock_ds_service.get.return_value = None + + response = test_client.post( + "/management/data-sources/01JPQRST1234567890ABCDEFDS/commit-refs/refresh" + ) + + assert response.status_code == status.HTTP_404_NOT_FOUND + mock_ds_service.refresh_commit_references.assert_not_called() + 
mock_commit_reference_service.resolve_tracked_head_commit.assert_not_called() + + def test_adopt_tracked_head_as_baseline_returns_updated_data_source( + self, + test_client: TestClient, + mock_ds_service: AsyncMock, + sample_data_source: DataSource, + ) -> None: + """Adopt endpoint should return DS with baseline moved to tracked head.""" + updated = sample_data_source + updated.last_extraction_baseline_commit = "tracked-head" + updated.tracked_branch_head_commit = "tracked-head" + mock_ds_service.adopt_tracked_head_as_baseline.return_value = updated + + response = test_client.post( + f"/management/data-sources/{sample_data_source.id.value}/commit-refs/adopt-tracked-head" + ) + + assert response.status_code == status.HTTP_200_OK + payload = response.json() + assert payload["last_extraction_baseline_commit"] == "tracked-head" + + def test_adopt_tracked_head_as_baseline_returns_404_for_missing_source( + self, + test_client: TestClient, + mock_ds_service: AsyncMock, + sample_data_source: DataSource, + ) -> None: + """Adopt endpoint should return 404 if service reports missing DS.""" + mock_ds_service.adopt_tracked_head_as_baseline.side_effect = ValueError( + "Data source not found" + ) + + response = test_client.post( + f"/management/data-sources/{sample_data_source.id.value}/commit-refs/adopt-tracked-head" + ) + + assert response.status_code == status.HTTP_404_NOT_FOUND + + class TestUpdateDataSourceRoute: """Tests for PATCH /management/data-sources/{ds_id} endpoint. 
diff --git a/src/dev-ui/app/pages/data-sources/index.vue b/src/dev-ui/app/pages/data-sources/index.vue index 50f702cb3..f112f9ba9 100644 --- a/src/dev-ui/app/pages/data-sources/index.vue +++ b/src/dev-ui/app/pages/data-sources/index.vue @@ -22,6 +22,7 @@ import { ScrollText, FileText, Settings, + RefreshCw, } from 'lucide-vue-next' import { ADAPTERS, @@ -617,6 +618,8 @@ async function approveOntology() { const dataSources = ref([]) const loadingDataSources = ref(false) const expandedDiffLists = ref>({}) +const refreshingCommitRefs = ref>({}) +const adoptingBaselines = ref>({}) function isMaintenanceReady(ds: DataSourceItem): boolean { if (!ds.last_extraction_baseline_commit || !ds.tracked_branch_head_commit) return false @@ -631,6 +634,39 @@ function toggleDiffExpanded(dsId: string) { expandedDiffLists.value[dsId] = !isDiffExpanded(dsId) } +async function refreshCommitRefs(dsId: string) { + refreshingCommitRefs.value[dsId] = true + try { + const { apiFetch } = useApiClient() + await apiFetch(`/management/data-sources/${dsId}/commit-refs/refresh`, { + method: 'POST', + }) + toast.success('Commit references refreshed') + await loadDataSources() + } catch { + toast.error('Failed to refresh commit references') + } finally { + refreshingCommitRefs.value[dsId] = false + } +} + +async function adoptTrackedHeadBaseline(dsId: string) { + adoptingBaselines.value[dsId] = true + try { + const { apiFetch } = useApiClient() + await apiFetch(`/management/data-sources/${dsId}/commit-refs/adopt-tracked-head`, { + method: 'POST', + }) + toast.success('Baseline updated to tracked head') + await loadDataSources() + } catch (err) { + const msg = err instanceof Error ? err.message : 'Failed to update baseline' + toast.error('Failed to update baseline', { description: msg }) + } finally { + adoptingBaselines.value[dsId] = false + } +} + async function loadDataSources() { if (!hasTenant.value) return loadingDataSources.value = true @@ -1200,6 +1236,30 @@ async function handleDeleteDs() {

{{ ds.tracked_branch_head_commit ?? '—' }}

+
+ + +
{ + it('refreshCommitRefs calls refresh endpoint and reloads data sources on success', async () => { + const apiFetch = vi.fn().mockResolvedValue({}) + const loadDataSources = vi.fn().mockResolvedValue(undefined) + const refreshingCommitRefs: Record = {} + + async function refreshCommitRefs(dsId: string) { + refreshingCommitRefs[dsId] = true + try { + await apiFetch(`/management/data-sources/${dsId}/commit-refs/refresh`, { + method: 'POST', + }) + await loadDataSources() + } finally { + refreshingCommitRefs[dsId] = false + } + } + + await refreshCommitRefs('ds-1') + expect(apiFetch).toHaveBeenCalledWith('/management/data-sources/ds-1/commit-refs/refresh', { + method: 'POST', + }) + expect(loadDataSources).toHaveBeenCalledOnce() + expect(refreshingCommitRefs['ds-1']).toBe(false) + }) + + it('adoptTrackedHeadBaseline calls adopt endpoint and reloads data on success', async () => { + const apiFetch = vi.fn().mockResolvedValue({}) + const loadDataSources = vi.fn().mockResolvedValue(undefined) + const adoptingBaselines: Record = {} + + async function adoptTrackedHeadBaseline(dsId: string) { + adoptingBaselines[dsId] = true + try { + await apiFetch(`/management/data-sources/${dsId}/commit-refs/adopt-tracked-head`, { + method: 'POST', + }) + await loadDataSources() + } finally { + adoptingBaselines[dsId] = false + } + } + + await adoptTrackedHeadBaseline('ds-2') + expect(apiFetch).toHaveBeenCalledWith( + '/management/data-sources/ds-2/commit-refs/adopt-tracked-head', + { method: 'POST' }, + ) + expect(loadDataSources).toHaveBeenCalledOnce() + expect(adoptingBaselines['ds-2']).toBe(false) + }) +}) + describe('Diff summary panel behavior', () => { it('detects maintenance readiness when tracked head differs from baseline', () => { function isMaintenanceReady(ds: { From 6b11bbc98b11b36705ed540b89d81b77a2272357 Mon Sep 17 00:00:00 2001 From: Austin Redenbaugh Date: Wed, 20 May 2026 14:01:06 -0400 Subject: [PATCH 019/153] chore(skills): add parallel blocker-question protocol 
(#696) Strengthen subagent delivery guidance with a parallel execution model, required context packs, and a blocker-question escalation flow so multiple agents can pause and ask focused questions without serializing delivery. Co-authored-by: Cursor --- skills/subagent-delivery/SKILL.md | 53 +++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/skills/subagent-delivery/SKILL.md b/skills/subagent-delivery/SKILL.md index 0273dba82..3b9156390 100644 --- a/skills/subagent-delivery/SKILL.md +++ b/skills/subagent-delivery/SKILL.md @@ -4,12 +4,27 @@ description: > Executes a GitHub issue end-to-end with consistent branch, test, PR, and merge behavior. Use when implementing units of work with sub-agents, preparing pull requests, resolving merge conflicts, or when the user asks to run issue-by-issue delivery into feature/manage-knowledge-graph. + Supports parallel delivery waves with explicit blocker-question escalation. --- # Subagent Delivery Protocol Follow this protocol for every assigned issue. +## Parallel Execution Model + +Use this model whenever multiple issues are independent: + +1. One subagent per issue branch. +2. Shared target branch: `feature/manage-knowledge-graph`. +3. No shared working branch between agents. +4. Each subagent works to PR-ready state independently. +5. Merge in dependency order (foundational backend before UI polish when coupled). + +If two issues touch the same files heavily, either: +- serialize those two issues, or +- split scope so each agent owns non-overlapping symbols. + ## Scope and Inputs Before coding, gather: @@ -17,9 +32,36 @@ Before coding, gather: 1. Issue number and acceptance criteria. 2. Target branch: `feature/manage-knowledge-graph`. 3. Current repository state (`git status`, `git branch -vv`). +4. 
Context pack (required): + - relevant specs under `specs/` + - bounded context ownership (management/ingestion/extraction/graph/querying/ui) + - existing tests near touched code + - architectural constraints from `AGENTS.md` If acceptance criteria are ambiguous, ask one focused question before implementation. +## Blocker Question Protocol (Required) + +Subagents must be able to stop and ask questions immediately. + +Trigger a blocker question when any of these is true: + +1. More than one valid interpretation of acceptance criteria. +2. Missing security/tenancy/authorization decision. +3. Required external behavior is unspecified. +4. You would otherwise make an irreversible guess. + +When blocked: + +1. Stop implementation at the decision boundary. +2. Ask one concise question in the active agent chat immediately. +3. Include: + - what is ambiguous + - 2-3 concrete options + - recommended option and why +4. If working from a GitHub issue, mirror the same question as an issue comment so the orchestrator can batch unresolved questions across agents. +5. Continue only non-blocked work; do not guess on blocked decisions. + ## Git Workflow 1. Ensure local target branch is up to date: @@ -39,6 +81,7 @@ If acceptance criteria are ambiguous, ask one focused question before implementa 4. Run focused tests first, then broader suite for touched context. 5. Run lints/type checks for changed files when applicable. 6. If behavior depends on configuration, use settings/DI instead of hardcoding. +7. If new ambiguity appears mid-implementation, invoke the Blocker Question Protocol. ## PR Workflow @@ -61,6 +104,7 @@ If acceptance criteria are ambiguous, ask one focused question before implementa ``` 4. Link the issue in PR body using `Closes #` when appropriate. +5. If any assumptions were made, include an explicit assumptions list in PR body. ## Merge and Conflict Handling @@ -73,6 +117,15 @@ If acceptance criteria are ambiguous, ask one focused question before implementa 4. 
Re-run tests after conflict resolution. 5. Merge into `feature/manage-knowledge-graph` only after verification. +## Orchestrator Handoff Contract + +Each subagent must hand back: + +1. Branch name and PR URL. +2. Test commands run with pass/fail status. +3. Any unresolved questions (if still blocked). +4. Any assumptions that were taken and why they are safe. + ## Non-Negotiables - Do not use destructive git commands. From 9d1e9eb9f37be48107fb0ef422998d5b009e3f2b Mon Sep 17 00:00:00 2001 From: Austin Redenbaugh Date: Wed, 20 May 2026 14:07:24 -0400 Subject: [PATCH 020/153] feat(extraction): ship mode-specific prompt bundles and skill packs (#678) (#697) Add structured mode-specific agent configuration (system prompt, hierarchy, guardrails, and skill pack defaults) and wire session initialization to resolve and persist the configuration per knowledge graph scope. Co-authored-by: Cursor --- .../application/agent_session_service.py | 21 ++++++- .../application/skill_resolution_service.py | 63 ++++++++++++++++++- src/api/extraction/dependencies.py | 16 ++++- src/api/extraction/infrastructure/__init__.py | 11 +++- .../infrastructure/repositories/__init__.py | 5 +- .../repositories/skill_override_repository.py | 22 +++++++ .../application/test_agent_session_service.py | 53 ++++++++++++++++ .../test_skill_resolution_service.py | 22 ++++--- 8 files changed, 197 insertions(+), 16 deletions(-) create mode 100644 src/api/extraction/infrastructure/repositories/skill_override_repository.py diff --git a/src/api/extraction/application/agent_session_service.py b/src/api/extraction/application/agent_session_service.py index 646489e7c..61380a1c0 100644 --- a/src/api/extraction/application/agent_session_service.py +++ b/src/api/extraction/application/agent_session_service.py @@ -4,6 +4,9 @@ from ulid import ULID +from extraction.application.skill_resolution_service import ( + ExtractionSkillResolutionService, +) from extraction.domain.entities.agent_session import 
ExtractionAgentSession from extraction.domain.value_objects import ExtractionSessionMode from extraction.ports.repositories import IExtractionAgentSessionRepository @@ -12,8 +15,13 @@ class ExtractionAgentSessionService: """Orchestrates session create/get/list/archive behaviors by scope.""" - def __init__(self, repository: IExtractionAgentSessionRepository) -> None: + def __init__( + self, + repository: IExtractionAgentSessionRepository, + skill_resolution_service: ExtractionSkillResolutionService | None = None, + ) -> None: self._repository = repository + self._skill_resolution_service = skill_resolution_service async def get_or_create_active_session( self, @@ -35,6 +43,17 @@ async def get_or_create_active_session( knowledge_graph_id=knowledge_graph_id, mode=mode, ) + if self._skill_resolution_service is not None: + resolved = await self._skill_resolution_service.resolve_for_session( + knowledge_graph_id=knowledge_graph_id, + mode=mode, + ) + session.runtime_context["agent_configuration"] = { + "system_prompt": resolved.system_prompt, + "prompt_hierarchy": list(resolved.prompt_hierarchy), + "guardrails": list(resolved.guardrails), + "skills": dict(resolved.skills), + } await self._repository.save(session) return session diff --git a/src/api/extraction/application/skill_resolution_service.py b/src/api/extraction/application/skill_resolution_service.py index a2d1b7f83..0cf5f137f 100644 --- a/src/api/extraction/application/skill_resolution_service.py +++ b/src/api/extraction/application/skill_resolution_service.py @@ -2,12 +2,65 @@ from __future__ import annotations +from dataclasses import dataclass + from extraction.domain.value_objects import ExtractionSessionMode from extraction.ports.repositories import IExtractionSkillOverrideRepository +@dataclass(frozen=True) +class ResolvedExtractionSkillPack: + """Resolved mode-aware prompt bundle for agent runtime.""" + + system_prompt: str + prompt_hierarchy: tuple[str, ...] + guardrails: tuple[str, ...] 
+ skills: dict[str, str] + + +_GLOBAL_PROMPT_SETTINGS: dict[ExtractionSessionMode, dict[str, object]] = { + ExtractionSessionMode.SCHEMA_BOOTSTRAP: { + "system_prompt": ( + "You are the schema bootstrap guide. Start by understanding the user's " + "capabilities, goals, and domain intent before proposing a graph model." + ), + "prompt_hierarchy": ( + "platform_security_constraints", + "tenant_and_knowledge_graph_scope", + "schema_bootstrap_goals_and_capabilities_intake", + "mode_specific_skill_pack", + ), + "guardrails": ( + "Prefer mutation-log compatible schema guidance over ad-hoc writes.", + "Never fabricate repository content or credentials.", + "Keep recommendations scoped to the active knowledge graph.", + ), + }, + ExtractionSessionMode.EXTRACTION_OPERATIONS: { + "system_prompt": ( + "You are the extraction operations guide. Optimize for safe incremental " + "job setup, scoped maintenance, and auditable mutation outcomes." + ), + "prompt_hierarchy": ( + "platform_security_constraints", + "tenant_and_knowledge_graph_scope", + "extraction_operations_objective", + "mode_specific_skill_pack", + ), + "guardrails": ( + "All write paths must remain mutation-log auditable.", + "Treat schema edits as secondary unless explicitly requested.", + "Avoid broad destructive changes without explicit confirmation.", + ), + }, +} + _GLOBAL_SKILL_TEMPLATES: dict[ExtractionSessionMode, dict[str, str]] = { ExtractionSessionMode.SCHEMA_BOOTSTRAP: { + "capabilities_intake": ( + "Begin by asking for user capabilities/goals and confirm whether they " + "want a first-pass schema attempt or guided co-design." + ), "schema_modeling": ( "Guide the user to define complete entity and relationship types " "with clear labels, constraints, and required properties." 
@@ -44,7 +97,8 @@ async def resolve_for_session( self, knowledge_graph_id: str, mode: ExtractionSessionMode, - ) -> dict[str, str]: + ) -> ResolvedExtractionSkillPack: + prompt_settings = _GLOBAL_PROMPT_SETTINGS[mode] base_templates = dict(_GLOBAL_SKILL_TEMPLATES[mode]) overrides = await self._override_repository.get_overrides_for_knowledge_graph( knowledge_graph_id=knowledge_graph_id, @@ -62,5 +116,10 @@ async def resolve_for_session( if key not in resolved: resolved[key] = overrides[key] - return resolved + return ResolvedExtractionSkillPack( + system_prompt=str(prompt_settings["system_prompt"]), + prompt_hierarchy=tuple(prompt_settings["prompt_hierarchy"]), + guardrails=tuple(prompt_settings["guardrails"]), + skills=resolved, + ) diff --git a/src/api/extraction/dependencies.py b/src/api/extraction/dependencies.py index 6ded27903..e720e81d8 100644 --- a/src/api/extraction/dependencies.py +++ b/src/api/extraction/dependencies.py @@ -5,8 +5,14 @@ from fastapi import Depends from sqlalchemy.ext.asyncio import AsyncSession -from extraction.application import ExtractionAgentSessionService -from extraction.infrastructure.repositories import ExtractionAgentSessionRepository +from extraction.application import ( + ExtractionAgentSessionService, + ExtractionSkillResolutionService, +) +from extraction.infrastructure.repositories import ( + ExtractionAgentSessionRepository, + ExtractionSkillOverrideRepository, +) from infrastructure.database.dependencies import get_write_session @@ -14,7 +20,11 @@ def get_extraction_agent_session_service( session: Annotated[AsyncSession, Depends(get_write_session)], ) -> ExtractionAgentSessionService: """Get ExtractionAgentSessionService instance.""" + skill_resolution_service = ExtractionSkillResolutionService( + override_repository=ExtractionSkillOverrideRepository() + ) return ExtractionAgentSessionService( - repository=ExtractionAgentSessionRepository(session=session) + repository=ExtractionAgentSessionRepository(session=session), + 
skill_resolution_service=skill_resolution_service, ) diff --git a/src/api/extraction/infrastructure/__init__.py b/src/api/extraction/infrastructure/__init__.py index 5aaa4c73a..3ffd68fe2 100644 --- a/src/api/extraction/infrastructure/__init__.py +++ b/src/api/extraction/infrastructure/__init__.py @@ -1,7 +1,14 @@ """Extraction infrastructure adapters and event handlers.""" from extraction.infrastructure.event_handler import ExtractionEventHandler -from extraction.infrastructure.repositories import ExtractionAgentSessionRepository +from extraction.infrastructure.repositories import ( + ExtractionAgentSessionRepository, + ExtractionSkillOverrideRepository, +) -__all__ = ["ExtractionEventHandler", "ExtractionAgentSessionRepository"] +__all__ = [ + "ExtractionEventHandler", + "ExtractionAgentSessionRepository", + "ExtractionSkillOverrideRepository", +] diff --git a/src/api/extraction/infrastructure/repositories/__init__.py b/src/api/extraction/infrastructure/repositories/__init__.py index e39627e27..00204177a 100644 --- a/src/api/extraction/infrastructure/repositories/__init__.py +++ b/src/api/extraction/infrastructure/repositories/__init__.py @@ -3,6 +3,9 @@ from extraction.infrastructure.repositories.agent_session_repository import ( ExtractionAgentSessionRepository, ) +from extraction.infrastructure.repositories.skill_override_repository import ( + ExtractionSkillOverrideRepository, +) -__all__ = ["ExtractionAgentSessionRepository"] +__all__ = ["ExtractionAgentSessionRepository", "ExtractionSkillOverrideRepository"] diff --git a/src/api/extraction/infrastructure/repositories/skill_override_repository.py b/src/api/extraction/infrastructure/repositories/skill_override_repository.py new file mode 100644 index 000000000..d274b51f6 --- /dev/null +++ b/src/api/extraction/infrastructure/repositories/skill_override_repository.py @@ -0,0 +1,22 @@ +"""Infrastructure repository for extraction skill overrides.""" + +from __future__ import annotations + +from 
extraction.domain.value_objects import ExtractionSessionMode +from extraction.ports.repositories import IExtractionSkillOverrideRepository + + +class ExtractionSkillOverrideRepository(IExtractionSkillOverrideRepository): + """Return KG-specific skill overrides. + + Current tracer-bullet implementation returns no overrides. This still allows + the resolution service to compose deterministic mode defaults and provides a + stable extension point for persisted KG overrides. + """ + + async def get_overrides_for_knowledge_graph( + self, + knowledge_graph_id: str, + mode: ExtractionSessionMode, + ) -> dict[str, str]: + return {} diff --git a/src/api/tests/unit/extraction/application/test_agent_session_service.py b/src/api/tests/unit/extraction/application/test_agent_session_service.py index aade8550f..5d62f9ba5 100644 --- a/src/api/tests/unit/extraction/application/test_agent_session_service.py +++ b/src/api/tests/unit/extraction/application/test_agent_session_service.py @@ -55,6 +55,39 @@ async def list_by_scope( return sorted(sessions, key=lambda s: s.updated_at, reverse=True) +class _StaticSkillResolutionService: + def __init__(self) -> None: + self.calls: list[tuple[str, ExtractionSessionMode]] = [] + + async def resolve_for_session( + self, + knowledge_graph_id: str, + mode: ExtractionSessionMode, + ): + self.calls.append((knowledge_graph_id, mode)) + if mode == ExtractionSessionMode.SCHEMA_BOOTSTRAP: + return type( + "_Resolved", + (), + { + "system_prompt": "Bootstrap system prompt", + "prompt_hierarchy": ("platform", "mode"), + "guardrails": ("never leak credentials",), + "skills": {"schema_modeling": "bootstrap schema guidance"}, + }, + )() + return type( + "_Resolved", + (), + { + "system_prompt": "Operations system prompt", + "prompt_hierarchy": ("platform", "operations"), + "guardrails": ("mutation logs only",), + "skills": {"job_setup": "operations setup guidance"}, + }, + )() + + @pytest.mark.asyncio class TestExtractionAgentSessionService: async def 
test_reuses_active_session_for_same_scope(self): @@ -159,3 +192,23 @@ async def test_list_sessions_includes_archived_history(self): assert len(sessions) == 2 assert any(session.id == first.id and session.archived_at is not None for session in sessions) + async def test_new_session_initializes_runtime_context_from_skill_resolution(self): + repo = _InMemoryAgentSessionRepository() + skill_resolution = _StaticSkillResolutionService() + service = ExtractionAgentSessionService( + repository=repo, + skill_resolution_service=skill_resolution, + ) + + session = await service.get_or_create_active_session( + user_id="user-1", + knowledge_graph_id="kg-1", + mode=ExtractionSessionMode.SCHEMA_BOOTSTRAP, + ) + + assert "agent_configuration" in session.runtime_context + config = session.runtime_context["agent_configuration"] + assert config["system_prompt"] == "Bootstrap system prompt" + assert config["skills"]["schema_modeling"] == "bootstrap schema guidance" + assert skill_resolution.calls == [("kg-1", ExtractionSessionMode.SCHEMA_BOOTSTRAP)] + diff --git a/src/api/tests/unit/extraction/application/test_skill_resolution_service.py b/src/api/tests/unit/extraction/application/test_skill_resolution_service.py index 90bb67082..fa5167b54 100644 --- a/src/api/tests/unit/extraction/application/test_skill_resolution_service.py +++ b/src/api/tests/unit/extraction/application/test_skill_resolution_service.py @@ -34,8 +34,12 @@ async def test_bootstrap_mode_uses_bootstrap_defaults(self): mode=ExtractionSessionMode.SCHEMA_BOOTSTRAP, ) - assert "schema_modeling" in resolved - assert "prepopulation_validation" in resolved + assert "schema_modeling" in resolved.skills + assert "prepopulation_validation" in resolved.skills + assert "capabilities_intake" in resolved.skills + assert "goal" in resolved.system_prompt.lower() + assert len(resolved.prompt_hierarchy) > 0 + assert len(resolved.guardrails) > 0 async def test_extraction_mode_uses_extraction_defaults(self): service = 
ExtractionSkillResolutionService( @@ -47,8 +51,12 @@ async def test_extraction_mode_uses_extraction_defaults(self): mode=ExtractionSessionMode.EXTRACTION_OPERATIONS, ) - assert "job_setup" in resolved - assert "minor_edits" in resolved + assert "job_setup" in resolved.skills + assert "minor_edits" in resolved.skills + assert "schema_edits_secondary" in resolved.skills + assert "extraction" in resolved.system_prompt.lower() + assert len(resolved.prompt_hierarchy) > 0 + assert len(resolved.guardrails) > 0 async def test_kg_overrides_replace_matching_template_and_append_new(self): repo = _InMemorySkillOverrideRepository( @@ -69,8 +77,8 @@ async def test_kg_overrides_replace_matching_template_and_append_new(self): mode=ExtractionSessionMode.EXTRACTION_OPERATIONS, ) - assert resolved["job_setup"] == "KG-specific job setup instructions" - assert resolved["custom_review"] == "Custom review flow" + assert resolved.skills["job_setup"] == "KG-specific job setup instructions" + assert resolved.skills["custom_review"] == "Custom review flow" async def test_override_merge_is_deterministic(self): repo = _InMemorySkillOverrideRepository( @@ -92,5 +100,5 @@ async def test_override_merge_is_deterministic(self): ) # Additional override keys are merged in sorted order for determinism. - assert list(resolved.keys())[-2:] == ["a_first", "z_last"] + assert list(resolved.skills.keys())[-2:] == ["a_first", "z_last"] From 3f1da3204c8577c4d405de8ef71d90f90c316196 Mon Sep 17 00:00:00 2001 From: Austin Redenbaugh Date: Wed, 20 May 2026 14:11:22 -0400 Subject: [PATCH 021/153] feat(extraction): add bootstrap capabilities intake dual-path flow (#677) (#698) Seed schema bootstrap sessions with a capabilities-intake prompt that offers first-pass or guided co-design paths, and persist the selected path/capability summary in session runtime context so the conversation remains continuous across requests. 
Co-authored-by: Cursor --- .../application/agent_session_service.py | 49 ++++++++++++++++++- src/api/extraction/domain/value_objects.py | 7 +++ src/api/extraction/presentation/models.py | 12 ++++- src/api/extraction/presentation/routes.py | 28 +++++++++++ .../application/test_agent_session_service.py | 41 +++++++++++++++- .../extraction/presentation/test_routes.py | 21 ++++++++ 6 files changed, 155 insertions(+), 3 deletions(-) diff --git a/src/api/extraction/application/agent_session_service.py b/src/api/extraction/application/agent_session_service.py index 61380a1c0..b4a4c6a03 100644 --- a/src/api/extraction/application/agent_session_service.py +++ b/src/api/extraction/application/agent_session_service.py @@ -2,13 +2,15 @@ from __future__ import annotations +from datetime import UTC, datetime + from ulid import ULID from extraction.application.skill_resolution_service import ( ExtractionSkillResolutionService, ) from extraction.domain.entities.agent_session import ExtractionAgentSession -from extraction.domain.value_objects import ExtractionSessionMode +from extraction.domain.value_objects import BootstrapIntakePath, ExtractionSessionMode from extraction.ports.repositories import IExtractionAgentSessionRepository @@ -23,6 +25,15 @@ def __init__( self._repository = repository self._skill_resolution_service = skill_resolution_service + @staticmethod + def _build_bootstrap_intake_prompt() -> str: + return ( + "Before we draft schema types, share your capabilities and goals for this " + "knowledge graph. Then choose one path: " + "(1) immediate first-pass schema attempt, or " + "(2) guided question-by-question co-design." 
+ ) + async def get_or_create_active_session( self, user_id: str, @@ -54,6 +65,19 @@ async def get_or_create_active_session( "guardrails": list(resolved.guardrails), "skills": dict(resolved.skills), } + if mode == ExtractionSessionMode.SCHEMA_BOOTSTRAP: + session.message_history.append( + {"role": "assistant", "content": self._build_bootstrap_intake_prompt()} + ) + session.runtime_context["bootstrap_intake"] = { + "status": "awaiting_path_selection", + "selected_path": None, + "capabilities_goals": None, + "path_options": [ + BootstrapIntakePath.FIRST_PASS_SCHEMA_ATTEMPT.value, + BootstrapIntakePath.GUIDED_CO_DESIGN.value, + ], + } await self._repository.save(session) return session @@ -99,3 +123,26 @@ async def archive_session(self, session_id: str) -> ExtractionAgentSession | Non await self._repository.save(session) return session + async def set_bootstrap_intake_path_for_active_session( + self, + user_id: str, + knowledge_graph_id: str, + selected_path: BootstrapIntakePath, + capabilities_goals: str | None, + ) -> ExtractionAgentSession: + """Persist bootstrap path selection for session continuity.""" + session = await self.get_or_create_active_session( + user_id=user_id, + knowledge_graph_id=knowledge_graph_id, + mode=ExtractionSessionMode.SCHEMA_BOOTSTRAP, + ) + intake = dict(session.runtime_context.get("bootstrap_intake", {})) + intake["status"] = "path_selected" + intake["selected_path"] = selected_path.value + intake["capabilities_goals"] = capabilities_goals + intake["selected_at"] = datetime.now(UTC).isoformat() + session.runtime_context["bootstrap_intake"] = intake + session.updated_at = datetime.now(UTC) + await self._repository.save(session) + return session + diff --git a/src/api/extraction/domain/value_objects.py b/src/api/extraction/domain/value_objects.py index c12cdfd2b..22ec0470c 100644 --- a/src/api/extraction/domain/value_objects.py +++ b/src/api/extraction/domain/value_objects.py @@ -9,3 +9,10 @@ class ExtractionSessionMode(StrEnum): 
SCHEMA_BOOTSTRAP = "schema_bootstrap" EXTRACTION_OPERATIONS = "extraction_operations" + +class BootstrapIntakePath(StrEnum): + """User-selected bootstrap onboarding path.""" + + FIRST_PASS_SCHEMA_ATTEMPT = "first_pass_schema_attempt" + GUIDED_CO_DESIGN = "guided_co_design" + diff --git a/src/api/extraction/presentation/models.py b/src/api/extraction/presentation/models.py index 995bf0136..781791ad7 100644 --- a/src/api/extraction/presentation/models.py +++ b/src/api/extraction/presentation/models.py @@ -8,7 +8,7 @@ from pydantic import BaseModel, Field from extraction.domain.entities.agent_session import ExtractionAgentSession -from extraction.domain.value_objects import ExtractionSessionMode +from extraction.domain.value_objects import BootstrapIntakePath, ExtractionSessionMode class ExtractionSessionResponse(BaseModel): @@ -45,3 +45,13 @@ class ExtractionSessionListResponse(BaseModel): sessions: list[ExtractionSessionResponse] count: int + +class BootstrapIntakePathSelectionRequest(BaseModel): + """Request model for bootstrap intake path selection.""" + + selected_path: BootstrapIntakePath + capabilities_goals: str | None = Field( + default=None, + description="Optional user summary of capabilities and schema goals", + ) + diff --git a/src/api/extraction/presentation/routes.py b/src/api/extraction/presentation/routes.py index af6bd2e99..1301cd224 100644 --- a/src/api/extraction/presentation/routes.py +++ b/src/api/extraction/presentation/routes.py @@ -10,6 +10,7 @@ from extraction.dependencies import get_extraction_agent_session_service from extraction.domain.value_objects import ExtractionSessionMode from extraction.presentation.models import ( + BootstrapIntakePathSelectionRequest, ExtractionSessionListResponse, ExtractionSessionResponse, ) @@ -121,3 +122,30 @@ async def clear_chat( ) return ExtractionSessionResponse.from_domain(session) + +@router.post( + "/knowledge-graphs/{knowledge_graph_id}/sessions/schema_bootstrap/active/intake-path", + 
response_model=ExtractionSessionResponse, +) +async def select_bootstrap_intake_path( + knowledge_graph_id: str, + request: BootstrapIntakePathSelectionRequest, + current_user: Annotated[CurrentUser, Depends(get_current_user)], + service: Annotated[ + ExtractionAgentSessionService, Depends(get_extraction_agent_session_service) + ], + authz: Annotated[AuthorizationProvider, Depends(get_spicedb_client)], +) -> ExtractionSessionResponse: + await _assert_kg_edit_permission( + authz=authz, + current_user=current_user, + knowledge_graph_id=knowledge_graph_id, + ) + session = await service.set_bootstrap_intake_path_for_active_session( + user_id=current_user.user_id.value, + knowledge_graph_id=knowledge_graph_id, + selected_path=request.selected_path, + capabilities_goals=request.capabilities_goals, + ) + return ExtractionSessionResponse.from_domain(session) + diff --git a/src/api/tests/unit/extraction/application/test_agent_session_service.py b/src/api/tests/unit/extraction/application/test_agent_session_service.py index 5d62f9ba5..444ab8c18 100644 --- a/src/api/tests/unit/extraction/application/test_agent_session_service.py +++ b/src/api/tests/unit/extraction/application/test_agent_session_service.py @@ -9,7 +9,7 @@ from extraction.application.agent_session_service import ExtractionAgentSessionService from extraction.domain.entities.agent_session import ExtractionAgentSession -from extraction.domain.value_objects import ExtractionSessionMode +from extraction.domain.value_objects import BootstrapIntakePath, ExtractionSessionMode class _InMemoryAgentSessionRepository: @@ -212,3 +212,42 @@ async def test_new_session_initializes_runtime_context_from_skill_resolution(sel assert config["skills"]["schema_modeling"] == "bootstrap schema guidance" assert skill_resolution.calls == [("kg-1", ExtractionSessionMode.SCHEMA_BOOTSTRAP)] + async def test_bootstrap_session_seeds_capabilities_intake_prompt_state(self): + repo = _InMemoryAgentSessionRepository() + service = 
ExtractionAgentSessionService(repository=repo) + + session = await service.get_or_create_active_session( + user_id="user-1", + knowledge_graph_id="kg-1", + mode=ExtractionSessionMode.SCHEMA_BOOTSTRAP, + ) + + assert session.message_history + assert session.message_history[0]["role"] == "assistant" + assert "capabilities" in session.message_history[0]["content"].lower() + intake = session.runtime_context["bootstrap_intake"] + assert intake["status"] == "awaiting_path_selection" + assert intake["selected_path"] is None + + async def test_select_bootstrap_intake_path_persists_choice_for_continuity(self): + repo = _InMemoryAgentSessionRepository() + service = ExtractionAgentSessionService(repository=repo) + session = await service.get_or_create_active_session( + user_id="user-1", + knowledge_graph_id="kg-1", + mode=ExtractionSessionMode.SCHEMA_BOOTSTRAP, + ) + + updated = await service.set_bootstrap_intake_path_for_active_session( + user_id="user-1", + knowledge_graph_id="kg-1", + selected_path=BootstrapIntakePath.GUIDED_CO_DESIGN, + capabilities_goals="I can provide domain terms but need guidance.", + ) + + intake = updated.runtime_context["bootstrap_intake"] + assert intake["selected_path"] == BootstrapIntakePath.GUIDED_CO_DESIGN.value + assert intake["status"] == "path_selected" + assert intake["capabilities_goals"] == "I can provide domain terms but need guidance." 
+ assert updated.id == session.id + diff --git a/src/api/tests/unit/extraction/presentation/test_routes.py b/src/api/tests/unit/extraction/presentation/test_routes.py index a59923026..96a5d1df3 100644 --- a/src/api/tests/unit/extraction/presentation/test_routes.py +++ b/src/api/tests/unit/extraction/presentation/test_routes.py @@ -9,6 +9,7 @@ from extraction.application.agent_session_service import ExtractionAgentSessionService from extraction.domain.entities.agent_session import ExtractionAgentSession +from extraction.domain.value_objects import BootstrapIntakePath from iam.application.value_objects import CurrentUser from iam.domain.value_objects import TenantId, UserId @@ -176,3 +177,23 @@ def test_active_session_endpoint_returns_existing_active_session( assert second.status_code == status.HTTP_200_OK assert first.json()["id"] == second.json()["id"] + def test_select_bootstrap_intake_path_persists_choice(self, extraction_client): + client, _ = extraction_client + active = client.get( + "/extraction/knowledge-graphs/kg-123/sessions/schema_bootstrap/active" + ) + assert active.status_code == status.HTTP_200_OK + + response = client.post( + "/extraction/knowledge-graphs/kg-123/sessions/schema_bootstrap/active/intake-path", + json={ + "selected_path": BootstrapIntakePath.GUIDED_CO_DESIGN.value, + "capabilities_goals": "I know core entities but need help with relationships.", + }, + ) + assert response.status_code == status.HTTP_200_OK + payload = response.json() + intake = payload["runtime_context"]["bootstrap_intake"] + assert intake["selected_path"] == BootstrapIntakePath.GUIDED_CO_DESIGN.value + assert intake["status"] == "path_selected" + From b11d5597db1381641dcc032faa26e3a8e4d2655f Mon Sep 17 00:00:00 2001 From: Austin Redenbaugh Date: Wed, 20 May 2026 14:20:07 -0400 Subject: [PATCH 022/153] feat(extraction): package workload runtime context and skills mounts (#679) (#699) Build a filesystem runtime context for extraction workloads by materializing ingestion 
package resources, reconstructing repository files, and exposing a deterministic skills directory path; wire it through extraction event handling and local/deployed container configuration. Co-authored-by: Cursor --- compose.yaml | 2 + .../apps/kartograph/base/api-deployment.yaml | 6 ++ src/api/extraction/infrastructure/__init__.py | 4 + .../infrastructure/event_handler.py | 7 ++ .../infrastructure/runtime_context_builder.py | 52 +++++++++++++ src/api/extraction/ports/services.py | 14 ++++ src/api/main.py | 13 ++++ .../test_extraction_event_handler.py | 25 +++++++ .../test_runtime_context_builder.py | 74 +++++++++++++++++++ 9 files changed, 197 insertions(+) create mode 100644 src/api/extraction/infrastructure/runtime_context_builder.py create mode 100644 src/api/tests/unit/extraction/infrastructure/test_runtime_context_builder.py diff --git a/compose.yaml b/compose.yaml index 9e2d1d838..bf632a8a7 100644 --- a/compose.yaml +++ b/compose.yaml @@ -145,6 +145,7 @@ services: - kartograph volumes: - ./certs:/certs:ro + - ./skills:/app/skills:ro # Mount host CA bundle (supports multiple OS types via env var) # Default fallback order: RHEL/Fedora -> Debian/Ubuntu -> macOS - ${HOST_CA_BUNDLE:-/etc/pki/tls/certs/ca-bundle.crt}:/etc/ssl/certs/ca-bundle.crt:ro @@ -156,6 +157,7 @@ services: - GRPC_DEFAULT_SSL_ROOTS_FILE_PATH=/certs/spicedb-cert.pem # SSL cert file uses mounted path (same for all systems) - SSL_CERT_FILE=/etc/ssl/certs/ca-bundle.crt + - KARTOGRAPH_EXTRACTION_SKILLS_DIR=/app/skills depends_on: postgres: condition: service_healthy diff --git a/deploy/apps/kartograph/base/api-deployment.yaml b/deploy/apps/kartograph/base/api-deployment.yaml index 1de0bc5ee..3c9f2f193 100644 --- a/deploy/apps/kartograph/base/api-deployment.yaml +++ b/deploy/apps/kartograph/base/api-deployment.yaml @@ -155,11 +155,15 @@ spec: secretKeyRef: name: kartograph-sso-client-swagger-docs key: client_id + - name: KARTOGRAPH_EXTRACTION_SKILLS_DIR + value: /app/skills volumeMounts: - name: 
spicedb-ca mountPath: /etc/spicedb-ca readOnly: true + - name: extraction-skills + mountPath: /app/skills livenessProbe: httpGet: path: /health @@ -190,3 +194,5 @@ spec: items: - key: service-ca.crt path: service-ca.crt + - name: extraction-skills + emptyDir: {} diff --git a/src/api/extraction/infrastructure/__init__.py b/src/api/extraction/infrastructure/__init__.py index 3ffd68fe2..20d48d352 100644 --- a/src/api/extraction/infrastructure/__init__.py +++ b/src/api/extraction/infrastructure/__init__.py @@ -5,10 +5,14 @@ ExtractionAgentSessionRepository, ExtractionSkillOverrideRepository, ) +from extraction.infrastructure.runtime_context_builder import ( + FilesystemExtractionRuntimeContextBuilder, +) __all__ = [ "ExtractionEventHandler", "ExtractionAgentSessionRepository", "ExtractionSkillOverrideRepository", + "FilesystemExtractionRuntimeContextBuilder", ] diff --git a/src/api/extraction/infrastructure/event_handler.py b/src/api/extraction/infrastructure/event_handler.py index 4eb5fa33c..32cceb1ee 100644 --- a/src/api/extraction/infrastructure/event_handler.py +++ b/src/api/extraction/infrastructure/event_handler.py @@ -40,6 +40,7 @@ def __init__( self, extraction_service: IExtractionService, outbox: "IOutboxRepository", + runtime_context_builder: Any, ) -> None: """Initialize the extraction event handler. 
@@ -50,6 +51,7 @@ def __init__( """ self._extraction_service = extraction_service self._outbox = outbox + self._runtime_context_builder = runtime_context_builder def supported_event_types(self) -> frozenset[str]: """Return event types handled by this handler.""" @@ -80,11 +82,16 @@ async def handle( now = datetime.now(UTC) try: + runtime_context = self._runtime_context_builder.build( + sync_run_id=sync_run_id, + job_package_id=job_package_id, + ) mutation_log_id = await self._extraction_service.run( sync_run_id=sync_run_id, data_source_id=data_source_id, knowledge_graph_id=knowledge_graph_id, job_package_id=job_package_id, + runtime_context=runtime_context, ) except Exception as exc: await self._outbox.append( diff --git a/src/api/extraction/infrastructure/runtime_context_builder.py b/src/api/extraction/infrastructure/runtime_context_builder.py new file mode 100644 index 000000000..9c349f12b --- /dev/null +++ b/src/api/extraction/infrastructure/runtime_context_builder.py @@ -0,0 +1,52 @@ +"""Filesystem runtime context preparation for extraction workloads.""" + +from __future__ import annotations + +from pathlib import Path +import zipfile + +from extraction.ports.services import ExtractionRuntimeContext +from shared_kernel.job_package.path_safety import validate_zip_entry_name +from shared_kernel.job_package.reader import JobPackageReader +from shared_kernel.job_package.value_objects import JobPackageId + + +class FilesystemExtractionRuntimeContextBuilder: + """Prepare runtime directories from JobPackage + skills mount path.""" + + def __init__(self, *, work_dir: Path, skills_dir: Path) -> None: + self._work_dir = work_dir + self._skills_dir = skills_dir + + def build(self, *, sync_run_id: str, job_package_id: str) -> ExtractionRuntimeContext: + package_id = JobPackageId(value=job_package_id) + archive_path = self._work_dir / package_id.archive_name() + reader = JobPackageReader(archive_path) + + run_root = self._work_dir / "extraction-runtimes" / sync_run_id + 
ingestion_context_dir = run_root / "ingestion-context" + repository_files_dir = run_root / "repository-files" + ingestion_context_dir.mkdir(parents=True, exist_ok=True) + repository_files_dir.mkdir(parents=True, exist_ok=True) + self._skills_dir.mkdir(parents=True, exist_ok=True) + + with zipfile.ZipFile(archive_path) as archive: + for entry_name in archive.namelist(): + validate_zip_entry_name(entry_name) + archive.extract(entry_name, path=ingestion_context_dir) + + # Materialize repository-style files for agent-friendly traversal. + for change in reader.iter_changeset(): + if change.content_ref is None or not change.path: + continue + validate_zip_entry_name(change.path) + output_path = repository_files_dir / change.path + output_path.parent.mkdir(parents=True, exist_ok=True) + output_path.write_bytes(reader.read_content(change.content_ref)) + + return ExtractionRuntimeContext( + ingestion_context_dir=str(ingestion_context_dir), + repository_files_dir=str(repository_files_dir), + skills_dir=str(self._skills_dir), + job_package_archive=str(archive_path), + ) diff --git a/src/api/extraction/ports/services.py b/src/api/extraction/ports/services.py index f4a62c655..7c73d4865 100644 --- a/src/api/extraction/ports/services.py +++ b/src/api/extraction/ports/services.py @@ -2,9 +2,20 @@ from __future__ import annotations +from dataclasses import dataclass from typing import Protocol +@dataclass(frozen=True) +class ExtractionRuntimeContext: + """Resolved runtime paths available to extraction workloads.""" + + ingestion_context_dir: str + repository_files_dir: str + skills_dir: str + job_package_archive: str + + class IExtractionService(Protocol): """Protocol for the AI-based entity extraction service. @@ -22,6 +33,7 @@ async def run( data_source_id: str, knowledge_graph_id: str, job_package_id: str, + runtime_context: ExtractionRuntimeContext, ) -> str: """Run the AI extraction pipeline for a JobPackage. 
@@ -30,6 +42,8 @@ async def run( data_source_id: Identifier for the data source being extracted knowledge_graph_id: Identifier for the target knowledge graph job_package_id: Identifier for the JobPackage to process + runtime_context: Resolved runtime context paths for ingestion resources, + reconstructed repository files, and skills availability. Returns: mutation_log_id: Identifier for the produced MutationLog (JSONL) diff --git a/src/api/main.py b/src/api/main.py index 2061f1808..9204c77de 100644 --- a/src/api/main.py +++ b/src/api/main.py @@ -2,6 +2,7 @@ import asyncio from contextlib import asynccontextmanager +import os from pathlib import Path from typing import Any from urllib.parse import urlparse @@ -56,6 +57,9 @@ # Default work directory for JobPackage ZIP archives _JOB_PACKAGE_WORK_DIR = Path("/tmp/kartograph/job_packages") # noqa: S108 +_EXTRACTION_SKILLS_DIR = Path( + os.getenv("KARTOGRAPH_EXTRACTION_SKILLS_DIR", "/app/skills") +) # Scheduler polling interval (seconds) _SCHEDULER_POLL_INTERVAL_SECONDS = 60 @@ -305,6 +309,7 @@ async def run( data_source_id: str, knowledge_graph_id: str, job_package_id: str, + runtime_context: Any, ) -> str: raise NotImplementedError( "AI extraction pipeline is not yet implemented. 
" @@ -332,12 +337,20 @@ def supported_event_types(self) -> frozenset[str]: async def handle(self, event_type: str, payload: dict[str, Any]) -> None: from infrastructure.outbox.repository import OutboxRepository from extraction.infrastructure.event_handler import ExtractionEventHandler + from extraction.infrastructure.runtime_context_builder import ( + FilesystemExtractionRuntimeContextBuilder, + ) async with self._session_factory() as session: outbox = OutboxRepository(session=session) + runtime_context_builder = FilesystemExtractionRuntimeContextBuilder( + work_dir=_JOB_PACKAGE_WORK_DIR, + skills_dir=_EXTRACTION_SKILLS_DIR, + ) extraction_handler = ExtractionEventHandler( extraction_service=self._extraction_service, outbox=outbox, + runtime_context_builder=runtime_context_builder, ) await extraction_handler.handle(event_type, payload) await session.commit() diff --git a/src/api/tests/unit/extraction/infrastructure/test_extraction_event_handler.py b/src/api/tests/unit/extraction/infrastructure/test_extraction_event_handler.py index 38738b321..c04a92096 100644 --- a/src/api/tests/unit/extraction/infrastructure/test_extraction_event_handler.py +++ b/src/api/tests/unit/extraction/infrastructure/test_extraction_event_handler.py @@ -18,6 +18,7 @@ import pytest from extraction.infrastructure.event_handler import ExtractionEventHandler +from extraction.ports.services import ExtractionRuntimeContext class _FakeOutboxRepository: @@ -65,6 +66,7 @@ async def run( data_source_id: str, knowledge_graph_id: str, job_package_id: str, + runtime_context: ExtractionRuntimeContext, ) -> str: self.calls.append( { @@ -72,6 +74,7 @@ async def run( "data_source_id": data_source_id, "knowledge_graph_id": knowledge_graph_id, "job_package_id": job_package_id, + "runtime_context": runtime_context, } ) if self._fail: @@ -79,6 +82,22 @@ async def run( return "mutation-log-001" +class _FakeRuntimeContextBuilder: + def __init__(self) -> None: + self.calls: list[dict[str, str]] = [] + + def 
build(self, *, sync_run_id: str, job_package_id: str) -> ExtractionRuntimeContext: + self.calls.append( + {"sync_run_id": sync_run_id, "job_package_id": job_package_id} + ) + return ExtractionRuntimeContext( + ingestion_context_dir="/tmp/ingestion-context", + repository_files_dir="/tmp/repository-files", + skills_dir="/app/skills", + job_package_archive="/tmp/job-package.zip", + ) + + @pytest.fixture def outbox() -> _FakeOutboxRepository: return _FakeOutboxRepository() @@ -99,9 +118,11 @@ def handler( extraction_service: _FakeExtractionService, outbox: _FakeOutboxRepository, ) -> ExtractionEventHandler: + runtime_context_builder = _FakeRuntimeContextBuilder() return ExtractionEventHandler( extraction_service=extraction_service, outbox=outbox, + runtime_context_builder=runtime_context_builder, ) @@ -151,6 +172,7 @@ async def test_runs_extraction_on_job_package_produced( assert call["job_package_id"] == "pkg-001" assert call["data_source_id"] == "ds-001" assert call["knowledge_graph_id"] == "kg-001" + assert call["runtime_context"].skills_dir == "/app/skills" async def test_emits_mutation_log_produced_on_success( self, @@ -208,6 +230,7 @@ async def test_emits_extraction_failed_on_service_error( handler = ExtractionEventHandler( extraction_service=failing_service, outbox=outbox, + runtime_context_builder=_FakeRuntimeContextBuilder(), ) payload = _job_package_produced_payload(sync_run_id="run-002") await handler.handle("JobPackageProduced", payload) @@ -228,6 +251,7 @@ async def test_extraction_failed_aggregate_type( handler = ExtractionEventHandler( extraction_service=failing_service, outbox=outbox, + runtime_context_builder=_FakeRuntimeContextBuilder(), ) await handler.handle( "JobPackageProduced", @@ -285,6 +309,7 @@ async def test_outbox_failure_after_successful_extraction_propagates( handler = ExtractionEventHandler( extraction_service=extraction_service, outbox=failing_outbox, + runtime_context_builder=_FakeRuntimeContextBuilder(), ) with 
pytest.raises(RuntimeError, match="outbox write failed"): diff --git a/src/api/tests/unit/extraction/infrastructure/test_runtime_context_builder.py b/src/api/tests/unit/extraction/infrastructure/test_runtime_context_builder.py new file mode 100644 index 000000000..9e5bf93a5 --- /dev/null +++ b/src/api/tests/unit/extraction/infrastructure/test_runtime_context_builder.py @@ -0,0 +1,74 @@ +"""Unit tests for filesystem extraction runtime context builder.""" + +from __future__ import annotations + +from pathlib import Path + +import pytest + +from extraction.infrastructure.runtime_context_builder import ( + FilesystemExtractionRuntimeContextBuilder, +) +from shared_kernel.job_package.builder import JobPackageBuilder +from shared_kernel.job_package.value_objects import ( + AdapterCheckpoint, + ChangeOperation, + ChangesetEntry, + ContentRef, + SyncMode, +) + + +def _build_job_package(archive_dir: Path) -> str: + content_bytes = b"print('hello runtime context')\n" + content_ref = ContentRef.from_bytes(content_bytes) + changeset_entry = ChangesetEntry( + operation=ChangeOperation.ADD, + id="file-1", + type="io.kartograph.change.file", + path="src/main.py", + content_ref=content_ref, + content_type="text/x-python", + metadata={}, + ) + builder = JobPackageBuilder( + data_source_id="ds-123", + knowledge_graph_id="kg-123", + sync_mode=SyncMode.FULL_REFRESH, + ) + ref = builder.add_content(content_bytes) + builder.add_changeset_entry( + ChangesetEntry( + operation=ChangeOperation.ADD, + id=changeset_entry.id, + type=changeset_entry.type, + path=changeset_entry.path, + content_ref=ref, + content_type=changeset_entry.content_type, + metadata=changeset_entry.metadata, + ) + ) + builder.set_checkpoint(AdapterCheckpoint(schema_version="1.0.0", data={})) + archive_path = builder.build(archive_dir) + return archive_path.stem.removeprefix("job-package-") + + +@pytest.mark.asyncio +async def test_build_materializes_ingestion_context_and_repository_files(tmp_path: Path): + work_dir = 
tmp_path / "work" + work_dir.mkdir(parents=True, exist_ok=True) + package_id = _build_job_package(work_dir) + skills_dir = tmp_path / "skills" + + builder = FilesystemExtractionRuntimeContextBuilder( + work_dir=work_dir, + skills_dir=skills_dir, + ) + runtime = builder.build(sync_run_id="run-123", job_package_id=package_id) + + assert Path(runtime.ingestion_context_dir).exists() + assert Path(runtime.repository_files_dir, "src/main.py").exists() + assert Path(runtime.repository_files_dir, "src/main.py").read_text() == ( + "print('hello runtime context')\n" + ) + assert Path(runtime.skills_dir).exists() From faca1c15c1d6a8f96e5bae4642ea7aff0b199c72 Mon Sep 17 00:00:00 2001 From: Austin Redenbaugh Date: Wed, 20 May 2026 14:31:42 -0400 Subject: [PATCH 023/153] feat(dev-ui): enrich schema inspector with type metadata badges (#670) (#700) Enhance schema browser rows to display prepopulated type indicators and live per-type instance counts with lazy query-backed loading, while extending shared type contracts and tests to cover the new inspector metadata behavior. 
Co-authored-by: Cursor --- src/dev-ui/app/pages/graph/schema.vue | 76 +++++++++++++++++++++ src/dev-ui/app/tests/schema-browser.test.ts | 10 +++ src/dev-ui/app/types/index.ts | 2 + 3 files changed, 88 insertions(+) diff --git a/src/dev-ui/app/pages/graph/schema.vue b/src/dev-ui/app/pages/graph/schema.vue index cd032bce3..4ed6ace26 100644 --- a/src/dev-ui/app/pages/graph/schema.vue +++ b/src/dev-ui/app/pages/graph/schema.vue @@ -22,6 +22,7 @@ import { CopyableText } from '@/components/ui/copyable-text' import type { TypeDefinition } from '~/types' const { listNodeLabels, listEdgeLabels, getNodeSchema, getEdgeSchema } = useGraphApi() +const { queryGraph } = useQueryApi() const { extractErrorMessage } = useErrorHandler() const { hasTenant, tenantVersion } = useTenant() @@ -47,6 +48,8 @@ const searchInputRef = ref | null>(null) const expandedLabels = reactive(new Set()) const schemaCache = reactive(new Map()) const schemaLoadingLabels = reactive(new Set()) +const instanceCountCache = reactive(new Map()) +const instanceCountLoadingKeys = reactive(new Set()) // Virtual scroll container refs (virtualizers defined after filtered computeds) const nodeScrollRef = ref(null) @@ -146,6 +149,10 @@ function toggleExpand(label: string, entityType: 'node' | 'edge') { if (!schemaCache.has(label)) { fetchLabelSchema(label, entityType) } + const cacheKey = `${entityType}:${label}` + if (!instanceCountCache.has(cacheKey)) { + fetchInstanceCount(label, entityType) + } } // Force re-measurement after DOM update for variable-height rows nextTick(() => { @@ -176,6 +183,36 @@ async function fetchLabelSchema(label: string, entityType: 'node' | 'edge') { } } +function getInstanceCount(label: string, entityType: 'node' | 'edge'): number | null { + const cacheKey = `${entityType}:${label}` + return instanceCountCache.has(cacheKey) ? (instanceCountCache.get(cacheKey) ?? 
0) : null +} + +function isInstanceCountLoading(label: string, entityType: 'node' | 'edge'): boolean { + return instanceCountLoadingKeys.has(`${entityType}:${label}`) +} + +async function fetchInstanceCount(label: string, entityType: 'node' | 'edge') { + const cacheKey = `${entityType}:${label}` + instanceCountLoadingKeys.add(cacheKey) + try { + const cypher = entityType === 'node' + ? `MATCH (n:\`${label}\`) RETURN count(n) AS instance_count` + : `MATCH ()-[r:\`${label}\`]->() RETURN count(r) AS instance_count` + const result = await queryGraph(cypher, 10, 1) + const value = result.rows[0]?.instance_count + const parsed = typeof value === 'number' + ? value + : Number.parseInt(String(value ?? '0'), 10) + instanceCountCache.set(cacheKey, Number.isFinite(parsed) ? parsed : 0) + } catch { + // Avoid noisy toasts from metadata enrichment failures. + instanceCountCache.set(cacheKey, 0) + } finally { + instanceCountLoadingKeys.delete(cacheKey) + } +} + // ── Cross-page Navigation ────────────────────────────────────────────────── function navigateToQuery(label: string, entityType: 'node' | 'edge') { @@ -247,6 +284,8 @@ watch(tenantVersion, () => { searchQuery.value = '' expandedLabels.clear() schemaCache.clear() + instanceCountCache.clear() + instanceCountLoadingKeys.clear() fetchNodeLabels() fetchEdgeLabels() } @@ -392,6 +431,28 @@ onUnmounted(() => { > Node + + Prepopulated + + + + Counting... + + + {{ getInstanceCount(filteredNodeLabels[virtualRow.index], 'node') }} instances + @@ -587,6 +648,21 @@ onUnmounted(() => { > Edge + + + Counting... 
+ + + {{ getInstanceCount(filteredEdgeLabels[virtualRow.index], 'edge') }} instances + diff --git a/src/dev-ui/app/tests/schema-browser.test.ts b/src/dev-ui/app/tests/schema-browser.test.ts index 1532101b8..cde8d8323 100644 --- a/src/dev-ui/app/tests/schema-browser.test.ts +++ b/src/dev-ui/app/tests/schema-browser.test.ts @@ -345,6 +345,16 @@ describe('Schema Browser - schema.vue uses ontology editor (not mutations consol it('graph explorer button (second) still navigates to /graph/explorer', () => { expect(schemaContent).toContain("path: '/graph/explorer'") }) + + it('schema.vue shows prepopulated indicator badge when provided', () => { + expect(schemaContent).toContain('Prepopulated') + expect(schemaContent).toContain('?.prepopulated') + }) + + it('schema.vue shows instance count metadata badge for node and edge types', () => { + expect(schemaContent).toContain('instances') + expect(schemaContent).toContain('fetchInstanceCount') + }) }) // ──────────────────────────────────────────────────────────────────────────── diff --git a/src/dev-ui/app/types/index.ts b/src/dev-ui/app/types/index.ts index 6462452a2..aa2081e04 100644 --- a/src/dev-ui/app/types/index.ts +++ b/src/dev-ui/app/types/index.ts @@ -103,6 +103,8 @@ export interface TypeDefinition { description: string required_properties: string[] optional_properties: string[] + prepopulated?: boolean + instance_count?: number } export interface SchemaLabelsResponse { From 6b6d32e91e1de8a7075d7e858580e966bc455b3f Mon Sep 17 00:00:00 2001 From: Austin Redenbaugh Date: Wed, 20 May 2026 14:37:28 -0400 Subject: [PATCH 024/153] feat(management): add extraction run-control APIs for sync lifecycle (#671) (#701) Add manage-authorized run-control operations (start, pause, halt, reset_running, reset_failed, reset_completed, reset_all) over data source sync runs, expose them via dedicated management routes, and verify behavior with unit tests for both service transitions and HTTP contract responses. 
Co-authored-by: Cursor --- .../services/data_source_service.py | 100 ++++++++++++++++ .../presentation/data_sources/models.py | 21 ++++ .../presentation/data_sources/routes.py | 49 ++++++++ .../application/test_data_source_service.py | 112 ++++++++++++++++++ .../presentation/test_data_sources_routes.py | 72 +++++++++++ 5 files changed, 354 insertions(+) diff --git a/src/api/management/application/services/data_source_service.py b/src/api/management/application/services/data_source_service.py index d17b4f7ff..b128052fc 100644 --- a/src/api/management/application/services/data_source_service.py +++ b/src/api/management/application/services/data_source_service.py @@ -49,6 +49,16 @@ class DataSourceWithLatestRun: latest_sync_run: DataSourceSyncRun | None +@dataclass +class RunControlResult: + """Result payload for extraction run-control commands.""" + + action: str + affected_count: int + updated_runs: list[DataSourceSyncRun] + started_run: DataSourceSyncRun | None = None + + class DataSourceService: """Application service for data source management. 
@@ -655,3 +665,93 @@ async def trigger_sync( self._probe.sync_requested(ds_id=ds_id) return sync_run + + async def apply_run_control( + self, + user_id: str, + ds_id: str, + action: str, + ) -> RunControlResult: + """Apply run-control action to sync runs for a data source.""" + if action == "start": + started = await self.trigger_sync(user_id=user_id, ds_id=ds_id) + return RunControlResult( + action=action, + affected_count=1, + updated_runs=[], + started_run=started, + ) + + has_manage = await self._check_permission( + user_id=user_id, + resource_type=ResourceType.DATA_SOURCE, + resource_id=ds_id, + permission=Permission.MANAGE, + ) + if not has_manage: + self._probe.permission_denied( + user_id=user_id, + resource_id=ds_id, + permission=Permission.MANAGE, + ) + raise UnauthorizedError( + f"User {user_id} lacks manage permission on data source {ds_id}" + ) + + ds = await self._ds_repo.get_by_id(DataSourceId(value=ds_id)) + if ds is None or ds.tenant_id != self._scope_to_tenant: + raise ValueError(f"Data source {ds_id} not found") + + runs = await self._sync_run_repo.find_by_data_source(ds_id) + active_statuses = {"pending", "ingesting", "ai_extracting", "applying"} + targets: list[DataSourceSyncRun] = [] + now = datetime.now(UTC) + + if action == "pause": + targets = [run for run in runs if run.status in active_statuses] + for run in targets: + run.status = "pending" + run.logs.append("Run paused by control plane") + elif action == "halt": + targets = [run for run in runs if run.status in active_statuses] + for run in targets: + run.status = "failed" + run.completed_at = now + run.error = "Run halted by control plane" + run.logs.append("Run halted by control plane") + elif action == "reset_running": + targets = [run for run in runs if run.status in active_statuses] + for run in targets: + run.status = "pending" + run.completed_at = None + run.error = None + elif action == "reset_failed": + targets = [run for run in runs if run.status == "failed"] + for run in 
targets: + run.status = "pending" + run.completed_at = None + run.error = None + elif action == "reset_completed": + targets = [run for run in runs if run.status == "completed"] + for run in targets: + run.status = "pending" + run.completed_at = None + run.error = None + elif action == "reset_all": + targets = list(runs) + for run in targets: + run.status = "pending" + run.completed_at = None + run.error = None + else: + raise ValueError(f"Unsupported run control action: {action}") + + for run in targets: + await self._sync_run_repo.save(run) + await self._session.commit() + + return RunControlResult( + action=action, + affected_count=len(targets), + updated_runs=targets, + ) diff --git a/src/api/management/presentation/data_sources/models.py b/src/api/management/presentation/data_sources/models.py index c9c385811..a1115daaf 100644 --- a/src/api/management/presentation/data_sources/models.py +++ b/src/api/management/presentation/data_sources/models.py @@ -3,6 +3,7 @@ from __future__ import annotations from datetime import datetime +from typing import Literal from pydantic import BaseModel, Field @@ -320,6 +321,26 @@ def from_domain(cls, run: DataSourceSyncRun) -> SyncRunResponse: ) +RunControlAction = Literal[ + "start", + "pause", + "halt", + "reset_running", + "reset_failed", + "reset_completed", + "reset_all", +] + + +class RunControlResponse(BaseModel): + """Response model for run-control actions.""" + + action: RunControlAction + affected_count: int + updated_runs: list[SyncRunResponse] = Field(default_factory=list) + started_run: SyncRunResponse | None = None + + class DataSourceWithSyncResponse(BaseModel): """Data source response with embedded latest sync run. 
diff --git a/src/api/management/presentation/data_sources/routes.py b/src/api/management/presentation/data_sources/routes.py index 99c99e419..c35ab68c7 100644 --- a/src/api/management/presentation/data_sources/routes.py +++ b/src/api/management/presentation/data_sources/routes.py @@ -27,6 +27,8 @@ DataSourceListResponse, DataSourceResponse, DataSourceWithSyncResponse, + RunControlAction, + RunControlResponse, SyncRunLogsResponse, SyncRunResponse, UpdateDataSourceRequest, @@ -442,6 +444,53 @@ async def list_sync_runs( ) +@router.post( + "/data-sources/{ds_id}/run-controls/{action}", + status_code=status.HTTP_200_OK, +) +async def control_sync_runs( + ds_id: str, + action: RunControlAction, + current_user: Annotated[CurrentUser, Depends(get_current_user)], + service: Annotated[DataSourceService, Depends(get_data_source_service)], +) -> RunControlResponse: + """Apply run-control action to extraction sync runs for a data source.""" + try: + result = await service.apply_run_control( + user_id=current_user.user_id.value, + ds_id=ds_id, + action=action, + ) + return RunControlResponse( + action=action, + affected_count=result.affected_count, + updated_runs=[SyncRunResponse.from_domain(run) for run in result.updated_runs], + started_run=( + SyncRunResponse.from_domain(result.started_run) + if result.started_run is not None + else None + ), + ) + except UnauthorizedError: + raise HTTPException( + status_code=status.HTTP_403_FORBIDDEN, + detail="You do not have permission to perform this action", + ) + except ValueError as e: + detail = str(e) + status_code = ( + status.HTTP_422_UNPROCESSABLE_ENTITY + if "Unsupported run control action" in detail + else status.HTTP_404_NOT_FOUND + ) + raise HTTPException(status_code=status_code, detail=detail) + except Exception: + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail="Failed to apply run control action", + ) + + @router.patch( "/data-sources/{ds_id}", response_model=DataSourceResponse, diff 
--git a/src/api/tests/unit/management/application/test_data_source_service.py b/src/api/tests/unit/management/application/test_data_source_service.py index 3ebdc3220..973528875 100644 --- a/src/api/tests/unit/management/application/test_data_source_service.py +++ b/src/api/tests/unit/management/application/test_data_source_service.py @@ -459,6 +459,25 @@ def _make_ds( return ds +def _make_sync_run( + *, + run_id: str, + data_source_id: str, + status: str, +) -> DataSourceSyncRun: + now = datetime.now(UTC) + return DataSourceSyncRun( + id=run_id, + data_source_id=data_source_id, + status=status, + started_at=now, + completed_at=None, + error=None, + created_at=now, + logs=[], + ) + + # ---- create ---- @@ -1030,6 +1049,99 @@ async def test_trigger_sync_creates_sync_run_and_saves_ds( assert ds_probe.sync_requested_calls[0]["ds_id"] == ds.id.value +class TestDataSourceServiceRunControls: + """Tests for extraction run-control operations.""" + + @pytest.mark.asyncio + async def test_pause_updates_active_runs_to_pending( + self, service, authz, ds_repo, sync_run_repo, user_id + ) -> None: + ds = _make_ds() + authz.grant_all() + ds_repo.seed(ds) + sync_run_repo.seed( + _make_sync_run(run_id="run-1", data_source_id=ds.id.value, status="ingesting"), + _make_sync_run(run_id="run-2", data_source_id=ds.id.value, status="applying"), + _make_sync_run(run_id="run-3", data_source_id=ds.id.value, status="completed"), + ) + + result = await service.apply_run_control( + user_id=user_id, + ds_id=ds.id.value, + action="pause", + ) + + assert result.affected_count == 2 + assert all(run.status == "pending" for run in result.updated_runs) + + @pytest.mark.asyncio + async def test_halt_marks_active_runs_as_failed( + self, service, authz, ds_repo, sync_run_repo, user_id + ) -> None: + ds = _make_ds() + authz.grant_all() + ds_repo.seed(ds) + sync_run_repo.seed( + _make_sync_run( + run_id="run-1", data_source_id=ds.id.value, status="ai_extracting" + ) + ) + + result = await 
service.apply_run_control( + user_id=user_id, + ds_id=ds.id.value, + action="halt", + ) + + assert result.affected_count == 1 + halted = result.updated_runs[0] + assert halted.status == "failed" + assert halted.completed_at is not None + assert halted.error is not None + + @pytest.mark.asyncio + async def test_reset_failed_moves_failed_runs_to_pending( + self, service, authz, ds_repo, sync_run_repo, user_id + ) -> None: + ds = _make_ds() + authz.grant_all() + ds_repo.seed(ds) + failed = _make_sync_run(run_id="run-1", data_source_id=ds.id.value, status="failed") + failed.error = "old error" + failed.completed_at = datetime.now(UTC) + sync_run_repo.seed(failed) + + result = await service.apply_run_control( + user_id=user_id, + ds_id=ds.id.value, + action="reset_failed", + ) + + assert result.affected_count == 1 + updated = result.updated_runs[0] + assert updated.status == "pending" + assert updated.error is None + assert updated.completed_at is None + + @pytest.mark.asyncio + async def test_start_action_creates_new_sync_run( + self, service, authz, ds_repo, user_id + ) -> None: + ds = _make_ds() + authz.grant_all() + ds_repo.seed(ds) + + result = await service.apply_run_control( + user_id=user_id, + ds_id=ds.id.value, + action="start", + ) + + assert result.started_run is not None + assert result.started_run.status == "pending" + assert result.affected_count == 1 + + class TestDataSourceServiceCommitReferenceActions: """Tests for commit reference refresh/baseline actions.""" diff --git a/src/api/tests/unit/management/presentation/test_data_sources_routes.py b/src/api/tests/unit/management/presentation/test_data_sources_routes.py index 2e64d01bf..3eaee1574 100644 --- a/src/api/tests/unit/management/presentation/test_data_sources_routes.py +++ b/src/api/tests/unit/management/presentation/test_data_sources_routes.py @@ -466,6 +466,78 @@ def test_list_sync_runs_returns_404_when_ds_not_found( mock_sync_run_repo.find_by_data_source.assert_not_called() +class 
TestRunControlRoutes: + """Tests for POST /management/data-sources/{ds_id}/run-controls/{action}.""" + + def test_pause_run_control_returns_200_with_affected_count( + self, + test_client: TestClient, + mock_ds_service: AsyncMock, + sample_sync_run: DataSourceSyncRun, + ) -> None: + mock_ds_service.apply_run_control.return_value = type( + "_Result", + (), + { + "action": "pause", + "affected_count": 1, + "updated_runs": [sample_sync_run], + "started_run": None, + }, + )() + + response = test_client.post( + "/management/data-sources/01JPQRST1234567890ABCDEFDS/run-controls/pause" + ) + + assert response.status_code == status.HTTP_200_OK + payload = response.json() + assert payload["action"] == "pause" + assert payload["affected_count"] == 1 + assert len(payload["updated_runs"]) == 1 + assert payload["started_run"] is None + + def test_start_run_control_returns_started_run( + self, + test_client: TestClient, + mock_ds_service: AsyncMock, + sample_sync_run: DataSourceSyncRun, + ) -> None: + mock_ds_service.apply_run_control.return_value = type( + "_Result", + (), + { + "action": "start", + "affected_count": 1, + "updated_runs": [], + "started_run": sample_sync_run, + }, + )() + + response = test_client.post( + "/management/data-sources/01JPQRST1234567890ABCDEFDS/run-controls/start" + ) + + assert response.status_code == status.HTTP_200_OK + payload = response.json() + assert payload["action"] == "start" + assert payload["affected_count"] == 1 + assert payload["started_run"]["id"] == sample_sync_run.id + + def test_run_control_returns_403_when_unauthorized( + self, + test_client: TestClient, + mock_ds_service: AsyncMock, + ) -> None: + mock_ds_service.apply_run_control.side_effect = UnauthorizedError("no permission") + + response = test_client.post( + "/management/data-sources/01JPQRST1234567890ABCDEFDS/run-controls/halt" + ) + + assert response.status_code == status.HTTP_403_FORBIDDEN + + class TestGetSyncRunLogsRoute: """Tests for GET 
/management/data-sources/{ds_id}/sync-runs/{run_id}/logs endpoint. From 9282149b98ef04f097d65359676318b8c0e8cbce Mon Sep 17 00:00:00 2001 From: Austin Redenbaugh Date: Wed, 20 May 2026 14:42:46 -0400 Subject: [PATCH 025/153] feat(dev-ui): add extraction telemetry dashboard metrics (#672) (#702) Expose sync-run token/cost metadata in management API responses and add an extraction telemetry dashboard in the data-sources workspace with active worker counts, status buckets, recent job events, and 24h cost trend indicators backed by auto-refreshing sync data. Co-authored-by: Cursor --- .../presentation/data_sources/models.py | 16 +++ .../presentation/test_data_sources_routes.py | 31 ++++ src/dev-ui/app/pages/data-sources/index.vue | 136 ++++++++++++++++++ src/dev-ui/app/tests/data-sources.test.ts | 24 ++++ 4 files changed, 207 insertions(+) diff --git a/src/api/management/presentation/data_sources/models.py b/src/api/management/presentation/data_sources/models.py index a1115daaf..e06a4bbd7 100644 --- a/src/api/management/presentation/data_sources/models.py +++ b/src/api/management/presentation/data_sources/models.py @@ -296,6 +296,12 @@ class SyncRunResponse(BaseModel): None, description="When the sync run completed" ) error: str | None = Field(None, description="Error message if the sync run failed") + token_usage_total: int | None = Field( + None, description="Total model tokens consumed during extraction for this run" + ) + cost_total_usd: float | None = Field( + None, description="Estimated USD cost for extraction execution in this run" + ) created_at: datetime = Field( ..., description="When the sync run record was created" ) @@ -317,6 +323,16 @@ def from_domain(cls, run: DataSourceSyncRun) -> SyncRunResponse: started_at=run.started_at, completed_at=run.completed_at, error=run.error, + token_usage_total=( + run.mutation_log_run.token_usage_total + if run.mutation_log_run is not None + else None + ), + cost_total_usd=( + run.mutation_log_run.cost_total_usd + if 
run.mutation_log_run is not None + else None + ), created_at=run.created_at, ) diff --git a/src/api/tests/unit/management/presentation/test_data_sources_routes.py b/src/api/tests/unit/management/presentation/test_data_sources_routes.py index 3eaee1574..e8ee14fbb 100644 --- a/src/api/tests/unit/management/presentation/test_data_sources_routes.py +++ b/src/api/tests/unit/management/presentation/test_data_sources_routes.py @@ -18,6 +18,7 @@ from management.application.services.data_source_service import DataSourceService from management.domain.aggregates import DataSource from management.domain.entities import DataSourceSyncRun +from management.domain.entities.data_source_sync_run import MutationLogRunMetadata from management.infrastructure.git_diff_summary_service import DiffSummaryResult from management.domain.value_objects import ( DataSourceId, @@ -449,6 +450,36 @@ def test_list_sync_runs_returns_empty_list( assert response.status_code == status.HTTP_200_OK assert response.json() == [] + def test_list_sync_runs_includes_token_and_cost_metadata_when_available( + self, + test_client: TestClient, + mock_ds_service: AsyncMock, + mock_sync_run_repo: AsyncMock, + sample_data_source: DataSource, + sample_sync_run: DataSourceSyncRun, + ) -> None: + """Sync run response should expose token/cost telemetry metadata.""" + sample_sync_run.mutation_log_run = MutationLogRunMetadata( + mutation_log_id="mlog-1", + knowledge_graph_id=sample_data_source.knowledge_graph_id, + session_id="sess-1", + actor_id="actor-1", + started_at=sample_sync_run.started_at, + token_usage_total=3210, + cost_total_usd=1.23, + ) + mock_ds_service.get.return_value = sample_data_source + mock_sync_run_repo.find_by_data_source.return_value = [sample_sync_run] + + response = test_client.get( + f"/management/data-sources/{sample_data_source.id.value}/sync-runs" + ) + + assert response.status_code == status.HTTP_200_OK + payload = response.json()[0] + assert payload["token_usage_total"] == 3210 + assert 
payload["cost_total_usd"] == pytest.approx(1.23) + def test_list_sync_runs_returns_404_when_ds_not_found( self, test_client: TestClient, diff --git a/src/dev-ui/app/pages/data-sources/index.vue b/src/dev-ui/app/pages/data-sources/index.vue index f112f9ba9..09593dc8c 100644 --- a/src/dev-ui/app/pages/data-sources/index.vue +++ b/src/dev-ui/app/pages/data-sources/index.vue @@ -23,6 +23,10 @@ import { FileText, Settings, RefreshCw, + Cpu, + Coins, + DollarSign, + Clock3, } from 'lucide-vue-next' import { ADAPTERS, @@ -86,6 +90,8 @@ interface SyncRun { started_at: string completed_at: string | null error: string | null + token_usage_total?: number | null + cost_total_usd?: number | null created_at: string } @@ -728,6 +734,59 @@ const hasActiveSyncs = computed(() => }), ) +const telemetryRows = computed(() => + dataSources.value.flatMap((ds) => + (ds.sync_runs ?? []).map(run => ({ ...run, data_source_name: ds.name })), + ), +) + +const telemetryStatusBuckets = computed(() => { + const buckets = { + pending: 0, + ingesting: 0, + ai_extracting: 0, + applying: 0, + completed: 0, + failed: 0, + } + for (const row of telemetryRows.value) { + buckets[row.status] += 1 + } + return buckets +}) + +const telemetryRecentJobs = computed(() => + [...telemetryRows.value] + .sort((a, b) => new Date(b.started_at).getTime() - new Date(a.started_at).getTime()) + .slice(0, 8), +) + +const telemetryActiveWorkers = computed(() => + telemetryRows.value.filter(row => ACTIVE_STATUSES.includes(row.status)).length, +) + +const telemetryTokenTotal = computed(() => + telemetryRows.value.reduce((sum, row) => sum + (row.token_usage_total ?? 0), 0), +) + +const telemetryCostTotal = computed(() => + telemetryRows.value.reduce((sum, row) => sum + (row.cost_total_usd ?? 
0), 0), +) + +const telemetryCostTrend = computed(() => { + const now = Date.now() + const oneDayMs = 24 * 60 * 60 * 1000 + let current = 0 + let previous = 0 + for (const row of telemetryRows.value) { + const eventMs = new Date(row.completed_at ?? row.started_at).getTime() + if (eventMs >= now - oneDayMs) current += row.cost_total_usd ?? 0 + else if (eventMs >= now - oneDayMs * 2) previous += row.cost_total_usd ?? 0 + } + const delta = current - previous + return { current, previous, delta } +}) + /** Holds the active setInterval handle, or null when not polling. */ const pollInterval = ref | null>(null) @@ -1145,6 +1204,83 @@ async function handleDeleteDs() {
diff --git a/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts b/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts index d65f70779..1d5cb28ae 100644 --- a/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts +++ b/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts @@ -36,4 +36,23 @@ describe('Knowledge Graph Manage Workspace - mode-aware controls', () => { expect(manageWorkspaceVue).toContain('active_schema_bootstrap_session_id') expect(manageWorkspaceVue).toContain('active_extraction_operations_session_id') }) + + it('keeps extraction conversation panel visible in extraction mode', () => { + expect(manageWorkspaceVue).toContain('Extraction Conversation') + expect(manageWorkspaceVue).toContain('message_history') + expect(manageWorkspaceVue).toContain('statusProjection.workspace_mode === \'extraction_operations\'') + }) + + it('supports explicit Clear chat reset for extraction session', () => { + expect(manageWorkspaceVue).toContain('clearChat') + expect(manageWorkspaceVue).toContain('/sessions/extraction_operations/clear-chat') + expect(manageWorkspaceVue).toContain('Clear chat') + }) + + it('provides tabbed lower operations area for extraction workflows', () => { + expect(manageWorkspaceVue).toContain('Operations Workspace') + expect(manageWorkspaceVue).toContain('TabsTrigger value="extraction-jobs"') + expect(manageWorkspaceVue).toContain('TabsTrigger value="manual-mutations"') + expect(manageWorkspaceVue).toContain('TabsTrigger value="run-logs"') + }) }) From 4afcea04d66ffff3a6017b5449197df739e358f7 Mon Sep 17 00:00:00 2001 From: Austin Redenbaugh Date: Wed, 20 May 2026 15:59:32 -0400 Subject: [PATCH 031/153] feat(management): add KG-scoped mutation log browser data and UI (#708) Expose mutation-run identifiers and operation class counts on sync-run responses, then render a knowledge-graph-scoped MutationLog browser in the manage workspace with run summaries, per-entry previews, and token/cost metrics. 
Co-authored-by: Cursor --- .../presentation/data_sources/models.py | 33 ++++ .../presentation/test_data_sources_routes.py | 37 ++++ .../pages/knowledge-graphs/[kgId]/manage.vue | 185 +++++++++++++++++- .../knowledge-graph-manage-workspace.test.ts | 26 +++ 4 files changed, 280 insertions(+), 1 deletion(-) diff --git a/src/api/management/presentation/data_sources/models.py b/src/api/management/presentation/data_sources/models.py index e06a4bbd7..b2508a79c 100644 --- a/src/api/management/presentation/data_sources/models.py +++ b/src/api/management/presentation/data_sources/models.py @@ -296,6 +296,19 @@ class SyncRunResponse(BaseModel): None, description="When the sync run completed" ) error: str | None = Field(None, description="Error message if the sync run failed") + mutation_log_id: str | None = Field( + None, description="Associated mutation log run ID when available" + ) + session_id: str | None = Field( + None, description="Extraction session ID associated with this mutation run" + ) + actor_id: str | None = Field( + None, description="Actor identity associated with this mutation run" + ) + operation_counts: dict[str, int] = Field( + default_factory=dict, + description="Operation counts grouped by operation class for this run", + ) token_usage_total: int | None = Field( None, description="Total model tokens consumed during extraction for this run" ) @@ -323,6 +336,26 @@ def from_domain(cls, run: DataSourceSyncRun) -> SyncRunResponse: started_at=run.started_at, completed_at=run.completed_at, error=run.error, + mutation_log_id=( + run.mutation_log_run.mutation_log_id + if run.mutation_log_run is not None + else None + ), + session_id=( + run.mutation_log_run.session_id + if run.mutation_log_run is not None + else None + ), + actor_id=( + run.mutation_log_run.actor_id + if run.mutation_log_run is not None + else None + ), + operation_counts=( + dict(run.mutation_log_run.operation_counts) + if run.mutation_log_run is not None + else {} + ), token_usage_total=( 
run.mutation_log_run.token_usage_total if run.mutation_log_run is not None diff --git a/src/api/tests/unit/management/presentation/test_data_sources_routes.py b/src/api/tests/unit/management/presentation/test_data_sources_routes.py index e8ee14fbb..62e1a9f53 100644 --- a/src/api/tests/unit/management/presentation/test_data_sources_routes.py +++ b/src/api/tests/unit/management/presentation/test_data_sources_routes.py @@ -480,6 +480,43 @@ def test_list_sync_runs_includes_token_and_cost_metadata_when_available( assert payload["token_usage_total"] == 3210 assert payload["cost_total_usd"] == pytest.approx(1.23) + def test_list_sync_runs_includes_mutation_log_run_preview_fields( + self, + test_client: TestClient, + mock_ds_service: AsyncMock, + mock_sync_run_repo: AsyncMock, + sample_data_source: DataSource, + sample_sync_run: DataSourceSyncRun, + ) -> None: + """Sync run response should include mutation-run IDs and op class counts.""" + sample_sync_run.mutation_log_run = MutationLogRunMetadata( + mutation_log_id="mlog-preview-1", + knowledge_graph_id=sample_data_source.knowledge_graph_id, + session_id="sess-preview-1", + actor_id="actor-preview-1", + started_at=sample_sync_run.started_at, + token_usage_total=144, + cost_total_usd=0.07, + operation_counts={"create_node": 8, "create_edge": 13, "update_node": 2}, + ) + mock_ds_service.get.return_value = sample_data_source + mock_sync_run_repo.find_by_data_source.return_value = [sample_sync_run] + + response = test_client.get( + f"/management/data-sources/{sample_data_source.id.value}/sync-runs" + ) + + assert response.status_code == status.HTTP_200_OK + payload = response.json()[0] + assert payload["mutation_log_id"] == "mlog-preview-1" + assert payload["session_id"] == "sess-preview-1" + assert payload["actor_id"] == "actor-preview-1" + assert payload["operation_counts"] == { + "create_node": 8, + "create_edge": 13, + "update_node": 2, + } + def test_list_sync_runs_returns_404_when_ds_not_found( self, test_client: 
TestClient, diff --git a/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue b/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue index 9c47c726c..c0b2584d7 100644 --- a/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue +++ b/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue @@ -1,7 +1,7 @@ + + diff --git a/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue b/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue index 46aced110..27119a4b7 100644 --- a/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue +++ b/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue @@ -6,8 +6,8 @@ import { Button } from '@/components/ui/button' import { Badge } from '@/components/ui/badge' import { Card, CardContent, CardHeader, CardTitle, CardDescription } from '@/components/ui/card' import { Separator } from '@/components/ui/separator' -import { Input } from '@/components/ui/input' import { Tabs, TabsList, TabsTrigger, TabsContent } from '@/components/ui/tabs' +import SharedConversationPanel from '@/components/extraction/SharedConversationPanel.vue' interface WorkspaceReadinessStatus { has_minimum_entity_types: boolean @@ -83,6 +83,12 @@ const modeLabel = computed(() => : 'Schema Bootstrap', ) +const sessionMode = computed<'schema_bootstrap' | 'extraction_operations'>(() => + statusProjection.value?.workspace_mode === 'extraction_operations' + ? 'extraction_operations' + : 'schema_bootstrap', +) + const canTransition = computed(() => statusProjection.value?.workspace_mode === 'schema_bootstrap' && statusProjection.value?.transition_eligible === true, @@ -138,6 +144,13 @@ const nextSteps = computed(() => { return steps }) +const sessionActivityLines = computed(() => { + const context = extractionSession.value?.runtime_context ?? {} + const candidate = context.activity_lines ?? context.ndjson_activity_lines ?? 
context.thinking_lines + if (!Array.isArray(candidate)) return [] + return candidate.filter((line): line is string => typeof line === 'string' && line.trim().length > 0) +}) + async function loadWorkspaceStatus() { if (!hasTenant.value || !kgId.value) return loading.value = true @@ -207,7 +220,7 @@ async function loadExtractionSession() { sessionLoading.value = true try { extractionSession.value = await apiFetch( - `/extraction/knowledge-graphs/${kgId.value}/sessions/extraction_operations/active`, + `/extraction/knowledge-graphs/${kgId.value}/sessions/${sessionMode.value}/active`, ) } catch (err) { extractionSession.value = null @@ -261,7 +274,7 @@ async function clearChat() { clearingChat.value = true try { extractionSession.value = await apiFetch( - `/extraction/knowledge-graphs/${kgId.value}/sessions/extraction_operations/clear-chat`, + `/extraction/knowledge-graphs/${kgId.value}/sessions/${sessionMode.value}/clear-chat`, { method: 'POST' }, ) toast.success('Extraction chat cleared') @@ -289,7 +302,7 @@ watch(tenantVersion, () => { watch( () => statusProjection.value?.workspace_mode, (mode) => { - if (mode === 'extraction_operations') { + if (mode) { loadExtractionSession() } }, @@ -484,48 +497,19 @@ watch( -
- - - Extraction Conversation - - Conversation stays visible while you run extraction and manual-mutation operations. - - - -
- - Loading active extraction session... -
-
-
-

{{ entry.role ?? 'system' }}

-

{{ entry.content ?? entry.message ?? '(empty)' }}

-
-

- Session is active. Start by drafting extraction or mutation tasks below. -

-
-
- - -
-
-
- - +
+ + + Operations Workspace diff --git a/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts b/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts index 316e72777..7a850813b 100644 --- a/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts +++ b/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts @@ -6,6 +6,10 @@ const manageWorkspaceVue = readFileSync( resolve(__dirname, '../pages/knowledge-graphs/[kgId]/manage.vue'), 'utf-8', ) +const sharedConversationPanelVue = readFileSync( + resolve(__dirname, '../components/extraction/SharedConversationPanel.vue'), + 'utf-8', +) describe('Knowledge Graph Manage Workspace - mode-aware controls', () => { it('loads workspace status projection from management API', () => { @@ -37,15 +41,15 @@ describe('Knowledge Graph Manage Workspace - mode-aware controls', () => { expect(manageWorkspaceVue).toContain('active_extraction_operations_session_id') }) - it('keeps extraction conversation panel visible in extraction mode', () => { - expect(manageWorkspaceVue).toContain('Extraction Conversation') - expect(manageWorkspaceVue).toContain('message_history') - expect(manageWorkspaceVue).toContain('statusProjection.workspace_mode === \'extraction_operations\'') + it('uses shared conversation panel for bootstrap and extraction sessions', () => { + expect(manageWorkspaceVue).toContain('SharedConversationPanel') + expect(manageWorkspaceVue).toContain('sessionMode') + expect(manageWorkspaceVue).toContain('/sessions/${sessionMode.value}/active') }) it('supports explicit Clear chat reset for extraction session', () => { expect(manageWorkspaceVue).toContain('clearChat') - expect(manageWorkspaceVue).toContain('/sessions/extraction_operations/clear-chat') + expect(manageWorkspaceVue).toContain('/sessions/${sessionMode.value}/clear-chat') expect(manageWorkspaceVue).toContain('Clear chat') }) @@ -104,3 +108,22 @@ describe('Knowledge Graph Manage Workspace - bootstrap readiness guidance', () = 
expect(manageWorkspaceVue).toContain('Transition is enabled') }) }) + +describe('Shared conversation panel - extraction UX contract', () => { + it('renders resume-session action and explicit server-side persistence note', () => { + expect(sharedConversationPanelVue).toContain('Resume session') + expect(sharedConversationPanelVue).toContain('No local cache: conversation state is server-side only.') + }) + + it('renders clear-chat confirmation dialog before emitting clear action', () => { + expect(sharedConversationPanelVue).toContain('Clear conversation?') + expect(sharedConversationPanelVue).toContain('confirmClearChat') + expect(sharedConversationPanelVue).toContain("emit('clearChat')") + }) + + it('renders activity/thinking timeline lines and auto-scrolls timeline updates', () => { + expect(sharedConversationPanelVue).toContain('activityTimeline') + expect(sharedConversationPanelVue).toContain('timelineRef') + expect(sharedConversationPanelVue).toContain('scrollTop = timelineRef.value.scrollHeight') + }) +}) From 6e961dec117d6998d994dd5d1a3b1ccd3b4bbdf3 Mon Sep 17 00:00:00 2001 From: Austin Redenbaugh Date: Wed, 20 May 2026 16:55:24 -0400 Subject: [PATCH 036/153] feat(workflow): add section-wave subagent orchestration prompts (#713) Extend the subagent-delivery skill with section-wave execution/monitoring rules and add reusable Claude instance system-prompt and launch-packet templates for parallel issue delivery. 
Co-authored-by: Cursor --- skills/subagent-delivery/SKILL.md | 70 +++++++++++++++++++ .../claude-instance-system-prompt.txt | 67 ++++++++++++++++++ .../section-wave-launch.template.txt | 38 ++++++++++ 3 files changed, 175 insertions(+) create mode 100644 skills/subagent-delivery/claude-instance-system-prompt.txt create mode 100644 skills/subagent-delivery/section-wave-launch.template.txt diff --git a/skills/subagent-delivery/SKILL.md b/skills/subagent-delivery/SKILL.md index 3b9156390..824d6ec4b 100644 --- a/skills/subagent-delivery/SKILL.md +++ b/skills/subagent-delivery/SKILL.md @@ -11,6 +11,10 @@ description: > Follow this protocol for every assigned issue. +System prompt template for spawned Claude instances: + +- `skills/subagent-delivery/claude-instance-system-prompt.txt` + ## Parallel Execution Model Use this model whenever multiple issues are independent: @@ -25,6 +29,26 @@ If two issues touch the same files heavily, either: - serialize those two issues, or - split scope so each agent owns non-overlapping symbols. +## Section-Wave Execution Model (Required) + +When the user asks for "whole sections at a time", execute in waves aligned to tracker sections: + +1. **Section A: Core lifecycle/data** + - `#643 #644 #645 #646 #659 #660 #661 #662 #663` +2. **Section B: Extraction runtime/session** + - `#649 #650 #651 #652 #653 #654` +3. **Section C: Operations/security/integration** + - `#665 #667 #670 #671 #672 #673` + +Wave rules: + +1. Run independent issues in parallel with one Claude instance per issue. +2. Respect dependencies inside the section (foundation issues first). +3. Keep all PRs targeting `feature/manage-knowledge-graph`. +4. Do not start the next section until current section is merged or explicitly deferred. +5. 
For each section, maintain a live status board: + - `queued`, `in_progress`, `blocked`, `in_review`, `merged` + ## Scope and Inputs Before coding, gather: @@ -40,6 +64,26 @@ Before coding, gather: If acceptance criteria are ambiguous, ask one focused question before implementation. +## Claude Instance Spawn Contract + +For each issue, provide the Claude instance: + +1. Issue ID + title + acceptance criteria summary. +2. Branch naming requirement: + - `feat/issue-<id>-<short-scope>` or `fix/issue-<id>-<short-scope>` +3. Required reads: + - `AGENTS.md` + - relevant `specs/*.spec.md` + - related tests in touched context +4. TDD requirement: + - tests first, then implementation, then verification +5. Output contract: + - branch + - commit(s) + - test commands and results + - PR URL + - blockers/questions + ## Blocker Question Protocol (Required) Subagents must be able to stop and ask questions immediately. @@ -62,6 +106,13 @@ When blocked: 4. If working from a GitHub issue, mirror the same question as an issue comment so the orchestrator can batch unresolved questions across agents. 5. Continue only non-blocked work; do not guess on blocked decisions. +If a blocker impacts multiple active instances: + +1. Pause affected issues. +2. Continue unaffected issues in parallel. +3. Post one consolidated orchestrator decision update. +4. Resume paused issues with explicit instruction delta. + ## Git Workflow 1. Ensure local target branch is up to date: @@ -117,6 +168,24 @@ When blocked: 4. Re-run tests after conflict resolution. 5. Merge into `feature/manage-knowledge-graph` only after verification. +## Orchestrator Monitoring Loop (Required) + +During active waves, run this loop continuously: + +1. Poll each PR for: + - mergeability + - CI status + - review comments requiring changes +2. If merge conflict appears: + - rebase/merge target branch into issue branch + - resolve conflicts preserving spec behavior + - rerun relevant tests + - push and re-check PR +3. 
If CI fails: + - fix in same issue branch + - do not move issue scope +4. Update section status board and report progress to user. + ## Orchestrator Handoff Contract Each subagent must hand back: @@ -133,4 +202,5 @@ Each subagent must hand back: - Do not disable hooks. - Do not commit secrets or credentials. - Prefer fakes over mocks in unit tests when testing domain/application behavior. +- Do not invent acceptance criteria beyond the issue/spec without asking. diff --git a/skills/subagent-delivery/claude-instance-system-prompt.txt b/skills/subagent-delivery/claude-instance-system-prompt.txt new file mode 100644 index 000000000..0c3f3260e --- /dev/null +++ b/skills/subagent-delivery/claude-instance-system-prompt.txt @@ -0,0 +1,67 @@ +You are a focused delivery Claude instance assigned to exactly one Kartograph GitHub issue. + +Mission: +- Deliver the assigned issue end-to-end with TDD discipline. +- Open a PR against `feature/manage-knowledge-graph`. +- Stop and ask immediately when blocked by ambiguity. + +Hard constraints: +1. Scope + - Work only on the assigned issue. + - Do not expand scope to neighboring issues. +2. Branching + - Start from latest `feature/manage-knowledge-graph`. + - Use branch `feat/issue-<id>-<short-scope>` or `fix/issue-<id>-<short-scope>`. +3. Specs and architecture + - Read `AGENTS.md` first. + - Read all relevant `specs/*.spec.md` for your issue. + - Preserve bounded-context boundaries and authorization rules. +4. TDD + - Write/adjust tests first. + - Implement minimal code to satisfy tests. + - Run focused tests; run broader suite as needed by touched context. +5. Safety + - Never use destructive git commands. + - Never commit secrets. + - Never skip required checks. + +Blocker protocol (mandatory): +- Trigger if acceptance criteria are ambiguous, security/tenancy decision is unclear, or behavior is unspecified. 
+- Stop at decision boundary and ask one concise question with: + - ambiguity summary + - 2-3 concrete options + - recommended option with rationale +- Mirror the blocker question on the GitHub issue as a comment. +- Continue only non-blocked work. + +Execution checklist: +1. Parse issue acceptance criteria. +2. Inspect affected code and tests. +3. Add failing tests for required behavior. +4. Implement and make tests pass. +5. Run lint/type/test for touched area. +6. Commit atomically using conventional commit message. +7. Push branch and open PR to `feature/manage-knowledge-graph`. + +PR body format: +## Summary +- what changed and why +- key architecture/security note + +## Testing +- [x] commands run and results +- [ ] any pending verification + +## Risks +- none or explicit risk + mitigation + +Include `Closes #<id>` where appropriate. + +Required handoff output: +1. Issue ID +2. Branch name +3. Commit SHA(s) +4. Test commands and pass/fail +5. PR URL +6. Open blockers/questions (if any) +7. 
Assumptions made diff --git a/skills/subagent-delivery/section-wave-launch.template.txt b/skills/subagent-delivery/section-wave-launch.template.txt new file mode 100644 index 000000000..f6ce65822 --- /dev/null +++ b/skills/subagent-delivery/section-wave-launch.template.txt @@ -0,0 +1,38 @@ +Section wave launch template (one Claude instance per issue) + +Prerequisites: +- Read: `skills/subagent-delivery/SKILL.md` +- System prompt: `skills/subagent-delivery/claude-instance-system-prompt.txt` +- Base branch: `feature/manage-knowledge-graph` + +Per-instance launch packet: + +ISSUE: - +TARGET BRANCH: feature/manage-knowledge-graph +WORK BRANCH: feat/issue-<id>-<short-scope> + +Required context files: +- AGENTS.md +- <relevant spec files> +- <relevant code files> +- <relevant tests> + +Acceptance criteria summary: +- <criterion 1> +- <criterion 2> + +Execution requirements: +1) TDD: tests first +2) Implement minimal passing code +3) Run focused tests + lint +4) Commit atomically (conventional commit) +5) Open PR to feature/manage-knowledge-graph +6) Report branch, tests, PR URL, blockers + +Blocker handling: +- Ask one focused blocker question immediately. +- Include options + recommendation. +- Mirror blocker question on issue comment. + +Orchestrator status line format: +[Issue #<id>] <queued|in_progress|blocked|in_review|merged> | Branch: <branch> | PR: <url-or-pending> From 1f6f8f3db17520676126705086b25a8160f0bcfc Mon Sep 17 00:00:00 2001 From: Austin Redenbaugh <aredenba@redhat.com> Date: Wed, 20 May 2026 17:08:17 -0400 Subject: [PATCH 037/153] feat(extraction): add sticky runtime and ephemeral worker adapters (#714) Add extraction runtime contracts and in-memory tracer-bullet adapters for sticky session container reuse/reset/timeout and ephemeral worker launch with scoped short-lived credentials and least-privilege validation. 
Co-authored-by: Cursor <cursoragent@cursor.com> --- src/api/extraction/infrastructure/__init__.py | 8 + .../infrastructure/workload_runtime.py | 145 +++++++++++++++++ src/api/extraction/ports/__init__.py | 14 ++ src/api/extraction/ports/runtime.py | 98 ++++++++++++ .../infrastructure/test_workload_runtime.py | 150 ++++++++++++++++++ 5 files changed, 415 insertions(+) create mode 100644 src/api/extraction/infrastructure/workload_runtime.py create mode 100644 src/api/extraction/ports/runtime.py create mode 100644 src/api/tests/unit/extraction/infrastructure/test_workload_runtime.py diff --git a/src/api/extraction/infrastructure/__init__.py b/src/api/extraction/infrastructure/__init__.py index 20d48d352..ec40d91d8 100644 --- a/src/api/extraction/infrastructure/__init__.py +++ b/src/api/extraction/infrastructure/__init__.py @@ -8,11 +8,19 @@ from extraction.infrastructure.runtime_context_builder import ( FilesystemExtractionRuntimeContextBuilder, ) +from extraction.infrastructure.workload_runtime import ( + InMemoryEphemeralExtractionWorkerLauncher, + InMemoryStickySessionRuntimeManager, + ScopedWorkloadCredentialIssuer, +) __all__ = [ "ExtractionEventHandler", "ExtractionAgentSessionRepository", "ExtractionSkillOverrideRepository", "FilesystemExtractionRuntimeContextBuilder", + "InMemoryStickySessionRuntimeManager", + "ScopedWorkloadCredentialIssuer", + "InMemoryEphemeralExtractionWorkerLauncher", ] diff --git a/src/api/extraction/infrastructure/workload_runtime.py b/src/api/extraction/infrastructure/workload_runtime.py new file mode 100644 index 000000000..4f50940be --- /dev/null +++ b/src/api/extraction/infrastructure/workload_runtime.py @@ -0,0 +1,145 @@ +"""In-memory runtime adapters for extraction session/workload execution.""" + +from __future__ import annotations + +from dataclasses import replace +from datetime import UTC, datetime, timedelta + +from ulid import ULID + +from extraction.ports.runtime import ( + EphemeralWorkerLaunchRequest, + 
EphemeralWorkerLaunchResult, + IEphemeralExtractionWorkerLauncher, + IStickySessionRuntimeManager, + ScopedWorkloadCredentials, + StickySessionRuntimeLease, +) + + +class InMemoryStickySessionRuntimeManager(IStickySessionRuntimeManager): + """Sticky runtime manager with session reuse + timeout cleanup semantics.""" + + def __init__(self, *, session_ttl: timedelta = timedelta(minutes=30)) -> None: + self._session_ttl = session_ttl + self._leases: dict[str, StickySessionRuntimeLease] = {} + + def get_or_start_runtime( + self, + *, + session_id: str, + user_id: str, + knowledge_graph_id: str, + mode: str, + ) -> StickySessionRuntimeLease: + now = datetime.now(UTC) + existing = self._leases.get(session_id) + if existing is not None and existing.expires_at > now: + refreshed = replace( + existing, + last_activity_at=now, + expires_at=now + self._session_ttl, + status="active", + ) + self._leases[session_id] = refreshed + return refreshed + + lease = StickySessionRuntimeLease( + session_id=session_id, + container_id=str(ULID()), + user_id=user_id, + knowledge_graph_id=knowledge_graph_id, + mode=mode, + status="active", + last_activity_at=now, + expires_at=now + self._session_ttl, + ) + self._leases[session_id] = lease + return lease + + def reset_runtime( + self, + *, + session_id: str, + user_id: str, + knowledge_graph_id: str, + mode: str, + ) -> StickySessionRuntimeLease: + self._leases.pop(session_id, None) + return self.get_or_start_runtime( + session_id=session_id, + user_id=user_id, + knowledge_graph_id=knowledge_graph_id, + mode=mode, + ) + + def cleanup_expired(self, *, now: datetime) -> list[str]: + expired_sessions = [ + session_id + for session_id, lease in self._leases.items() + if lease.expires_at <= now + ] + terminated: list[str] = [] + for session_id in expired_sessions: + lease = self._leases.pop(session_id) + terminated.append(lease.container_id) + return terminated + + +class ScopedWorkloadCredentialIssuer: + """Issues short-lived tenant/KG scoped 
credentials for extraction workers.""" + + def __init__(self, *, default_ttl: timedelta = timedelta(minutes=15)) -> None: + self._default_ttl = default_ttl + + def issue(self, *, tenant_id: str, knowledge_graph_id: str) -> ScopedWorkloadCredentials: + now = datetime.now(UTC) + return ScopedWorkloadCredentials( + token=str(ULID()), + expires_at=now + self._default_ttl, + scopes=( + f"tenant:{tenant_id}", + f"knowledge_graph:{knowledge_graph_id}", + "workload:extraction", + ), + ) + + +class InMemoryEphemeralExtractionWorkerLauncher(IEphemeralExtractionWorkerLauncher): + """Ephemeral worker launcher that validates scope and tracks active workers.""" + + def __init__(self) -> None: + self._active_workers: dict[str, EphemeralWorkerLaunchRequest] = {} + + @property + def active_worker_count(self) -> int: + return len(self._active_workers) + + def launch( + self, + *, + request: EphemeralWorkerLaunchRequest, + credentials: ScopedWorkloadCredentials, + ) -> EphemeralWorkerLaunchResult: + required_scopes = { + f"tenant:{request.tenant_id}", + f"knowledge_graph:{request.knowledge_graph_id}", + "workload:extraction", + } + available_scopes = set(credentials.scopes) + if not required_scopes.issubset(available_scopes): + raise ValueError("credentials scope does not satisfy workload requirements") + if credentials.expires_at <= datetime.now(UTC): + raise ValueError("credentials are expired") + + worker_id = str(ULID()) + self._active_workers[worker_id] = request + return EphemeralWorkerLaunchResult( + worker_id=worker_id, + status="running", + credentials_expires_at=credentials.expires_at, + ) + + def complete_worker(self, worker_id: str) -> None: + self._active_workers.pop(worker_id, None) + diff --git a/src/api/extraction/ports/__init__.py b/src/api/extraction/ports/__init__.py index d3e72d0e9..2e253a7ea 100644 --- a/src/api/extraction/ports/__init__.py +++ b/src/api/extraction/ports/__init__.py @@ -4,11 +4,25 @@ IExtractionAgentSessionRepository, 
IExtractionSkillOverrideRepository, ) +from extraction.ports.runtime import ( + EphemeralWorkerLaunchRequest, + EphemeralWorkerLaunchResult, + IEphemeralExtractionWorkerLauncher, + IStickySessionRuntimeManager, + ScopedWorkloadCredentials, + StickySessionRuntimeLease, +) from extraction.ports.services import IExtractionService __all__ = [ "IExtractionService", "IExtractionAgentSessionRepository", "IExtractionSkillOverrideRepository", + "IStickySessionRuntimeManager", + "IEphemeralExtractionWorkerLauncher", + "StickySessionRuntimeLease", + "ScopedWorkloadCredentials", + "EphemeralWorkerLaunchRequest", + "EphemeralWorkerLaunchResult", ] diff --git a/src/api/extraction/ports/runtime.py b/src/api/extraction/ports/runtime.py new file mode 100644 index 000000000..624973ebd --- /dev/null +++ b/src/api/extraction/ports/runtime.py @@ -0,0 +1,98 @@ +"""Runtime port contracts for extraction workload execution.""" + +from __future__ import annotations + +from dataclasses import dataclass +from datetime import datetime +from typing import Protocol + + +@dataclass(frozen=True) +class StickySessionRuntimeLease: + """Represents sticky runtime assignment for an active chat session.""" + + session_id: str + container_id: str + user_id: str + knowledge_graph_id: str + mode: str + status: str + last_activity_at: datetime + expires_at: datetime + + +@dataclass(frozen=True) +class ScopedWorkloadCredentials: + """Short-lived credentials issued for one extraction workload scope.""" + + token: str + expires_at: datetime + scopes: tuple[str, ...] 
+ + +@dataclass(frozen=True) +class EphemeralWorkerLaunchRequest: + """Launch payload for an ephemeral extraction worker.""" + + tenant_id: str + knowledge_graph_id: str + session_id: str + sync_run_id: str + job_package_id: str + + +@dataclass(frozen=True) +class EphemeralWorkerLaunchResult: + """Safe result returned after worker launch.""" + + worker_id: str + status: str + credentials_expires_at: datetime + + +class IStickySessionRuntimeManager(Protocol): + """Manages sticky chat runtime containers for active sessions.""" + + def get_or_start_runtime( + self, + *, + session_id: str, + user_id: str, + knowledge_graph_id: str, + mode: str, + ) -> StickySessionRuntimeLease: + """Return current runtime lease or start a new sticky runtime.""" + ... + + def reset_runtime( + self, + *, + session_id: str, + user_id: str, + knowledge_graph_id: str, + mode: str, + ) -> StickySessionRuntimeLease: + """Terminate existing runtime for session and start a clean one.""" + ... + + def cleanup_expired(self, *, now: datetime) -> list[str]: + """Terminate and remove expired sticky runtimes; return container IDs.""" + ... + + +class IEphemeralExtractionWorkerLauncher(Protocol): + """Launches short-lived extraction workers with scoped credentials.""" + + def launch( + self, + *, + request: EphemeralWorkerLaunchRequest, + credentials: ScopedWorkloadCredentials, + ) -> EphemeralWorkerLaunchResult: + """Start ephemeral worker; must not expose credential material.""" + ... + + def complete_worker(self, worker_id: str) -> None: + """Mark worker as completed and terminate runtime resources.""" + ... 
+ diff --git a/src/api/tests/unit/extraction/infrastructure/test_workload_runtime.py b/src/api/tests/unit/extraction/infrastructure/test_workload_runtime.py new file mode 100644 index 000000000..d2b8a943c --- /dev/null +++ b/src/api/tests/unit/extraction/infrastructure/test_workload_runtime.py @@ -0,0 +1,150 @@ +"""Unit tests for extraction workload runtime infrastructure adapters.""" + +from __future__ import annotations + +from datetime import UTC, datetime, timedelta + +import pytest + +from extraction.infrastructure.workload_runtime import ( + InMemoryEphemeralExtractionWorkerLauncher, + InMemoryStickySessionRuntimeManager, + ScopedWorkloadCredentialIssuer, +) +from extraction.ports.runtime import ( + EphemeralWorkerLaunchRequest, +) + + +class TestInMemoryStickySessionRuntimeManager: + def test_reuses_same_container_while_session_active(self) -> None: + manager = InMemoryStickySessionRuntimeManager(session_ttl=timedelta(minutes=30)) + + first = manager.get_or_start_runtime( + session_id="session-1", + user_id="user-1", + knowledge_graph_id="kg-1", + mode="extraction_operations", + ) + second = manager.get_or_start_runtime( + session_id="session-1", + user_id="user-1", + knowledge_graph_id="kg-1", + mode="extraction_operations", + ) + + assert first.container_id == second.container_id + assert first.status == "active" + assert second.status == "active" + + def test_reset_rotates_container_for_same_session(self) -> None: + manager = InMemoryStickySessionRuntimeManager(session_ttl=timedelta(minutes=30)) + original = manager.get_or_start_runtime( + session_id="session-1", + user_id="user-1", + knowledge_graph_id="kg-1", + mode="schema_bootstrap", + ) + + rotated = manager.reset_runtime( + session_id="session-1", + user_id="user-1", + knowledge_graph_id="kg-1", + mode="schema_bootstrap", + ) + + assert rotated.container_id != original.container_id + assert rotated.status == "active" + + def test_cleanup_terminates_expired_sessions(self) -> None: + manager = 
InMemoryStickySessionRuntimeManager(session_ttl=timedelta(minutes=5)) + lease = manager.get_or_start_runtime( + session_id="session-1", + user_id="user-1", + knowledge_graph_id="kg-1", + mode="schema_bootstrap", + ) + cleanup_at = lease.last_activity_at + timedelta(minutes=6) + + terminated = manager.cleanup_expired(now=cleanup_at) + + assert terminated == [lease.container_id] + replacement = manager.get_or_start_runtime( + session_id="session-1", + user_id="user-1", + knowledge_graph_id="kg-1", + mode="schema_bootstrap", + ) + assert replacement.container_id != lease.container_id + + +class TestEphemeralWorkerLauncher: + def test_launch_requires_credentials_scoped_to_request(self) -> None: + issuer = ScopedWorkloadCredentialIssuer(default_ttl=timedelta(minutes=10)) + launcher = InMemoryEphemeralExtractionWorkerLauncher() + wrong_scope = issuer.issue( + tenant_id="tenant-2", + knowledge_graph_id="kg-2", + ) + request = EphemeralWorkerLaunchRequest( + tenant_id="tenant-1", + knowledge_graph_id="kg-1", + session_id="session-1", + sync_run_id="sync-1", + job_package_id="pkg-1", + ) + + with pytest.raises(ValueError, match="scope"): + launcher.launch(request=request, credentials=wrong_scope) + + def test_launch_uses_ephemeral_worker_and_hides_credential_material(self) -> None: + issuer = ScopedWorkloadCredentialIssuer(default_ttl=timedelta(minutes=10)) + launcher = InMemoryEphemeralExtractionWorkerLauncher() + scoped_credentials = issuer.issue(tenant_id="tenant-1", knowledge_graph_id="kg-1") + request = EphemeralWorkerLaunchRequest( + tenant_id="tenant-1", + knowledge_graph_id="kg-1", + session_id="session-1", + sync_run_id="sync-1", + job_package_id="pkg-1", + ) + + result = launcher.launch(request=request, credentials=scoped_credentials) + + assert result.worker_id + assert result.status == "running" + assert result.credentials_expires_at > datetime.now(UTC) + assert not hasattr(result, "token") + assert launcher.active_worker_count == 1 + + def 
test_complete_worker_terminates_container(self) -> None: + issuer = ScopedWorkloadCredentialIssuer(default_ttl=timedelta(minutes=10)) + launcher = InMemoryEphemeralExtractionWorkerLauncher() + scoped_credentials = issuer.issue(tenant_id="tenant-1", knowledge_graph_id="kg-1") + request = EphemeralWorkerLaunchRequest( + tenant_id="tenant-1", + knowledge_graph_id="kg-1", + session_id="session-1", + sync_run_id="sync-1", + job_package_id="pkg-1", + ) + result = launcher.launch(request=request, credentials=scoped_credentials) + + launcher.complete_worker(result.worker_id) + + assert launcher.active_worker_count == 0 + + +class TestScopedWorkloadCredentialIssuer: + def test_issues_short_lived_credentials_with_least_privilege_scope(self) -> None: + issuer = ScopedWorkloadCredentialIssuer(default_ttl=timedelta(minutes=15)) + + issued = issuer.issue(tenant_id="tenant-9", knowledge_graph_id="kg-9") + + assert issued.expires_at > datetime.now(UTC) + assert issued.scopes == ( + "tenant:tenant-9", + "knowledge_graph:kg-9", + "workload:extraction", + ) + assert issued.token From 581d7d11f0a235afaea9a35ec836aa61d4735513 Mon Sep 17 00:00:00 2001 From: Austin Redenbaugh <aredenba@redhat.com> Date: Wed, 20 May 2026 17:32:40 -0400 Subject: [PATCH 038/153] harden ingestion credential handling and add end-to-end flow validation (#715) Prevent credential leakage by keeping runtime secrets out of event payloads and redacting token-like error output, while adding an integration flow test that validates workspace transition and mutation-run metadata visibility end-to-end. 
Co-authored-by: Cursor <cursoragent@cursor.com> --- .../ingestion/infrastructure/event_handler.py | 23 ++- src/api/main.py | 9 +- ...test_workspace_extraction_mutation_flow.py | 176 ++++++++++++++++++ .../test_ingestion_event_handler.py | 60 ++++++ .../unit/test_sessioned_ingestion_handler.py | 11 +- 5 files changed, 273 insertions(+), 6 deletions(-) create mode 100644 src/api/tests/integration/management/test_workspace_extraction_mutation_flow.py diff --git a/src/api/ingestion/infrastructure/event_handler.py b/src/api/ingestion/infrastructure/event_handler.py index 27ea29e5f..6eb08ffb2 100644 --- a/src/api/ingestion/infrastructure/event_handler.py +++ b/src/api/ingestion/infrastructure/event_handler.py @@ -7,6 +7,7 @@ from __future__ import annotations import asyncio +import re from datetime import UTC, datetime from typing import TYPE_CHECKING, Any @@ -49,10 +50,29 @@ def supported_event_types(self) -> frozenset[str]: """Return event types handled by this handler.""" return frozenset({"SyncStarted"}) + @staticmethod + def _redact_sensitive_error(message: str) -> str: + """Redact token-like secrets from error strings before persistence.""" + patterns = ( + # GitHub PAT prefixes + re.compile(r"\bgh[pousr]_[A-Za-z0-9_]{20,}\b"), + # Generic bearer tokens + re.compile(r"(?i)\bBearer\s+[A-Za-z0-9._\-+/=]{16,}\b"), + # Common key/value credential leaks + re.compile( + r"(?i)\b(token|access_token|password|api[_-]?key)\b\s*[:=]\s*['\"]?[^\s,'\"]+" + ), + ) + redacted = message + for pattern in patterns: + redacted = pattern.sub("***REDACTED***", redacted) + return redacted + async def handle( self, event_type: str, payload: dict[str, Any], + runtime_credentials: dict[str, str] | None = None, ) -> None: """Process a SyncStarted event by running the ingestion pipeline. 
@@ -100,6 +120,7 @@ async def handle( credentials_path=payload.get("credentials_path"), tenant_id=payload.get("tenant_id"), credentials=payload.get("credentials"), + credentials=runtime_credentials or payload.get("credentials"), baseline_commit=payload.get("baseline_commit"), ) except asyncio.CancelledError: @@ -112,7 +133,7 @@ async def handle( payload={ "sync_run_id": sync_run_id, "data_source_id": data_source_id, - "error": str(exc), + "error": self._redact_sensitive_error(str(exc)), "occurred_at": now.isoformat(), }, occurred_at=now, diff --git a/src/api/main.py b/src/api/main.py index 9204c77de..1b0236b2f 100644 --- a/src/api/main.py +++ b/src/api/main.py @@ -273,9 +273,6 @@ async def handle(self, event_type: str, payload: dict[str, Any]) -> None: ) except KeyError: credentials = {} - if credentials: - enriched_payload["credentials"] = credentials - tracked_head = await self._resolve_github_tracked_head_commit( connection_config=ds.connection_config, credentials=credentials, @@ -292,7 +289,11 @@ async def handle(self, event_type: str, payload: dict[str, Any]) -> None: ): enriched_payload["no_changes_detected"] = True - await ingestion_handler.handle(event_type, enriched_payload) + await ingestion_handler.handle( + event_type, + enriched_payload, + runtime_credentials=credentials, + ) await session.commit() diff --git a/src/api/tests/integration/management/test_workspace_extraction_mutation_flow.py b/src/api/tests/integration/management/test_workspace_extraction_mutation_flow.py new file mode 100644 index 000000000..4862258ab --- /dev/null +++ b/src/api/tests/integration/management/test_workspace_extraction_mutation_flow.py @@ -0,0 +1,176 @@ +"""Integration test for workspace transition and mutation-log run visibility.""" + +from __future__ import annotations + +from datetime import UTC, datetime + +import pytest +from sqlalchemy import text + +from management.application.services.data_source_service import DataSourceService +from 
management.application.services.knowledge_graph_service import KnowledgeGraphService +from management.domain.aggregates import KnowledgeGraph +from management.domain.entities.data_source_sync_run import MutationLogRunMetadata +from management.domain.value_objects import EdgeTypeDefinition, NodeTypeDefinition, OntologyConfig +from management.presentation.data_sources.models import SyncRunResponse +from shared_kernel.datasource_types import DataSourceAdapterType +from tests.fakes.authorization import InMemoryAuthorizationProvider + +pytestmark = pytest.mark.integration + + +@pytest.mark.asyncio +async def test_workspace_transition_then_extraction_run_metadata_visibility( + async_session, + clean_management_data: None, + knowledge_graph_repository, + data_source_repository, + data_source_sync_run_repository, + test_tenant: str, + test_workspace: str, +) -> None: + """End-to-end flow: validate/transition workspace and project mutation-run metadata.""" + required_columns = ( + "maintenance_schedule", + "maintenance_run_history", + ) + for column_name in required_columns: + column_check = await async_session.execute( + text( + """ + SELECT 1 + FROM information_schema.columns + WHERE table_name = 'knowledge_graphs' + AND column_name = :column_name + """ + ), + {"column_name": column_name}, + ) + if column_check.scalar_one_or_none() is None: + pytest.skip( + f"knowledge_graphs.{column_name} is missing in local integration database" + ) + + user_id = "user-integration-001" + authz = InMemoryAuthorizationProvider() + + kg_service = KnowledgeGraphService( + session=async_session, + knowledge_graph_repository=knowledge_graph_repository, + data_source_repository=data_source_repository, + sync_run_repository=data_source_sync_run_repository, + secret_store=None, + authz=authz, + scope_to_tenant=test_tenant, + ) + ds_service = DataSourceService( + session=async_session, + data_source_repository=data_source_repository, + knowledge_graph_repository=knowledge_graph_repository, + 
sync_run_repository=data_source_sync_run_repository, + secret_store=None, + authz=authz, + scope_to_tenant=test_tenant, + ) + + knowledge_graph = KnowledgeGraph.create( + tenant_id=test_tenant, + workspace_id=test_workspace, + name="Integration Flow KG", + description="Workspace transition + extraction run visibility", + created_by=user_id, + ) + knowledge_graph.set_ontology( + OntologyConfig( + node_types=( + NodeTypeDefinition(label="Repository"), + NodeTypeDefinition( + label="SeedNode", + prepopulated=True, + prepopulated_instance_count=1, + ), + ), + edge_types=( + EdgeTypeDefinition( + label="CONTAINS", + source_labels=("Repository",), + target_labels=("SeedNode",), + ), + ), + ) + ) + async with async_session.begin(): + await knowledge_graph_repository.save(knowledge_graph) + + await authz.write_relationship( + f"knowledge_graph:{knowledge_graph.id.value}", + "admin", + f"user:{user_id}", + ) + + status_before = await kg_service.get_workspace_status( + user_id=user_id, + kg_id=knowledge_graph.id.value, + ) + assert status_before is not None + assert status_before.workspace_mode.value == "schema_bootstrap" + assert status_before.transition_eligible is True + + validated = await kg_service.validate_workspace( + user_id=user_id, + kg_id=knowledge_graph.id.value, + ) + assert validated.transition_eligible is True + assert validated.readiness.blocking_reasons == () + + transitioned = await kg_service.transition_workspace_to_extraction( + user_id=user_id, + kg_id=knowledge_graph.id.value, + ) + assert transitioned.workspace_mode.value == "extraction_operations" + assert transitioned.session_pointers.active_extraction_operations_session_id is not None + + data_source = await ds_service.create( + user_id=user_id, + kg_id=knowledge_graph.id.value, + name="Integration Source", + adapter_type=DataSourceAdapterType.GITHUB, + connection_config={"repo_url": "https://github.com/example/repo"}, + ) + await authz.write_relationship( + f"data_source:{data_source.id.value}", + 
"admin", + f"user:{user_id}", + ) + + sync_run = await ds_service.trigger_sync( + user_id=user_id, + ds_id=data_source.id.value, + ) + assert sync_run.status == "pending" + + sync_run.status = "completed" + sync_run.completed_at = datetime.now(UTC) + sync_run.mutation_log_run = MutationLogRunMetadata( + mutation_log_id="mlog-int-001", + knowledge_graph_id=knowledge_graph.id.value, + session_id=transitioned.session_pointers.active_extraction_operations_session_id, + actor_id=user_id, + started_at=sync_run.started_at, + completed_at=sync_run.completed_at, + token_usage_total=2048, + cost_total_usd=1.37, + operation_counts={"create_node": 12, "create_edge": 8}, + ) + async with async_session.begin(): + await data_source_sync_run_repository.save(sync_run) + + runs = await data_source_sync_run_repository.find_by_data_source(data_source.id.value) + assert len(runs) == 1 + projected = SyncRunResponse.from_domain(runs[0]) + + assert projected.mutation_log_id == "mlog-int-001" + assert projected.session_id == transitioned.session_pointers.active_extraction_operations_session_id + assert projected.token_usage_total == 2048 + assert projected.cost_total_usd == pytest.approx(1.37) + assert projected.operation_counts == {"create_node": 12, "create_edge": 8} diff --git a/src/api/tests/unit/ingestion/infrastructure/test_ingestion_event_handler.py b/src/api/tests/unit/ingestion/infrastructure/test_ingestion_event_handler.py index 408d02bb6..0b1e6069b 100644 --- a/src/api/tests/unit/ingestion/infrastructure/test_ingestion_event_handler.py +++ b/src/api/tests/unit/ingestion/infrastructure/test_ingestion_event_handler.py @@ -170,6 +170,24 @@ async def test_passes_baseline_and_credentials_through_payload( assert call["baseline_commit"] == "abc123" assert call["credentials"] == {"token": "secret"} + async def test_prefers_runtime_credentials_over_payload_credentials( + self, + handler: IngestionEventHandler, + ingestion_service: _FakeIngestionService, + ): + """Runtime credentials 
override payload credentials to avoid payload leakage.""" + payload = _sync_started_payload() + payload["credentials"] = {"token": "payload-token"} + + await handler.handle( + "SyncStarted", + payload, + runtime_credentials={"token": "runtime-token"}, + ) + + call = ingestion_service.calls[0] + assert call["credentials"] == {"token": "runtime-token"} + async def test_emits_job_package_produced_on_success( self, handler: IngestionEventHandler, @@ -245,6 +263,48 @@ async def test_emits_ingestion_failed_on_adapter_error( assert event["payload"]["data_source_id"] == "ds-001" assert "credentials expired" in event["payload"]["error"] + async def test_redacts_secret_material_from_failure_payload( + self, + outbox: _FakeOutboxRepository, + ): + """Failure payload must redact token-shaped credential values.""" + + class _LeakyService(_FakeIngestionService): + async def run( # type: ignore[override] + self, + sync_run_id: str, + data_source_id: str, + knowledge_graph_id: str, + adapter_type: str, + connection_config: dict[str, str], + credentials_path: str | None, + credentials: dict[str, str] | None = None, + baseline_commit: str | None = None, + ) -> JobPackageId: + raise RuntimeError( + "github auth failed for token ghp_1234567890abcdef1234567890abcdef1234" + ) + + handler = IngestionEventHandler( + ingestion_service=_LeakyService(), + outbox=outbox, + ) + payload = _sync_started_payload(sync_run_id="run-redact") + await handler.handle( + "SyncStarted", + payload, + runtime_credentials={ + "token": "ghp_1234567890abcdef1234567890abcdef1234" + }, + ) + + event = outbox.appended[0] + assert event["event_type"] == "IngestionFailed" + assert "ghp_1234567890abcdef1234567890abcdef1234" not in event["payload"][ + "error" + ] + assert "***REDACTED***" in event["payload"]["error"] + async def test_ingestion_failed_aggregate_type( self, failing_service: _FakeIngestionService, diff --git a/src/api/tests/unit/test_sessioned_ingestion_handler.py 
b/src/api/tests/unit/test_sessioned_ingestion_handler.py index 53817275e..b0fc4e7ec 100644 --- a/src/api/tests/unit/test_sessioned_ingestion_handler.py +++ b/src/api/tests/unit/test_sessioned_ingestion_handler.py @@ -101,7 +101,11 @@ async def test_sessioned_ingestion_handler_prepares_commit_context(): call_payload = ingestion_handler.handle.call_args.args[1] assert call_payload["baseline_commit"] == "baseline123" assert call_payload["tracked_branch_head_commit"] == "head456" - assert call_payload["credentials"] == {"token": "tok"} + assert "credentials" not in call_payload + assert ( + ingestion_handler.handle.call_args.kwargs["runtime_credentials"] + == {"token": "tok"} + ) ds_repo.save.assert_awaited_once() assert data_source.tracked_branch_head_commit == "head456" @@ -169,4 +173,9 @@ async def test_sessioned_ingestion_handler_sets_no_changes_flag_when_heads_match assert call_payload["baseline_commit"] == "baseline123" assert call_payload["tracked_branch_head_commit"] == "baseline123" assert call_payload["no_changes_detected"] is True + assert "credentials" not in call_payload + assert ( + ingestion_handler.handle.call_args.kwargs["runtime_credentials"] + == {"token": "tok"} + ) From cbe44853cace8fb6dc131e06d4035fa5d47e6aac Mon Sep 17 00:00:00 2001 From: aredenba-rh <aredenba@redhat.com> Date: Thu, 21 May 2026 11:47:48 -0400 Subject: [PATCH 039/153] test-integration fixes --- ...test_workspace_extraction_mutation_flow.py | 41 +++++++++++-------- .../integration/query/test_kg_resource.py | 8 ++-- 2 files changed, 28 insertions(+), 21 deletions(-) diff --git a/src/api/tests/integration/management/test_workspace_extraction_mutation_flow.py b/src/api/tests/integration/management/test_workspace_extraction_mutation_flow.py index 4862258ab..dca78f60d 100644 --- a/src/api/tests/integration/management/test_workspace_extraction_mutation_flow.py +++ b/src/api/tests/integration/management/test_workspace_extraction_mutation_flow.py @@ -50,6 +50,9 @@ async def 
test_workspace_transition_then_extraction_run_metadata_visibility( pytest.skip( f"knowledge_graphs.{column_name} is missing in local integration database" ) + # The column introspection query starts an implicit transaction on the session. + # Reset it before entering explicit transaction scopes below. + await async_session.rollback() user_id = "user-integration-001" authz = InMemoryAuthorizationProvider() @@ -80,25 +83,24 @@ async def test_workspace_transition_then_extraction_run_metadata_visibility( description="Workspace transition + extraction run visibility", created_by=user_id, ) - knowledge_graph.set_ontology( - OntologyConfig( - node_types=( - NodeTypeDefinition(label="Repository"), - NodeTypeDefinition( - label="SeedNode", - prepopulated=True, - prepopulated_instance_count=1, - ), + ontology_config = OntologyConfig( + node_types=( + NodeTypeDefinition(label="Repository"), + NodeTypeDefinition( + label="SeedNode", + prepopulated=True, + prepopulated_instance_count=1, ), - edge_types=( - EdgeTypeDefinition( - label="CONTAINS", - source_labels=("Repository",), - target_labels=("SeedNode",), - ), + ), + edge_types=( + EdgeTypeDefinition( + label="CONTAINS", + source_labels=("Repository",), + target_labels=("SeedNode",), ), - ) + ), ) + knowledge_graph.set_ontology(ontology_config) async with async_session.begin(): await knowledge_graph_repository.save(knowledge_graph) @@ -107,6 +109,11 @@ async def test_workspace_transition_then_extraction_run_metadata_visibility( "admin", f"user:{user_id}", ) + await kg_service.save_ontology( + user_id=user_id, + kg_id=knowledge_graph.id.value, + config=ontology_config, + ) status_before = await kg_service.get_workspace_status( user_id=user_id, @@ -139,7 +146,7 @@ async def test_workspace_transition_then_extraction_run_metadata_visibility( ) await authz.write_relationship( f"data_source:{data_source.id.value}", - "admin", + "manage", f"user:{user_id}", ) diff --git a/src/api/tests/integration/query/test_kg_resource.py 
b/src/api/tests/integration/query/test_kg_resource.py index 68367e747..d219751c1 100644 --- a/src/api/tests/integration/query/test_kg_resource.py +++ b/src/api/tests/integration/query/test_kg_resource.py @@ -196,8 +196,8 @@ async def kg1_id( await async_session.execute( text( "INSERT INTO knowledge_graphs " - "(id, tenant_id, workspace_id, name, description, created_at, updated_at) " - "VALUES (:id, :tenant_id, :workspace_id, :name, :desc, NOW(), NOW())" + "(id, tenant_id, workspace_id, name, description, maintenance_run_history, created_at, updated_at) " + "VALUES (:id, :tenant_id, :workspace_id, :name, :desc, '[]'::jsonb, NOW(), NOW())" ), { "id": kg_id, @@ -227,8 +227,8 @@ async def kg2_id( await async_session.execute( text( "INSERT INTO knowledge_graphs " - "(id, tenant_id, workspace_id, name, description, created_at, updated_at) " - "VALUES (:id, :tenant_id, :workspace_id, :name, :desc, NOW(), NOW())" + "(id, tenant_id, workspace_id, name, description, maintenance_run_history, created_at, updated_at) " + "VALUES (:id, :tenant_id, :workspace_id, :name, :desc, '[]'::jsonb, NOW(), NOW())" ), { "id": kg_id, From 984204622136d47f383b5f0c04d788370ad0398d Mon Sep 17 00:00:00 2001 From: aredenba-rh <aredenba@redhat.com> Date: Fri, 22 May 2026 11:36:52 -0400 Subject: [PATCH 040/153] kg-manage-experience --- specs/ui/experience.spec.md | 59 ++----- specs/ui/kg-manage-experience.spec.md | 237 ++++++++++++++++++++++++++ 2 files changed, 250 insertions(+), 46 deletions(-) create mode 100644 specs/ui/kg-manage-experience.spec.md diff --git a/specs/ui/experience.spec.md b/specs/ui/experience.spec.md index dd1f091d1..f17216337 100644 --- a/specs/ui/experience.spec.md +++ b/specs/ui/experience.spec.md @@ -543,49 +543,16 @@ The system SHALL expose knowledge graph row actions as Manage, Query, and Delete - WHEN navigation completes - THEN the user lands on that knowledge graph's mode-aware workspace page -### Requirement: Bootstrap to Extraction Transition -The system SHALL 
provide a UI-gated transition from schema bootstrap mode to extraction operations mode. - -#### Scenario: Validate action -- GIVEN a user with `edit` permission on a knowledge graph in bootstrap mode -- WHEN the user clicks Validate -- THEN validation results are displayed in the workspace -- AND transition action remains unavailable until validation passes - -#### Scenario: Go to extraction action -- GIVEN bootstrap validation has passed -- WHEN the user clicks "Go to Extraction/Mutations" -- THEN the UI transitions the knowledge graph into extraction operations mode -- AND a new extraction-mode agent session is started - -### Requirement: Unified Extraction Workspace -The system SHALL present extraction jobs and minor direct edits in one workspace. - -#### Scenario: Conversation-first layout -- GIVEN a user in extraction operations mode -- THEN the conversation panel remains visible as the primary surface -- AND the lower workspace area is tabbed for operational views - -#### Scenario: Clear chat reset -- GIVEN an active extraction conversation session -- WHEN the user clicks Clear chat -- THEN the current chat history is cleared -- AND a new clean session is started for the same user and knowledge graph - -#### Scenario: Tabbed operations area -- GIVEN the extraction workspace -- WHEN the user switches tabs -- THEN extraction-job controls, manual mutation tools, and run/log views are available without leaving the page - -### Requirement: MutationLog Browser -The system SHALL provide a knowledge-graph-scoped MutationLog browser. 
- -#### Scenario: Scoped listing -- GIVEN the user is viewing a specific knowledge graph -- WHEN the user opens MutationLogs -- THEN only mutation log runs associated with that knowledge graph are listed - -#### Scenario: Run detail panel -- GIVEN a mutation log run is selected -- WHEN details are shown -- THEN the UI displays run summary, per-entry operation previews, token/cost metrics, and operation counts by type +### Requirement: Detailed KG Manage Experience Specification +The system SHALL define detailed KG Manage workspace behavior in a dedicated canonical UX spec to avoid drift. + +#### Scenario: Canonical detailed behavior source +- GIVEN requirements for the graph manage page flow, conversation UX, modes, and step cards +- THEN details are defined in `specs/ui/kg-manage-experience.spec.md` +- AND this file remains the high-level UX umbrella for broader product behavior + +#### Scenario: Cross-spec consistency +- GIVEN updates to KG manage interaction behavior +- WHEN UX requirements are changed +- THEN `specs/ui/kg-manage-experience.spec.md` is updated first +- AND summary references here are kept consistent with it diff --git a/specs/ui/kg-manage-experience.spec.md b/specs/ui/kg-manage-experience.spec.md new file mode 100644 index 000000000..d91d9eb24 --- /dev/null +++ b/specs/ui/kg-manage-experience.spec.md @@ -0,0 +1,237 @@ +# Knowledge Graph Manage Experience + +## Purpose +Define the canonical UX for the Knowledge Graph `Manage` flow in Kartograph, modeled after the proven interaction patterns in k-extract project workspace and design pages. + +This spec is the detailed source of truth for KG management UI behavior. + +## Scope +In scope: +- `Knowledge Graphs -> Manage` entry flow. +- KG workspace layout, step cards, and progress semantics. +- Graph Management conversation-first interaction model. +- Mode switch behavior and lower-panel content contracts. +- Error/loading/empty/forbidden states and keyboard interactions. 
+ +Out of scope: +- Backend domain rules already specified in management/extraction/graph specs. +- Container runtime implementation details. + +## Page Contracts +### Page: KG Manage Workspace Overview +Route: `/knowledge-graphs/{kgId}/manage` + +Primary intent: +- Provide a project-workspace-style control center for the selected graph. +- Help the user decide the next action with minimal navigation overhead. + +Top-level regions: +- Header (graph name/identity, back action) +- `Project workspace` section +- `Suggested next step` callout with one primary CTA +- Step-card grid (`Data Sources`, `Graph Management`, `MutationLogs`, `Maintain`) + +### Page: KG Graph Management +Route: `/knowledge-graphs/{kgId}/manage` (same page surface, `Graph Management` active state) + +Primary intent: +- Keep conversation as the main control surface. +- Support three operation modes without session fragmentation. + +Top-level regions: +- Graph Management mode switcher +- Shared persistent chat box +- Hybrid lower panel: + - left rail: status/artifacts + - right detail panel: mode-specific workspace + +## Requirements + +### Requirement: Manage Entry Navigation +The system SHALL route users from knowledge graph list rows into a graph-scoped manage workspace. + +#### Scenario: Manage route entry +- GIVEN the user is on the Knowledge Graphs list +- WHEN the user clicks `Manage` for a knowledge graph +- THEN navigation lands on `/knowledge-graphs/{kgId}/manage` +- AND the page header includes graph identity and a back action +- AND the selected graph context is available to all step cards without re-selection + +### Requirement: Workspace Shell and Step Cards +The system SHALL provide a project-workspace-style shell with actionable step cards. 
+ +#### Scenario: Step card set +- GIVEN the user opens KG manage workspace +- THEN the step card grid contains exactly: + - `Data Sources` + - `Graph Management` + - `MutationLogs` + - `Maintain` + +#### Scenario: Suggested next step +- GIVEN workspace status and run metadata are available +- WHEN the manage page renders +- THEN a `Suggested next step` callout is shown above the card grid +- AND the callout CTA routes to the corresponding step destination +- AND the CTA label uses action wording (`Open`, `Revisit`, or `Run`) + +#### Scenario: Card status semantics +- GIVEN each step has completion/readiness metadata +- WHEN cards render +- THEN each card displays status tint and label (`ready`, `in_progress`, `needs_attention`, or `blocked`) +- AND each card includes one primary action (`Open` or `Revisit`) +- AND each card includes one line of status detail text suitable for quick scanning + +### Requirement: Data Sources Step Behavior +The system SHALL preserve the established data-source operations experience while keeping graph context. + +#### Scenario: Graph-scoped data source step +- GIVEN the user opens `Data Sources` from KG manage workspace +- THEN the destination is pre-scoped to the selected knowledge graph +- AND existing commit cues, maintenance readiness, and diff summary behaviors remain available +- AND returning to manage workspace preserves the current graph context + +### Requirement: Graph Management Conversation-First Layout +The system SHALL use a single persistent chat panel as the primary control surface. 
+ +#### Scenario: Persistent shared chat +- GIVEN the user is in Graph Management +- WHEN the user changes modes +- THEN chat history remains in the same session scope +- AND the active mode changes assistant skill framing/instructions rather than opening a new chat +- AND the input placeholder/help text updates to reflect the selected mode + +#### Scenario: Top-section controls +- GIVEN the Graph Management page +- THEN the top section includes: + - mode switcher + - clear chat action + - session status indicator + - validation affordance when relevant to mode +- AND these controls are visible without scrolling on desktop layout + +### Requirement: Graph Management Modes +The system SHALL support three operator modes on one page. + +#### Scenario: Supported modes +- GIVEN the mode selector in Graph Management +- THEN available modes are: + - `Initial Schema Design` + - `Extraction Jobs` + - `One-off Mutations` + +#### Scenario: Mode-specific AI behavior +- GIVEN the user selects a mode +- WHEN the assistant responds or suggests next actions +- THEN the assistant uses mode-appropriate skills and guidance +- AND does not lose shared conversational context +- AND assistant suggestions are constrained to the current knowledge graph scope + +### Requirement: Hybrid Lower Panel +The system SHALL provide a hybrid lower panel with shared status/artifact rail and mode-specific detail panel. 
+ +#### Scenario: Shared rail +- GIVEN the Graph Management lower panel +- THEN a persistent rail shows graph-management status/artifact items relevant across modes +- AND each item includes status plus last-updated metadata +- AND rail items support keyboard focus and selection + +#### Scenario: Mode-specific detail panel +- GIVEN a selected mode and selected rail item +- THEN the right-side detail panel renders mode-specific content: + - `Initial Schema Design`: schema artifacts, readiness blockers, validation controls + - `Extraction Jobs`: job setup, execution controls, and job run context + - `One-off Mutations`: mutation authoring controls and submit/preview context +- AND switching modes preserves rail selection when the selected item is valid in the new mode + +#### Scenario: Schema design parity behavior +- GIVEN `Initial Schema Design` mode is active +- THEN the lower panel exposes schema-focused artifact/status content analogous to k-extract design-artifact workflow +- AND the user can inspect and revise schema-related content without leaving Graph Management + +### Requirement: MutationLogs Step Experience +The system SHALL provide graph-scoped mutation run visibility. + +#### Scenario: Graph-scoped mutation run list +- GIVEN the user opens the `MutationLogs` step +- THEN only runs for the selected knowledge graph are listed +- AND list items show status, timestamp, source, and run identifier +- AND the list defaults to newest run first + +#### Scenario: Run detail richness +- GIVEN a selected run +- THEN the detail panel shows run summary, session reference, token/cost metrics, and operation-class counts +- AND supports per-entry operation preview when available +- AND gracefully displays a no-preview state when detailed entries are unavailable + +### Requirement: Maintain Step Experience +The system SHALL provide incremental maintenance entry points from the manage workspace. 
+ +#### Scenario: Maintenance readiness actioning +- GIVEN tracked source changes are detected +- WHEN the user opens `Maintain` +- THEN the UI highlights change readiness and provides the maintenance execution path +- AND relevant diff summary context is available before execution +- AND the user can navigate back to workspace overview without losing step status context + +### Requirement: Session and Reset Behavior +The system SHALL support explicit conversational reset without losing auditability. + +#### Scenario: Clear chat reset +- GIVEN an active graph-management chat +- WHEN the user clicks `Clear chat` +- THEN the current chat thread resets +- AND a new clean session starts for the same user/knowledge-graph scope +- AND historical session records remain available for audit/history views +- AND mode selection remains unchanged after reset + +### Requirement: State and Accessibility Contracts +The system SHALL provide predictable state handling and keyboard affordances. + +#### Scenario: Loading and empty states +- GIVEN initial page load or step data fetch +- THEN each major section shows explicit loading placeholders +- AND empty states provide direct next actions +- AND loading/error state messaging is step-specific (not generic across all steps) + +#### Scenario: Forbidden state +- GIVEN the user lacks required permission for a step action +- WHEN the action is attempted +- THEN the UI shows a clear forbidden state/message +- AND avoids partial, misleading updates +- AND disabled actions explain why access is restricted + +#### Scenario: Keyboard behavior +- GIVEN the chat input is focused +- THEN `Enter` sends and `Shift+Enter` inserts newline +- AND mode switch/step navigation remains keyboard reachable +- AND primary step-card actions can be triggered by keyboard focus + Enter/Space + +## Traceability to UI Surfaces + +Primary surfaces expected to implement this UX: +- `src/dev-ui/app/pages/knowledge-graphs/index.vue` +- 
`src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue` +- `src/dev-ui/app/components/extraction/SharedConversationPanel.vue` + +Requirement-to-surface mapping: +- Manage Entry Navigation -> `knowledge-graphs/index.vue` +- Workspace Shell and Step Cards -> `knowledge-graphs/[kgId]/manage.vue` +- Graph Management Conversation-First Layout -> `knowledge-graphs/[kgId]/manage.vue`, `SharedConversationPanel.vue` +- Graph Management Modes -> `knowledge-graphs/[kgId]/manage.vue` +- Hybrid Lower Panel -> `knowledge-graphs/[kgId]/manage.vue` +- MutationLogs Step Experience -> `knowledge-graphs/[kgId]/manage.vue` +- Maintain Step Experience -> `knowledge-graphs/[kgId]/manage.vue` (+ data-source operations surface) +- Session and Reset Behavior -> `knowledge-graphs/[kgId]/manage.vue`, `SharedConversationPanel.vue` +- State and Accessibility Contracts -> `knowledge-graphs/[kgId]/manage.vue`, `SharedConversationPanel.vue` + +## Issue Mapping +Detailed implementation tracking for this spec has been externalized to GitHub issues: +- `#722` workspace overview parity +- `#723` graph-management parity (shared chat + mode switch + hybrid panel) +- `#724` mutationlogs step hardening +- `#725` accessibility and state contracts + +## Notes for Issue Alignment +- In-place unified operations parity is tracked by GitHub issue `#720`. +- Per-run operation preview depth is tracked by GitHub issue `#721`. From 200d1b4977c4701d32d078b37ae6e5404d1f1ce0 Mon Sep 17 00:00:00 2001 From: Austin Redenbaugh <aredenba@redhat.com> Date: Fri, 22 May 2026 12:19:35 -0400 Subject: [PATCH 041/153] feat(dev-ui): add KG manage workspace overview parity (#722) (#726) Introduce project-workspace step cards, suggested next-step CTA, and graph-scoped navigation for Data Sources and Maintain while preserving existing graph-management flows. 
Co-authored-by: Cursor <cursoragent@cursor.com> --- src/dev-ui/app/pages/data-sources/index.vue | 51 ++- .../pages/knowledge-graphs/[kgId]/manage.vue | 265 ++++++++++++++- .../knowledge-graph-manage-workspace.test.ts | 192 +++++++++++ src/dev-ui/app/utils/kgManageWorkspace.ts | 319 ++++++++++++++++++ 4 files changed, 815 insertions(+), 12 deletions(-) create mode 100644 src/dev-ui/app/utils/kgManageWorkspace.ts diff --git a/src/dev-ui/app/pages/data-sources/index.vue b/src/dev-ui/app/pages/data-sources/index.vue index e38781d89..1ab0fa470 100644 --- a/src/dev-ui/app/pages/data-sources/index.vue +++ b/src/dev-ui/app/pages/data-sources/index.vue @@ -638,6 +638,8 @@ async function approveOntology() { const dataSources = ref<DataSourceItem[]>([]) const loadingDataSources = ref(false) +const scopedKnowledgeGraphId = ref('') +const manageReturnKgId = ref('') const expandedDiffLists = ref<Record<string, boolean>>({}) const refreshingCommitRefs = ref<Record<string, boolean>>({}) const adoptingBaselines = ref<Record<string, boolean>>({}) @@ -647,6 +649,19 @@ function isMaintenanceReady(ds: DataSourceItem): boolean { return ds.last_extraction_baseline_commit !== ds.tracked_branch_head_commit } +const visibleDataSources = computed(() => { + if (!scopedKnowledgeGraphId.value) return dataSources.value + return dataSources.value.filter( + (ds) => ds.knowledge_graph_id === scopedKnowledgeGraphId.value, + ) +}) + +const manageWorkspaceReturnUrl = computed(() => + manageReturnKgId.value + ? `/knowledge-graphs/${manageReturnKgId.value}/manage` + : '', +) + function isDiffExpanded(dsId: string): boolean { return expandedDiffLists.value[dsId] === true } @@ -958,11 +973,24 @@ onMounted(async () => { // When the user clicks "Add Data Source" from the post-KG-creation toast on // /knowledge-graphs, they are sent to /data-sources?kg_id=<new-kg-id>. Reading // this param here ensures the wizard opens immediately with the right KG chosen. 
+ // Manage workspace navigation contract: ?kg_id=<id>&from=manage preserves graph scope + // without auto-opening the creation wizard (see buildDataSourcesStepUrl). const preselectedKgId = route.query.kg_id as string | undefined - if (preselectedKgId) { + const fromManage = route.query.from === 'manage' + const focusMaintain = route.query.focus === 'maintain' + + if (fromManage && preselectedKgId) { + scopedKnowledgeGraphId.value = preselectedKgId + manageReturnKgId.value = preselectedKgId + selectedMaintenanceKnowledgeGraphId.value = preselectedKgId + } else if (preselectedKgId) { await nextTick() openWizard(preselectedKgId) } + + if (focusMaintain && preselectedKgId) { + selectedMaintenanceKnowledgeGraphId.value = preselectedKgId + } }) onUnmounted(() => { @@ -1312,10 +1340,19 @@ async function handleDeleteDs() { </p> </div> </div> - <Button :disabled="!hasTenant" @click="openWizard"> - <Plus class="mr-2 size-4" /> - Add Data Source - </Button> + <div class="flex items-center gap-2"> + <Button + v-if="manageWorkspaceReturnUrl" + variant="outline" + @click="navigateTo(manageWorkspaceReturnUrl)" + > + Back to workspace overview + </Button> + <Button :disabled="!hasTenant" @click="openWizard(scopedKnowledgeGraphId || undefined)"> + <Plus class="mr-2 size-4" /> + Add Data Source + </Button> + </div> </div> <Separator /> @@ -1505,7 +1542,7 @@ async function handleDeleteDs() { </Card> <!-- Empty state (no data sources yet) --> - <div v-if="dataSources.length === 0" class="flex flex-col items-center gap-4 py-16 text-center"> + <div v-if="visibleDataSources.length === 0" class="flex flex-col items-center gap-4 py-16 text-center"> <div class="rounded-full bg-muted p-5"> <Cable class="size-10 text-muted-foreground" /> </div> @@ -1525,7 +1562,7 @@ async function handleDeleteDs() { <!-- Data source list (shown when sources exist) --> <div v-else class="space-y-3"> - <div v-for="ds in dataSources" :key="ds.id" class="rounded-lg border bg-card"> + <div v-for="ds in 
visibleDataSources" :key="ds.id" class="rounded-lg border bg-card"> <div class="flex items-center justify-between p-4"> <div class="flex items-center gap-3"> <div class="rounded-md bg-muted p-2"> diff --git a/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue b/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue index 27119a4b7..21eaf07e4 100644 --- a/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue +++ b/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue @@ -8,6 +8,17 @@ import { Card, CardContent, CardHeader, CardTitle, CardDescription } from '@/com import { Separator } from '@/components/ui/separator' import { Tabs, TabsList, TabsTrigger, TabsContent } from '@/components/ui/tabs' import SharedConversationPanel from '@/components/extraction/SharedConversationPanel.vue' +import { + buildDataSourcesStepUrl, + buildMaintainStepUrl, + buildManageStepUrl, + buildSuggestedNextStep, + buildWorkspaceStepCards, + parseManageStepQuery, + resolveStepDestination, + stepStatusTintClass, + type WorkspaceStepId, +} from '@/utils/kgManageWorkspace' interface WorkspaceReadinessStatus { has_minimum_entity_types: boolean @@ -31,9 +42,17 @@ interface WorkspaceStatusResponse { session_pointers: WorkspaceSessionPointers } +interface KnowledgeGraphIdentity { + id: string + name: string + description?: string | null +} + interface DataSourceRef { id: string name: string + last_extraction_baseline_commit?: string | null + tracked_branch_head_commit?: string | null } interface MutationLogRunView { @@ -64,6 +83,9 @@ const { hasTenant, tenantVersion } = useTenant() const { extractErrorMessage } = useErrorHandler() const { apiFetch } = useApiClient() const kgId = computed(() => String(route.params.kgId ?? 
'')) +const kgIdentity = ref<KnowledgeGraphIdentity | null>(null) +const dataSourceCount = ref(0) +const maintenanceReadyCount = ref(0) const loading = ref(false) const validating = ref(false) const transitioning = ref(false) @@ -77,6 +99,24 @@ const mutationLogLoading = ref(false) const mutationLogRuns = ref<MutationLogRunView[]>([]) const selectedMutationLogRunId = ref<string | null>(null) +const activeStep = computed(() => parseManageStepQuery(route.query.step)) +const showOverview = computed(() => activeStep.value === null) + +const workspaceOverviewInput = computed(() => ({ + kgId: kgId.value, + dataSourceCount: dataSourceCount.value, + maintenanceReadyCount: maintenanceReadyCount.value, + mutationLogRunCount: mutationLogRuns.value.length, + workspaceStatus: statusProjection.value, +})) + +const workspaceStepCards = computed(() => buildWorkspaceStepCards(workspaceOverviewInput.value)) +const suggestedNextStep = computed(() => buildSuggestedNextStep(workspaceOverviewInput.value)) + +const graphHeaderTitle = computed(() => + kgIdentity.value?.name ?? 'Knowledge Graph Manage Workspace', +) + const modeLabel = computed(() => statusProjection.value?.workspace_mode === 'extraction_operations' ? 
'Extraction Operations' @@ -151,6 +191,45 @@ const sessionActivityLines = computed(() => { return candidate.filter((line): line is string => typeof line === 'string' && line.trim().length > 0) }) +async function loadKgIdentity() { + if (!hasTenant.value || !kgId.value) return + try { + kgIdentity.value = await apiFetch<KnowledgeGraphIdentity>( + `/management/knowledge-graphs/${kgId.value}`, + ) + } catch (err) { + kgIdentity.value = { id: kgId.value, name: kgId.value } + toast.error('Failed to load knowledge graph identity', { + description: extractErrorMessage(err), + }) + } +} + +async function loadOverviewMetrics() { + if (!hasTenant.value || !kgId.value) return + try { + const dataSources = await apiFetch<DataSourceRef[]>( + `/management/knowledge-graphs/${kgId.value}/data-sources`, + ) + dataSourceCount.value = dataSources.length + maintenanceReadyCount.value = dataSources.filter((ds) => { + if (!ds.last_extraction_baseline_commit || !ds.tracked_branch_head_commit) return false + return ds.last_extraction_baseline_commit !== ds.tracked_branch_head_commit + }).length + } catch { + dataSourceCount.value = 0 + maintenanceReadyCount.value = 0 + } +} + +function openWorkspaceStep(stepId: WorkspaceStepId) { + navigateTo(resolveStepDestination(kgId.value, stepId)) +} + +function returnToWorkspaceOverview() { + navigateTo(buildManageStepUrl(kgId.value)) +} + async function loadWorkspaceStatus() { if (!hasTenant.value || !kgId.value) return loading.value = true @@ -270,6 +349,7 @@ async function transitionToExtraction() { } async function clearChat() { + // Clear chat resets the active extraction session for this knowledge graph. 
if (!kgId.value) return clearingChat.value = true try { @@ -288,14 +368,21 @@ async function clearChat() { } onMounted(() => { + loadKgIdentity() loadWorkspaceStatus() + loadOverviewMetrics() loadMutationLogRuns() }) watch(tenantVersion, () => { + kgIdentity.value = null statusProjection.value = null extractionSession.value = null + dataSourceCount.value = 0 + maintenanceReadyCount.value = 0 + loadKgIdentity() loadWorkspaceStatus() + loadOverviewMetrics() loadMutationLogRuns() }) @@ -314,16 +401,25 @@ watch( <div class="flex items-center justify-between"> <div class="space-y-1"> <div class="flex items-center gap-2"> - <h1 class="text-2xl font-semibold tracking-tight">Knowledge Graph Manage Workspace</h1> - <Badge variant="secondary">{{ modeLabel }}</Badge> + <h1 class="text-2xl font-semibold tracking-tight">{{ graphHeaderTitle }}</h1> + <Badge v-if="!showOverview" variant="secondary">{{ modeLabel }}</Badge> </div> <p class="text-sm text-muted-foreground"> - Validate readiness and move from schema bootstrap to extraction operations. + <template v-if="showOverview"> + Project workspace for knowledge graph {{ kgId }}. + </template> + <template v-else> + Validate readiness and move from schema bootstrap to extraction operations. + </template> </p> </div> - <Button variant="outline" size="sm" @click="navigateTo('/knowledge-graphs')"> + <Button + variant="outline" + size="sm" + @click="showOverview ? navigateTo('/knowledge-graphs') : returnToWorkspaceOverview()" + > <ArrowLeft class="mr-1.5 size-3.5" /> - Back to Knowledge Graphs + {{ showOverview ? 'Back to Knowledge Graphs' : 'Back to workspace overview' }} </Button> </div> @@ -339,6 +435,164 @@ watch( </div> <template v-else-if="statusProjection"> + <section v-if="showOverview" class="space-y-6"> + <div> + <h2 class="text-lg font-semibold tracking-tight">Project workspace</h2> + <p class="text-sm text-muted-foreground"> + Choose a step to continue work on this knowledge graph without re-selecting context. 
+ </p> + </div> + + <Card class="border-primary/30 bg-primary/5"> + <CardHeader class="pb-3"> + <CardTitle class="text-base">Suggested next step</CardTitle> + <CardDescription>{{ suggestedNextStep.description }}</CardDescription> + </CardHeader> + <CardContent> + <Button @click="openWorkspaceStep(suggestedNextStep.stepId)"> + {{ suggestedNextStep.actionLabel }} {{ suggestedNextStep.title }} + </Button> + </CardContent> + </Card> + + <div class="grid gap-4 md:grid-cols-2 xl:grid-cols-4"> + <!-- Step cards: Data Sources, Graph Management, MutationLogs, Maintain --> + <Card + v-for="card in workspaceStepCards" + :key="card.id" + class="flex flex-col" + :class="stepStatusTintClass(card.status)" + > + <CardHeader class="pb-3"> + <div class="flex items-center justify-between gap-2"> + <CardTitle class="text-base">{{ card.title }}</CardTitle> + <Badge variant="outline">{{ card.status }}</Badge> + </div> + <CardDescription>{{ card.statusDetail }}</CardDescription> + </CardHeader> + <CardContent class="mt-auto"> + <Button + class="w-full" + variant="outline" + @click="openWorkspaceStep(card.id)" + > + {{ card.actionLabel }} + </Button> + </CardContent> + </Card> + </div> + </section> + + <section v-else-if="activeStep === 'mutation-logs'" class="space-y-4"> + <Card> + <CardHeader> + <CardTitle class="text-base">MutationLogs</CardTitle> + <CardDescription> + Knowledge-graph scoped mutation runs with per-entry operation previews and run metrics. 
+ </CardDescription> + </CardHeader> + <CardContent class="grid gap-3 xl:grid-cols-[280px_1fr]"> + <div class="rounded border"> + <div class="flex items-center justify-between border-b px-3 py-2"> + <p class="text-xs font-medium text-muted-foreground">Runs</p> + <Button size="sm" variant="ghost" class="h-6 px-2 text-[10px]" @click="loadMutationLogRuns"> + Refresh + </Button> + </div> + <div v-if="mutationLogLoading" class="flex items-center gap-2 px-3 py-4 text-xs text-muted-foreground"> + <Loader2 class="size-3.5 animate-spin" /> + Loading mutation runs... + </div> + <div v-else-if="mutationLogRuns.length === 0" class="px-3 py-4 text-xs text-muted-foreground"> + No mutation log runs found for this knowledge graph yet. + </div> + <div v-else class="max-h-64 overflow-auto p-2 space-y-1.5"> + <button + v-for="run in mutationLogRuns" + :key="run.id" + class="w-full rounded border px-2 py-1.5 text-left text-xs transition-colors" + :class="selectedMutationLogRunId === run.id ? 'border-primary bg-primary/5' : 'hover:bg-muted/40'" + @click="selectedMutationLogRunId = run.id" + > + <p class="font-medium truncate">{{ run.data_source_name }}</p> + <p class="text-muted-foreground truncate">{{ new Date(run.started_at).toLocaleString() }}</p> + <div class="mt-1 flex items-center justify-between"> + <Badge variant="outline" class="text-[10px]">{{ run.status }}</Badge> + <span class="font-mono text-[10px] text-muted-foreground">{{ run.mutation_log_id }}</span> + </div> + </button> + </div> + </div> + + <div v-if="selectedMutationLogRun" class="space-y-3 rounded border p-3"> + <div class="flex flex-wrap items-center gap-2"> + <Badge>{{ selectedMutationLogRun.status }}</Badge> + <p class="text-xs text-muted-foreground"> + Data source: + <span class="font-medium text-foreground">{{ selectedMutationLogRun.data_source_name }}</span> + </p> + </div> + <div class="grid gap-2 sm:grid-cols-2"> + <div class="rounded border px-3 py-2 text-xs"> + <p 
class="text-muted-foreground">MutationLog</p> + <p class="mt-1 font-mono break-all">{{ selectedMutationLogRun.mutation_log_id }}</p> + </div> + <div class="rounded border px-3 py-2 text-xs"> + <p class="text-muted-foreground">Session</p> + <p class="mt-1 font-mono break-all">{{ selectedMutationLogRun.session_id ?? 'None' }}</p> + </div> + <div class="rounded border px-3 py-2 text-xs"> + <p class="text-muted-foreground">Started</p> + <p class="mt-1">{{ new Date(selectedMutationLogRun.started_at).toLocaleString() }}</p> + </div> + <div class="rounded border px-3 py-2 text-xs"> + <p class="text-muted-foreground">Completed</p> + <p class="mt-1"> + {{ selectedMutationLogRun.completed_at ? new Date(selectedMutationLogRun.completed_at).toLocaleString() : 'In progress' }} + </p> + </div> + </div> + <div class="grid gap-2 sm:grid-cols-2"> + <div class="rounded border px-3 py-2 text-xs"> + <p class="text-muted-foreground flex items-center gap-1.5"> + <Coins class="size-3.5" /> + Token usage + </p> + <p class="mt-1 font-medium">{{ (selectedMutationLogRun.token_usage_total ?? 0).toLocaleString() }}</p> + </div> + <div class="rounded border px-3 py-2 text-xs"> + <p class="text-muted-foreground flex items-center gap-1.5"> + <DollarSign class="size-3.5" /> + Cost (USD) + </p> + <p class="mt-1 font-medium">${{ (selectedMutationLogRun.cost_total_usd ?? 0).toFixed(2) }}</p> + </div> + </div> + <div class="rounded border p-3"> + <p class="mb-2 text-xs font-medium text-muted-foreground">Per-entry operation previews</p> + <div v-if="Object.keys(selectedMutationLogRun.operation_counts).length === 0" class="text-xs text-muted-foreground"> + No operation class counts recorded for this run. 
+ </div> + <div v-else class="space-y-1.5"> + <div + v-for="([opClass, count]) in Object.entries(selectedMutationLogRun.operation_counts)" + :key="opClass" + class="flex items-center justify-between rounded border px-2 py-1.5 text-xs" + > + <span class="font-mono">{{ opClass }}</span> + <Badge variant="secondary">{{ count }}</Badge> + </div> + </div> + </div> + </div> + <div v-else class="rounded border border-dashed p-6 text-sm text-muted-foreground"> + Select a mutation run to view summary and per-entry previews. + </div> + </CardContent> + </Card> + </section> + + <section v-else class="space-y-6"> <Card> <CardHeader> <CardTitle class="text-base">Mode & Transition Controls</CardTitle> @@ -657,6 +911,7 @@ watch( </CardContent> </Card> </div> + </section> </template> </div> </template> diff --git a/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts b/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts index 7a850813b..65555968a 100644 --- a/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts +++ b/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts @@ -1,16 +1,47 @@ import { describe, it, expect } from 'vitest' import { readFileSync } from 'fs' import { resolve } from 'path' +import { + WORKSPACE_STEP_ORDER, + WORKSPACE_STEP_TITLES, + buildDataSourcesStepUrl, + buildMaintainStepUrl, + buildManageStepUrl, + buildSuggestedNextStep, + buildWorkspaceStepCards, + isMaintenanceReady, + resolveStepDestination, + stepStatusTintClass, +} from '../utils/kgManageWorkspace' const manageWorkspaceVue = readFileSync( resolve(__dirname, '../pages/knowledge-graphs/[kgId]/manage.vue'), 'utf-8', ) +const kgIndexVue = readFileSync( + resolve(__dirname, '../pages/knowledge-graphs/index.vue'), + 'utf-8', +) +const dataSourcesVue = readFileSync( + resolve(__dirname, '../pages/data-sources/index.vue'), + 'utf-8', +) const sharedConversationPanelVue = readFileSync( resolve(__dirname, '../components/extraction/SharedConversationPanel.vue'), 'utf-8', 
) +const baseWorkspaceStatus = { + workspace_mode: 'schema_bootstrap' as const, + transition_eligible: false, + readiness: { + has_minimum_entity_types: false, + has_minimum_relationship_types: false, + prepopulated_types_ready: false, + blocking_reasons: ['Missing entity types'], + }, +} + describe('Knowledge Graph Manage Workspace - mode-aware controls', () => { it('loads workspace status projection from management API', () => { expect(manageWorkspaceVue).toContain('/workspace-status') @@ -109,6 +140,167 @@ describe('Knowledge Graph Manage Workspace - bootstrap readiness guidance', () = }) }) +describe('KG-MANAGE-001 - manage entry navigation', () => { + it('routes Manage action to graph-scoped manage workspace', () => { + expect(kgIndexVue).toContain('navigateTo(`/knowledge-graphs/${kg.id}/manage`)') + }) + + it('loads graph identity for manage header and back action', () => { + expect(manageWorkspaceVue).toContain('/management/knowledge-graphs/${kgId.value}') + expect(manageWorkspaceVue).toContain('loadKgIdentity') + expect(manageWorkspaceVue).toContain('Back to Knowledge Graphs') + }) +}) + +describe('KG-MANAGE-002 - workspace step card set', () => { + it('renders Project workspace section with exactly four step cards', () => { + expect(manageWorkspaceVue).toContain('Project workspace') + expect(manageWorkspaceVue).toContain('workspaceStepCards') + for (const stepId of WORKSPACE_STEP_ORDER) { + expect(manageWorkspaceVue).toContain(WORKSPACE_STEP_TITLES[stepId]) + } + }) + + it('buildWorkspaceStepCards returns the canonical four-card set', () => { + const cards = buildWorkspaceStepCards({ + kgId: 'kg-1', + dataSourceCount: 1, + maintenanceReadyCount: 0, + mutationLogRunCount: 0, + workspaceStatus: baseWorkspaceStatus, + }) + + expect(cards.map((card) => card.title)).toEqual([ + 'Data Sources', + 'Graph Management', + 'MutationLogs', + 'Maintain', + ]) + }) +}) + +describe('KG-MANAGE-003 - suggested next step callout', () => { + it('renders Suggested next step 
callout above the card grid', () => { + expect(manageWorkspaceVue).toContain('Suggested next step') + expect(manageWorkspaceVue).toContain('suggestedNextStep') + expect(manageWorkspaceVue).toContain('openWorkspaceStep') + }) + + it('prioritizes data sources when no sources are connected', () => { + const next = buildSuggestedNextStep({ + kgId: 'kg-1', + dataSourceCount: 0, + maintenanceReadyCount: 0, + mutationLogRunCount: 0, + workspaceStatus: baseWorkspaceStatus, + }) + + expect(next.stepId).toBe('data-sources') + expect(next.actionLabel).toBe('Open') + }) + + it('uses Run action when maintenance is ready', () => { + const next = buildSuggestedNextStep({ + kgId: 'kg-1', + dataSourceCount: 2, + maintenanceReadyCount: 1, + mutationLogRunCount: 3, + workspaceStatus: { + workspace_mode: 'extraction_operations', + transition_eligible: true, + readiness: { + has_minimum_entity_types: true, + has_minimum_relationship_types: true, + prepopulated_types_ready: true, + blocking_reasons: [], + }, + }, + }) + + expect(next.stepId).toBe('maintain') + expect(next.actionLabel).toBe('Run') + }) +}) + +describe('KG-MANAGE-004 - step card status semantics', () => { + it('renders status label, tint, detail text, and primary action per card', () => { + expect(manageWorkspaceVue).toContain('stepStatusTintClass') + expect(manageWorkspaceVue).toContain('card.status') + expect(manageWorkspaceVue).toContain('card.statusDetail') + expect(manageWorkspaceVue).toContain('card.actionLabel') + }) + + it('maps each status label to a tint class', () => { + expect(stepStatusTintClass('ready')).toContain('emerald') + expect(stepStatusTintClass('in_progress')).toContain('blue') + expect(stepStatusTintClass('needs_attention')).toContain('amber') + expect(stepStatusTintClass('blocked')).toContain('destructive') + }) + + it('uses Open, Revisit, or Run action labels on cards', () => { + const cards = buildWorkspaceStepCards({ + kgId: 'kg-1', + dataSourceCount: 2, + maintenanceReadyCount: 1, + 
mutationLogRunCount: 4, + workspaceStatus: { + workspace_mode: 'extraction_operations', + transition_eligible: true, + readiness: { + has_minimum_entity_types: true, + has_minimum_relationship_types: true, + prepopulated_types_ready: true, + blocking_reasons: [], + }, + }, + }) + + expect(cards.every((card) => ['Open', 'Revisit', 'Run'].includes(card.actionLabel))).toBe(true) + expect(cards.find((card) => card.id === 'maintain')?.actionLabel).toBe('Run') + }) +}) + +describe('KG-MANAGE-005 - graph-scoped data sources step', () => { + it('routes Data Sources step with kg_id and manage return context', () => { + expect(manageWorkspaceVue).toContain('buildDataSourcesStepUrl') + expect(buildDataSourcesStepUrl('kg-abc')).toBe('/data-sources?kg_id=kg-abc&from=manage') + }) + + it('data-sources page preserves manage return path without auto-opening wizard', () => { + expect(dataSourcesVue).toContain('from=manage') + expect(dataSourcesVue).toContain('scopedKnowledgeGraphId') + expect(dataSourcesVue).toContain('Back to workspace overview') + }) +}) + +describe('KG-MANAGE-015 - graph-scoped maintain step and round trip', () => { + it('routes Maintain step with graph scope and maintenance focus', () => { + expect(manageWorkspaceVue).toContain('buildMaintainStepUrl') + expect(buildMaintainStepUrl('kg-abc')).toBe( + '/data-sources?kg_id=kg-abc&from=manage&focus=maintain', + ) + }) + + it('returns to manage overview from in-page steps', () => { + expect(manageWorkspaceVue).toContain('returnToWorkspaceOverview') + expect(buildManageStepUrl('kg-abc')).toBe('/knowledge-graphs/kg-abc/manage') + expect(resolveStepDestination('kg-abc', 'graph-management')).toBe( + '/knowledge-graphs/kg-abc/manage?step=graph-management', + ) + }) + + it('detects maintenance readiness from commit diff semantics', () => { + expect(isMaintenanceReady({ + last_extraction_baseline_commit: 'abc', + tracked_branch_head_commit: 'def', + })).toBe(true) + expect(isMaintenanceReady({ + 
last_extraction_baseline_commit: 'abc', + tracked_branch_head_commit: 'abc', + })).toBe(false) + }) +}) + describe('Shared conversation panel - extraction UX contract', () => { it('renders resume-session action and explicit server-side persistence note', () => { expect(sharedConversationPanelVue).toContain('Resume session') diff --git a/src/dev-ui/app/utils/kgManageWorkspace.ts b/src/dev-ui/app/utils/kgManageWorkspace.ts new file mode 100644 index 000000000..7bec05d4c --- /dev/null +++ b/src/dev-ui/app/utils/kgManageWorkspace.ts @@ -0,0 +1,319 @@ +export type WorkspaceStepId = 'data-sources' | 'graph-management' | 'mutation-logs' | 'maintain' + +export type StepStatusLabel = 'ready' | 'in_progress' | 'needs_attention' | 'blocked' + +export type StepActionLabel = 'Open' | 'Revisit' | 'Run' + +export const WORKSPACE_STEP_TITLES: Record<WorkspaceStepId, string> = { + 'data-sources': 'Data Sources', + 'graph-management': 'Graph Management', + 'mutation-logs': 'MutationLogs', + maintain: 'Maintain', +} + +export const WORKSPACE_STEP_ORDER: WorkspaceStepId[] = [ + 'data-sources', + 'graph-management', + 'mutation-logs', + 'maintain', +] + +export interface WorkspaceReadinessSnapshot { + has_minimum_entity_types: boolean + has_minimum_relationship_types: boolean + prepopulated_types_ready: boolean + blocking_reasons: string[] +} + +export interface WorkspaceStatusSnapshot { + workspace_mode: 'schema_bootstrap' | 'extraction_operations' + transition_eligible: boolean + readiness: WorkspaceReadinessSnapshot +} + +export interface WorkspaceOverviewInputs { + kgId: string + dataSourceCount: number + maintenanceReadyCount: number + mutationLogRunCount: number + workspaceStatus: WorkspaceStatusSnapshot | null +} + +export interface WorkspaceStepCardView { + id: WorkspaceStepId + title: string + status: StepStatusLabel + statusDetail: string + actionLabel: StepActionLabel +} + +export interface SuggestedNextStepView { + stepId: WorkspaceStepId + title: string + description: 
string + actionLabel: StepActionLabel +} + +export function isMaintenanceReady(ds: { + last_extraction_baseline_commit?: string | null + tracked_branch_head_commit?: string | null +}): boolean { + if (!ds.last_extraction_baseline_commit || !ds.tracked_branch_head_commit) return false + return ds.last_extraction_baseline_commit !== ds.tracked_branch_head_commit +} + +export function buildDataSourcesStepUrl(kgId: string): string { + return `/data-sources?kg_id=${encodeURIComponent(kgId)}&from=manage` +} + +export function buildMaintainStepUrl(kgId: string): string { + return `/data-sources?kg_id=${encodeURIComponent(kgId)}&from=manage&focus=maintain` +} + +export function buildManageStepUrl(kgId: string, step?: WorkspaceStepId): string { + if (!step) { + return `/knowledge-graphs/${encodeURIComponent(kgId)}/manage` + } + return `/knowledge-graphs/${encodeURIComponent(kgId)}/manage?step=${step}` +} + +export function parseManageStepQuery(step: unknown): WorkspaceStepId | null { + if (step === 'graph-management' || step === 'mutation-logs') { + return step + } + return null +} + +export function stepStatusTintClass(status: StepStatusLabel): string { + switch (status) { + case 'ready': + return 'border-emerald-500/40 bg-emerald-50/30 dark:bg-emerald-950/20' + case 'in_progress': + return 'border-blue-500/40 bg-blue-50/30 dark:bg-blue-950/20' + case 'needs_attention': + return 'border-amber-500/40 bg-amber-50/30 dark:bg-amber-950/20' + case 'blocked': + return 'border-destructive/50 bg-destructive/5' + } +} + +function buildDataSourcesCard(input: WorkspaceOverviewInputs): WorkspaceStepCardView { + if (input.dataSourceCount === 0) { + return { + id: 'data-sources', + title: WORKSPACE_STEP_TITLES['data-sources'], + status: 'needs_attention', + statusDetail: 'No data sources connected yet.', + actionLabel: 'Open', + } + } + + return { + id: 'data-sources', + title: WORKSPACE_STEP_TITLES['data-sources'], + status: 'ready', + statusDetail: `${input.dataSourceCount} data 
source${input.dataSourceCount === 1 ? '' : 's'} connected.`, + actionLabel: 'Revisit', + } +} + +function buildGraphManagementCard(input: WorkspaceOverviewInputs): WorkspaceStepCardView { + const status = input.workspaceStatus + + if (!status) { + return { + id: 'graph-management', + title: WORKSPACE_STEP_TITLES['graph-management'], + status: 'in_progress', + statusDetail: 'Loading workspace readiness signals.', + actionLabel: 'Open', + } + } + + if (status.workspace_mode === 'schema_bootstrap') { + if (status.readiness.blocking_reasons.length > 0) { + return { + id: 'graph-management', + title: WORKSPACE_STEP_TITLES['graph-management'], + status: 'needs_attention', + statusDetail: `${status.readiness.blocking_reasons.length} blocking reason${status.readiness.blocking_reasons.length === 1 ? '' : 's'} before extraction.`, + actionLabel: 'Open', + } + } + + if (status.transition_eligible) { + return { + id: 'graph-management', + title: WORKSPACE_STEP_TITLES['graph-management'], + status: 'ready', + statusDetail: 'Schema bootstrap is ready to transition to extraction.', + actionLabel: 'Run', + } + } + + return { + id: 'graph-management', + title: WORKSPACE_STEP_TITLES['graph-management'], + status: 'in_progress', + statusDetail: 'Continue schema bootstrap and validation work.', + actionLabel: 'Open', + } + } + + return { + id: 'graph-management', + title: WORKSPACE_STEP_TITLES['graph-management'], + status: 'ready', + statusDetail: 'Extraction operations mode is active.', + actionLabel: 'Revisit', + } +} + +function buildMutationLogsCard(input: WorkspaceOverviewInputs): WorkspaceStepCardView { + if (input.dataSourceCount === 0) { + return { + id: 'mutation-logs', + title: WORKSPACE_STEP_TITLES['mutation-logs'], + status: 'blocked', + statusDetail: 'Connect a data source before reviewing mutation runs.', + actionLabel: 'Open', + } + } + + if (input.mutationLogRunCount === 0) { + return { + id: 'mutation-logs', + title: WORKSPACE_STEP_TITLES['mutation-logs'], + status: 
input.workspaceStatus?.workspace_mode === 'extraction_operations' + ? 'needs_attention' + : 'ready', + statusDetail: 'No mutation log runs recorded for this graph yet.', + actionLabel: 'Open', + } + } + + return { + id: 'mutation-logs', + title: WORKSPACE_STEP_TITLES['mutation-logs'], + status: 'ready', + statusDetail: `${input.mutationLogRunCount} mutation run${input.mutationLogRunCount === 1 ? '' : 's'} available.`, + actionLabel: 'Revisit', + } +} + +function buildMaintainCard(input: WorkspaceOverviewInputs): WorkspaceStepCardView { + if (input.dataSourceCount === 0) { + return { + id: 'maintain', + title: WORKSPACE_STEP_TITLES.maintain, + status: 'blocked', + statusDetail: 'Add a data source before maintenance can run.', + actionLabel: 'Open', + } + } + + if (input.maintenanceReadyCount > 0) { + return { + id: 'maintain', + title: WORKSPACE_STEP_TITLES.maintain, + status: 'needs_attention', + statusDetail: `${input.maintenanceReadyCount} source${input.maintenanceReadyCount === 1 ? '' : 's'} have new commits ready for maintenance.`, + actionLabel: 'Run', + } + } + + return { + id: 'maintain', + title: WORKSPACE_STEP_TITLES.maintain, + status: 'ready', + statusDetail: 'All tracked sources are up to date.', + actionLabel: 'Revisit', + } +} + +export function buildWorkspaceStepCards(input: WorkspaceOverviewInputs): WorkspaceStepCardView[] { + return [ + buildDataSourcesCard(input), + buildGraphManagementCard(input), + buildMutationLogsCard(input), + buildMaintainCard(input), + ] +} + +export function buildSuggestedNextStep(input: WorkspaceOverviewInputs): SuggestedNextStepView { + const cards = buildWorkspaceStepCards(input) + + if (input.dataSourceCount === 0) { + const card = cards.find((item) => item.id === 'data-sources')! 
+ return { + stepId: 'data-sources', + title: card.title, + description: 'Connect a data source to start schema bootstrap and extraction.', + actionLabel: card.actionLabel, + } + } + + const maintainCard = cards.find((item) => item.id === 'maintain')! + if (maintainCard.status === 'needs_attention' && maintainCard.actionLabel === 'Run') { + return { + stepId: 'maintain', + title: maintainCard.title, + description: maintainCard.statusDetail, + actionLabel: 'Run', + } + } + + const graphCard = cards.find((item) => item.id === 'graph-management')! + if ( + input.workspaceStatus?.workspace_mode === 'schema_bootstrap' + && input.workspaceStatus.transition_eligible + ) { + return { + stepId: 'graph-management', + title: graphCard.title, + description: 'Validate readiness and transition into extraction operations.', + actionLabel: 'Run', + } + } + + if ( + graphCard.status === 'needs_attention' + || graphCard.status === 'in_progress' + ) { + return { + stepId: 'graph-management', + title: graphCard.title, + description: graphCard.statusDetail, + actionLabel: graphCard.actionLabel, + } + } + + const mutationCard = cards.find((item) => item.id === 'mutation-logs')! 
+ if (mutationCard.status === 'needs_attention') { + return { + stepId: 'mutation-logs', + title: mutationCard.title, + description: mutationCard.statusDetail, + actionLabel: mutationCard.actionLabel, + } + } + + return { + stepId: 'graph-management', + title: graphCard.title, + description: graphCard.statusDetail, + actionLabel: 'Revisit', + } +} + +export function resolveStepDestination(kgId: string, stepId: WorkspaceStepId): string { + switch (stepId) { + case 'data-sources': + return buildDataSourcesStepUrl(kgId) + case 'maintain': + return buildMaintainStepUrl(kgId) + case 'graph-management': + case 'mutation-logs': + return buildManageStepUrl(kgId, stepId) + } +} From b5af3b1b9ca934325619b175cf9fd73d3ae26dfc Mon Sep 17 00:00:00 2001 From: Austin Redenbaugh <aredenba@redhat.com> Date: Fri, 22 May 2026 12:20:31 -0400 Subject: [PATCH 042/153] feat(extraction): wire runtime credential injection for extraction workers (#730) Issue scoped credentials through the JobPackageProduced handler, enforce launcher scope checks, redact secrets in failure telemetry, and add tests for expiration and least-privilege enforcement. 
Co-authored-by: Cursor <cursoragent@cursor.com> --- .../infrastructure/event_handler.py | 91 +++++++- src/api/extraction/ports/__init__.py | 2 + src/api/extraction/ports/runtime.py | 10 + src/api/extraction/ports/services.py | 7 +- src/api/main.py | 30 ++- .../test_workload_credential_injection.py | 215 ++++++++++++++++++ .../test_extraction_event_handler.py | 118 +++++++++- .../infrastructure/test_workload_runtime.py | 21 ++ 8 files changed, 487 insertions(+), 7 deletions(-) create mode 100644 src/api/tests/integration/extraction/test_workload_credential_injection.py diff --git a/src/api/extraction/infrastructure/event_handler.py b/src/api/extraction/infrastructure/event_handler.py index 32cceb1ee..a6303d386 100644 --- a/src/api/extraction/infrastructure/event_handler.py +++ b/src/api/extraction/infrastructure/event_handler.py @@ -10,9 +10,16 @@ from __future__ import annotations +import re from datetime import UTC, datetime from typing import TYPE_CHECKING, Any +from extraction.ports.runtime import ( + EphemeralWorkerLaunchRequest, + IEphemeralExtractionWorkerLauncher, + IWorkloadCredentialIssuer, + ScopedWorkloadCredentials, +) from extraction.ports.services import IExtractionService if TYPE_CHECKING: @@ -23,9 +30,10 @@ class ExtractionEventHandler: """Handles JobPackageProduced events by running the extraction pipeline. When a JobPackageProduced event is processed from the outbox, this handler: - 1. Delegates to IExtractionService.run() to extract entities and relationships - 2. On success: appends MutationLogProduced to the outbox - 3. On failure: appends ExtractionFailed to the outbox + 1. Issues short-lived scoped credentials and launches an ephemeral worker + 2. Delegates to IExtractionService.run() to extract entities and relationships + 3. On success: appends MutationLogProduced to the outbox + 4. On failure: appends ExtractionFailed to the outbox This handler is the entry point for the Extraction bounded context in the sync lifecycle. 
It creates the linkage between the Ingestion context @@ -41,6 +49,9 @@ def __init__( extraction_service: IExtractionService, outbox: "IOutboxRepository", runtime_context_builder: Any, + *, + credential_issuer: IWorkloadCredentialIssuer | None = None, + worker_launcher: IEphemeralExtractionWorkerLauncher | None = None, ) -> None: """Initialize the extraction event handler. @@ -48,19 +59,58 @@ def __init__( extraction_service: Service that runs the AI extraction pipeline outbox: Repository for writing output events (MutationLogProduced / ExtractionFailed) + runtime_context_builder: Resolves runtime paths for the workload + credential_issuer: Optional issuer for runtime-only workload credentials + worker_launcher: Optional launcher that enforces credential scope """ + if (credential_issuer is None) ^ (worker_launcher is None): + raise ValueError( + "credential_issuer and worker_launcher must be configured together" + ) + self._extraction_service = extraction_service self._outbox = outbox self._runtime_context_builder = runtime_context_builder + self._credential_issuer = credential_issuer + self._worker_launcher = worker_launcher def supported_event_types(self) -> frozenset[str]: """Return event types handled by this handler.""" return frozenset({"JobPackageProduced"}) + @staticmethod + def _redact_sensitive_error(message: str) -> str: + """Redact token-like secrets from error strings before persistence.""" + patterns = ( + re.compile(r"\bgh[pousr]_[A-Za-z0-9_]{20,}\b"), + re.compile(r"(?i)\bBearer\s+[A-Za-z0-9._\-+/=]{16,}\b"), + re.compile( + r"(?i)\b(token|access_token|password|api[_-]?key)\b\s*[:=]\s*['\"]?[^\s,'\"]+" + ), + ) + redacted = message + for pattern in patterns: + redacted = pattern.sub("***REDACTED***", redacted) + return redacted + + @classmethod + def _sanitize_failure_error( + cls, + exc: Exception, + *, + workload_credentials: ScopedWorkloadCredentials | None, + ) -> str: + message = str(exc) + if workload_credentials is not None and 
workload_credentials.token: + message = message.replace(workload_credentials.token, "***REDACTED***") + return cls._redact_sensitive_error(message) + async def handle( self, event_type: str, payload: dict[str, Any], + *, + tenant_id: str | None = None, ) -> None: """Process a JobPackageProduced event by running the extraction pipeline. @@ -71,6 +121,7 @@ async def handle( - data_source_id: The data source being extracted - knowledge_graph_id: The target knowledge graph - job_package_id: The JobPackage to process + tenant_id: Tenant scope used for runtime credential issuance """ if event_type != "JobPackageProduced": return @@ -81,7 +132,32 @@ async def handle( job_package_id = payload["job_package_id"] now = datetime.now(UTC) + workload_credentials: ScopedWorkloadCredentials | None = None + worker_id: str | None = None + try: + if self._credential_issuer is not None and self._worker_launcher is not None: + if not tenant_id: + raise ValueError( + "tenant_id is required for scoped workload credential injection" + ) + + workload_credentials = self._credential_issuer.issue( + tenant_id=tenant_id, + knowledge_graph_id=knowledge_graph_id, + ) + launch_result = self._worker_launcher.launch( + request=EphemeralWorkerLaunchRequest( + tenant_id=tenant_id, + knowledge_graph_id=knowledge_graph_id, + session_id=f"sync:{sync_run_id}", + sync_run_id=sync_run_id, + job_package_id=job_package_id, + ), + credentials=workload_credentials, + ) + worker_id = launch_result.worker_id + runtime_context = self._runtime_context_builder.build( sync_run_id=sync_run_id, job_package_id=job_package_id, @@ -92,6 +168,7 @@ async def handle( knowledge_graph_id=knowledge_graph_id, job_package_id=job_package_id, runtime_context=runtime_context, + workload_credentials=workload_credentials, ) except Exception as exc: await self._outbox.append( @@ -99,7 +176,10 @@ async def handle( payload={ "sync_run_id": sync_run_id, "data_source_id": data_source_id, - "error": str(exc), + "error": 
self._sanitize_failure_error( + exc, + workload_credentials=workload_credentials, + ), "occurred_at": now.isoformat(), }, occurred_at=now, @@ -107,6 +187,9 @@ async def handle( aggregate_id=sync_run_id, ) return + finally: + if worker_id is not None and self._worker_launcher is not None: + self._worker_launcher.complete_worker(worker_id) # Extraction succeeded — append success event outside the try block so # that an outbox write failure here is not mistaken for an extraction diff --git a/src/api/extraction/ports/__init__.py b/src/api/extraction/ports/__init__.py index 2e253a7ea..10262ea8e 100644 --- a/src/api/extraction/ports/__init__.py +++ b/src/api/extraction/ports/__init__.py @@ -9,6 +9,7 @@ EphemeralWorkerLaunchResult, IEphemeralExtractionWorkerLauncher, IStickySessionRuntimeManager, + IWorkloadCredentialIssuer, ScopedWorkloadCredentials, StickySessionRuntimeLease, ) @@ -20,6 +21,7 @@ "IExtractionSkillOverrideRepository", "IStickySessionRuntimeManager", "IEphemeralExtractionWorkerLauncher", + "IWorkloadCredentialIssuer", "StickySessionRuntimeLease", "ScopedWorkloadCredentials", "EphemeralWorkerLaunchRequest", diff --git a/src/api/extraction/ports/runtime.py b/src/api/extraction/ports/runtime.py index 624973ebd..b446abeba 100644 --- a/src/api/extraction/ports/runtime.py +++ b/src/api/extraction/ports/runtime.py @@ -50,6 +50,16 @@ class EphemeralWorkerLaunchResult: credentials_expires_at: datetime +class IWorkloadCredentialIssuer(Protocol): + """Issues short-lived credentials scoped to tenant and knowledge graph.""" + + def issue( + self, *, tenant_id: str, knowledge_graph_id: str + ) -> ScopedWorkloadCredentials: + """Return runtime-only credentials for one extraction workload.""" + ... 
+ + class IStickySessionRuntimeManager(Protocol): """Manages sticky chat runtime containers for active sessions.""" diff --git a/src/api/extraction/ports/services.py b/src/api/extraction/ports/services.py index 7c73d4865..851dfd3bc 100644 --- a/src/api/extraction/ports/services.py +++ b/src/api/extraction/ports/services.py @@ -3,7 +3,10 @@ from __future__ import annotations from dataclasses import dataclass -from typing import Protocol +from typing import TYPE_CHECKING, Protocol + +if TYPE_CHECKING: + from extraction.ports.runtime import ScopedWorkloadCredentials @dataclass(frozen=True) @@ -34,6 +37,7 @@ async def run( knowledge_graph_id: str, job_package_id: str, runtime_context: ExtractionRuntimeContext, + workload_credentials: ScopedWorkloadCredentials | None = None, ) -> str: """Run the AI extraction pipeline for a JobPackage. @@ -44,6 +48,7 @@ async def run( job_package_id: Identifier for the JobPackage to process runtime_context: Resolved runtime context paths for ingestion resources, reconstructed repository files, and skills availability. + workload_credentials: Short-lived runtime credentials injected into the worker Returns: mutation_log_id: Identifier for the produced MutationLog (JSONL) diff --git a/src/api/main.py b/src/api/main.py index 1b0236b2f..fba1699f1 100644 --- a/src/api/main.py +++ b/src/api/main.py @@ -311,6 +311,7 @@ async def run( knowledge_graph_id: str, job_package_id: str, runtime_context: Any, + workload_credentials: Any = None, ) -> str: raise NotImplementedError( "AI extraction pipeline is not yet implemented. 
" @@ -336,14 +337,25 @@ def supported_event_types(self) -> frozenset[str]: return self._SUPPORTED async def handle(self, event_type: str, payload: dict[str, Any]) -> None: + from datetime import timedelta + from infrastructure.outbox.repository import OutboxRepository from extraction.infrastructure.event_handler import ExtractionEventHandler from extraction.infrastructure.runtime_context_builder import ( FilesystemExtractionRuntimeContextBuilder, ) + from extraction.infrastructure.workload_runtime import ( + InMemoryEphemeralExtractionWorkerLauncher, + ScopedWorkloadCredentialIssuer, + ) + from management.domain.value_objects import KnowledgeGraphId + from management.infrastructure.repositories.knowledge_graph_repository import ( + KnowledgeGraphRepository, + ) async with self._session_factory() as session: outbox = OutboxRepository(session=session) + kg_repo = KnowledgeGraphRepository(session=session, outbox=outbox) runtime_context_builder = FilesystemExtractionRuntimeContextBuilder( work_dir=_JOB_PACKAGE_WORK_DIR, skills_dir=_EXTRACTION_SKILLS_DIR, @@ -352,8 +364,24 @@ async def handle(self, event_type: str, payload: dict[str, Any]) -> None: extraction_service=self._extraction_service, outbox=outbox, runtime_context_builder=runtime_context_builder, + credential_issuer=ScopedWorkloadCredentialIssuer( + default_ttl=timedelta(minutes=15) + ), + worker_launcher=InMemoryEphemeralExtractionWorkerLauncher(), + ) + + tenant_id = str(payload.get("tenant_id", "")) if payload.get("tenant_id") else "" + knowledge_graph_id = str(payload.get("knowledge_graph_id", "")) + if not tenant_id and knowledge_graph_id: + kg = await kg_repo.get_by_id(KnowledgeGraphId(value=knowledge_graph_id)) + if kg is not None: + tenant_id = kg.tenant_id + + await extraction_handler.handle( + event_type, + payload, + tenant_id=tenant_id or None, ) - await extraction_handler.handle(event_type, payload) await session.commit() diff --git 
a/src/api/tests/integration/extraction/test_workload_credential_injection.py b/src/api/tests/integration/extraction/test_workload_credential_injection.py new file mode 100644 index 000000000..da85476eb --- /dev/null +++ b/src/api/tests/integration/extraction/test_workload_credential_injection.py @@ -0,0 +1,215 @@ +"""Integration tests for extraction workload credential injection.""" + +from __future__ import annotations + +from datetime import UTC, datetime, timedelta +from typing import Any +from uuid import UUID + +import pytest + +from extraction.infrastructure.event_handler import ExtractionEventHandler +from extraction.infrastructure.workload_runtime import ( + InMemoryEphemeralExtractionWorkerLauncher, + ScopedWorkloadCredentialIssuer, +) +from extraction.ports.runtime import ScopedWorkloadCredentials +from extraction.ports.services import ExtractionRuntimeContext + +pytestmark = pytest.mark.integration + + +class _RecordingOutbox: + def __init__(self) -> None: + self.appended: list[dict[str, Any]] = [] + + async def append( + self, + event_type: str, + payload: dict[str, Any], + occurred_at: datetime, + aggregate_type: str, + aggregate_id: str, + ) -> None: + self.appended.append( + { + "event_type": event_type, + "payload": payload, + "occurred_at": occurred_at, + "aggregate_type": aggregate_type, + "aggregate_id": aggregate_id, + } + ) + + async def fetch_unprocessed(self, limit: int = 100) -> list[Any]: + return [] + + async def mark_processed(self, entry_id: UUID) -> None: + pass + + +class _RecordingExtractionService: + def __init__(self) -> None: + self.calls: list[dict[str, Any]] = [] + + async def run( + self, + sync_run_id: str, + data_source_id: str, + knowledge_graph_id: str, + job_package_id: str, + runtime_context: ExtractionRuntimeContext, + workload_credentials: ScopedWorkloadCredentials | None = None, + ) -> str: + self.calls.append( + { + "sync_run_id": sync_run_id, + "workload_credentials": workload_credentials, + } + ) + return 
"mutation-log-integration" + + +class _StaticRuntimeContextBuilder: + def build(self, *, sync_run_id: str, job_package_id: str) -> ExtractionRuntimeContext: + return ExtractionRuntimeContext( + ingestion_context_dir="/tmp/ingestion-context", + repository_files_dir="/tmp/repository-files", + skills_dir="/app/skills", + job_package_archive="/tmp/job-package.zip", + ) + + +def _payload(*, tenant_id: str = "tenant-integration") -> dict[str, Any]: + return { + "sync_run_id": "sync-integration-1", + "data_source_id": "ds-integration-1", + "knowledge_graph_id": "kg-integration-1", + "job_package_id": "pkg-integration-1", + "tenant_id": tenant_id, + "occurred_at": datetime.now(UTC).isoformat(), + } + + +def _handler( + *, + service: _RecordingExtractionService | None = None, + launcher: InMemoryEphemeralExtractionWorkerLauncher | None = None, +) -> tuple[ExtractionEventHandler, _RecordingOutbox, _RecordingExtractionService, InMemoryEphemeralExtractionWorkerLauncher]: + outbox = _RecordingOutbox() + extraction_service = service or _RecordingExtractionService() + worker_launcher = launcher or InMemoryEphemeralExtractionWorkerLauncher() + handler = ExtractionEventHandler( + extraction_service=extraction_service, + outbox=outbox, + runtime_context_builder=_StaticRuntimeContextBuilder(), + credential_issuer=ScopedWorkloadCredentialIssuer(default_ttl=timedelta(minutes=10)), + worker_launcher=worker_launcher, + ) + return handler, outbox, extraction_service, worker_launcher + + +@pytest.mark.asyncio +async def test_scoped_credentials_are_injected_at_runtime_only() -> None: + handler, outbox, service, launcher = _handler() + + await handler.handle("JobPackageProduced", _payload(), tenant_id="tenant-integration") + + assert len(service.calls) == 1 + credentials = service.calls[0]["workload_credentials"] + assert credentials is not None + assert credentials.scopes == ( + "tenant:tenant-integration", + "knowledge_graph:kg-integration-1", + "workload:extraction", + ) + assert 
launcher.active_worker_count == 0 + assert len(outbox.appended) == 1 + success = outbox.appended[0] + assert success["event_type"] == "MutationLogProduced" + assert "token" not in success["payload"] + assert credentials.token not in str(success["payload"]) + + +@pytest.mark.asyncio +async def test_rejects_credentials_with_insufficient_scope() -> None: + outbox = _RecordingOutbox() + service = _RecordingExtractionService() + launcher = InMemoryEphemeralExtractionWorkerLauncher() + + class _WrongScopeIssuer: + def issue( + self, *, tenant_id: str, knowledge_graph_id: str + ) -> ScopedWorkloadCredentials: + return ScopedWorkloadCredentials( + token="wrong-scope-token", + expires_at=datetime.now(UTC) + timedelta(minutes=5), + scopes=( + "tenant:tenant-other", + f"knowledge_graph:{knowledge_graph_id}", + "workload:extraction", + ), + ) + + handler = ExtractionEventHandler( + extraction_service=service, + outbox=outbox, + runtime_context_builder=_StaticRuntimeContextBuilder(), + credential_issuer=_WrongScopeIssuer(), + worker_launcher=launcher, + ) + + await handler.handle( + "JobPackageProduced", + _payload(), + tenant_id="tenant-integration", + ) + + assert service.calls == [] + assert len(outbox.appended) == 1 + failure = outbox.appended[0] + assert failure["event_type"] == "ExtractionFailed" + assert "scope" in failure["payload"]["error"].lower() + assert "wrong-scope-token" not in failure["payload"]["error"] + + +@pytest.mark.asyncio +async def test_rejects_expired_credentials() -> None: + outbox = _RecordingOutbox() + service = _RecordingExtractionService() + launcher = InMemoryEphemeralExtractionWorkerLauncher() + + class _ExpiredIssuer: + def issue( + self, *, tenant_id: str, knowledge_graph_id: str + ) -> ScopedWorkloadCredentials: + return ScopedWorkloadCredentials( + token="expired-token-value", + expires_at=datetime.now(UTC) - timedelta(seconds=1), + scopes=( + f"tenant:{tenant_id}", + f"knowledge_graph:{knowledge_graph_id}", + "workload:extraction", + ), + ) 
+ + handler = ExtractionEventHandler( + extraction_service=service, + outbox=outbox, + runtime_context_builder=_StaticRuntimeContextBuilder(), + credential_issuer=_ExpiredIssuer(), + worker_launcher=launcher, + ) + + await handler.handle( + "JobPackageProduced", + _payload(), + tenant_id="tenant-integration", + ) + + assert service.calls == [] + assert len(outbox.appended) == 1 + failure = outbox.appended[0] + assert failure["event_type"] == "ExtractionFailed" + assert "expired" in failure["payload"]["error"].lower() + assert "expired-token-value" not in failure["payload"]["error"] diff --git a/src/api/tests/unit/extraction/infrastructure/test_extraction_event_handler.py b/src/api/tests/unit/extraction/infrastructure/test_extraction_event_handler.py index c04a92096..3b779f5e3 100644 --- a/src/api/tests/unit/extraction/infrastructure/test_extraction_event_handler.py +++ b/src/api/tests/unit/extraction/infrastructure/test_extraction_event_handler.py @@ -11,13 +11,18 @@ from __future__ import annotations -from datetime import UTC, datetime +from datetime import UTC, datetime, timedelta from typing import Any from uuid import UUID import pytest from extraction.infrastructure.event_handler import ExtractionEventHandler +from extraction.infrastructure.workload_runtime import ( + InMemoryEphemeralExtractionWorkerLauncher, + ScopedWorkloadCredentialIssuer, +) +from extraction.ports.runtime import ScopedWorkloadCredentials from extraction.ports.services import ExtractionRuntimeContext @@ -67,6 +72,7 @@ async def run( knowledge_graph_id: str, job_package_id: str, runtime_context: ExtractionRuntimeContext, + workload_credentials: ScopedWorkloadCredentials | None = None, ) -> str: self.calls.append( { @@ -75,6 +81,7 @@ async def run( "knowledge_graph_id": knowledge_graph_id, "job_package_id": job_package_id, "runtime_context": runtime_context, + "workload_credentials": workload_credentials, } ) if self._fail: @@ -263,6 +270,115 @@ async def 
test_extraction_failed_aggregate_type( assert event["aggregate_id"] == "run-003" +@pytest.mark.asyncio +class TestExtractionEventHandlerCredentialInjection: + """Tests for runtime credential issuance and worker launch enforcement.""" + + async def test_injects_scoped_credentials_before_extraction( + self, + extraction_service: _FakeExtractionService, + outbox: _FakeOutboxRepository, + ) -> None: + handler = ExtractionEventHandler( + extraction_service=extraction_service, + outbox=outbox, + runtime_context_builder=_FakeRuntimeContextBuilder(), + credential_issuer=ScopedWorkloadCredentialIssuer( + default_ttl=timedelta(minutes=10) + ), + worker_launcher=InMemoryEphemeralExtractionWorkerLauncher(), + ) + payload = _job_package_produced_payload(sync_run_id="run-cred") + + await handler.handle("JobPackageProduced", payload, tenant_id="tenant-1") + + assert len(extraction_service.calls) == 1 + credentials = extraction_service.calls[0]["workload_credentials"] + assert credentials is not None + assert credentials.scopes == ( + "tenant:tenant-1", + "knowledge_graph:kg-001", + "workload:extraction", + ) + + async def test_emits_extraction_failed_when_scope_is_invalid( + self, + outbox: _FakeOutboxRepository, + ) -> None: + class _WrongScopeIssuer: + def issue( + self, *, tenant_id: str, knowledge_graph_id: str + ) -> ScopedWorkloadCredentials: + return ScopedWorkloadCredentials( + token="wrong-scope-token", + expires_at=datetime.now(UTC) + timedelta(minutes=5), + scopes=( + "tenant:tenant-other", + f"knowledge_graph:{knowledge_graph_id}", + "workload:extraction", + ), + ) + + handler = ExtractionEventHandler( + extraction_service=_FakeExtractionService(), + outbox=outbox, + runtime_context_builder=_FakeRuntimeContextBuilder(), + credential_issuer=_WrongScopeIssuer(), + worker_launcher=InMemoryEphemeralExtractionWorkerLauncher(), + ) + payload = _job_package_produced_payload(sync_run_id="run-scope-fail") + + await handler.handle( + "JobPackageProduced", + payload, + 
tenant_id="tenant-1", + ) + + assert len(outbox.appended) == 1 + event = outbox.appended[0] + assert event["event_type"] == "ExtractionFailed" + assert "scope" in event["payload"]["error"].lower() + assert "wrong-scope-token" not in event["payload"]["error"] + + async def test_redacts_secret_material_from_failure_payload( + self, + outbox: _FakeOutboxRepository, + ) -> None: + class _LeakyService(_FakeExtractionService): + async def run( # type: ignore[override] + self, + sync_run_id: str, + data_source_id: str, + knowledge_graph_id: str, + job_package_id: str, + runtime_context: ExtractionRuntimeContext, + workload_credentials: ScopedWorkloadCredentials | None = None, + ) -> str: + raise RuntimeError( + "workload auth failed for token ghp_1234567890abcdef1234567890abcdef1234" + ) + + handler = ExtractionEventHandler( + extraction_service=_LeakyService(), + outbox=outbox, + runtime_context_builder=_FakeRuntimeContextBuilder(), + credential_issuer=ScopedWorkloadCredentialIssuer( + default_ttl=timedelta(minutes=10) + ), + worker_launcher=InMemoryEphemeralExtractionWorkerLauncher(), + ) + payload = _job_package_produced_payload(sync_run_id="run-redact") + + await handler.handle("JobPackageProduced", payload, tenant_id="tenant-1") + + event = outbox.appended[0] + assert event["event_type"] == "ExtractionFailed" + assert "ghp_1234567890abcdef1234567890abcdef1234" not in event["payload"][ + "error" + ] + assert "***REDACTED***" in event["payload"]["error"] + + class _FailingOutboxRepository(_FakeOutboxRepository): """Outbox repository that raises on the first write (simulates outbox failure).""" diff --git a/src/api/tests/unit/extraction/infrastructure/test_workload_runtime.py b/src/api/tests/unit/extraction/infrastructure/test_workload_runtime.py index d2b8a943c..7e2b4d0d0 100644 --- a/src/api/tests/unit/extraction/infrastructure/test_workload_runtime.py +++ b/src/api/tests/unit/extraction/infrastructure/test_workload_runtime.py @@ -13,6 +13,7 @@ ) from 
extraction.ports.runtime import ( EphemeralWorkerLaunchRequest, + ScopedWorkloadCredentials, ) @@ -79,6 +80,26 @@ def test_cleanup_terminates_expired_sessions(self) -> None: class TestEphemeralWorkerLauncher: + def test_launch_rejects_expired_credentials(self) -> None: + issuer = ScopedWorkloadCredentialIssuer(default_ttl=timedelta(minutes=10)) + launcher = InMemoryEphemeralExtractionWorkerLauncher() + scoped_credentials = issuer.issue(tenant_id="tenant-1", knowledge_graph_id="kg-1") + expired_credentials = ScopedWorkloadCredentials( + token=scoped_credentials.token, + expires_at=datetime.now(UTC) - timedelta(seconds=1), + scopes=scoped_credentials.scopes, + ) + request = EphemeralWorkerLaunchRequest( + tenant_id="tenant-1", + knowledge_graph_id="kg-1", + session_id="session-1", + sync_run_id="sync-1", + job_package_id="pkg-1", + ) + + with pytest.raises(ValueError, match="expired"): + launcher.launch(request=request, credentials=expired_credentials) + def test_launch_requires_credentials_scoped_to_request(self) -> None: issuer = ScopedWorkloadCredentialIssuer(default_ttl=timedelta(minutes=10)) launcher = InMemoryEphemeralExtractionWorkerLauncher() From 1d9530c6d48e7943c7f2838901ac9bfe4b0e49cd Mon Sep 17 00:00:00 2001 From: Austin Redenbaugh <aredenba@redhat.com> Date: Fri, 22 May 2026 12:20:35 -0400 Subject: [PATCH 043/153] feat(extraction): add session history views with run-level metrics retention (#731) Expose scoped session history API responses with linked mutation-run metrics, surface archived sessions in KG manage UI after clear chat, and verify retention with unit and integration coverage. 
Closes #719 Co-authored-by: Cursor <cursoragent@cursor.com> --- .../application/agent_session_service.py | 46 ++++- src/api/extraction/dependencies.py | 3 +- src/api/extraction/domain/value_objects.py | 17 ++ .../infrastructure/repositories/__init__.py | 10 +- .../session_run_metrics_reader.py | 82 ++++++++ src/api/extraction/ports/repositories.py | 13 +- src/api/extraction/presentation/models.py | 78 ++++++- src/api/extraction/presentation/routes.py | 32 +++ .../tests/integration/extraction/__init__.py | 1 + .../tests/integration/extraction/conftest.py | 3 + .../test_session_history_retention.py | 192 ++++++++++++++++++ .../tests/integration/management/conftest.py | 1 + .../test_session_history_service.py | 164 +++++++++++++++ .../extraction/presentation/test_routes.py | 27 +++ .../pages/knowledge-graphs/[kgId]/manage.vue | 112 ++++++++++ .../knowledge-graph-manage-workspace.test.ts | 11 +- 16 files changed, 784 insertions(+), 8 deletions(-) create mode 100644 src/api/extraction/infrastructure/repositories/session_run_metrics_reader.py create mode 100644 src/api/tests/integration/extraction/__init__.py create mode 100644 src/api/tests/integration/extraction/conftest.py create mode 100644 src/api/tests/integration/extraction/test_session_history_retention.py create mode 100644 src/api/tests/unit/extraction/application/test_session_history_service.py diff --git a/src/api/extraction/application/agent_session_service.py b/src/api/extraction/application/agent_session_service.py index b4a4c6a03..fdda14574 100644 --- a/src/api/extraction/application/agent_session_service.py +++ b/src/api/extraction/application/agent_session_service.py @@ -2,6 +2,7 @@ from __future__ import annotations +from dataclasses import dataclass from datetime import UTC, datetime from ulid import ULID @@ -11,7 +12,19 @@ ) from extraction.domain.entities.agent_session import ExtractionAgentSession from extraction.domain.value_objects import BootstrapIntakePath, ExtractionSessionMode -from 
extraction.ports.repositories import IExtractionAgentSessionRepository +from extraction.domain.value_objects import ExtractionSessionRunMetric +from extraction.ports.repositories import ( + IExtractionAgentSessionRepository, + IExtractionSessionRunMetricsReader, +) + + +@dataclass(frozen=True) +class ExtractionSessionHistoryRecord: + """Session history entry with linked run-level metrics.""" + + session: ExtractionAgentSession + run_metrics: list[ExtractionSessionRunMetric] class ExtractionAgentSessionService: @@ -21,9 +34,11 @@ def __init__( self, repository: IExtractionAgentSessionRepository, skill_resolution_service: ExtractionSkillResolutionService | None = None, + run_metrics_reader: IExtractionSessionRunMetricsReader | None = None, ) -> None: self._repository = repository self._skill_resolution_service = skill_resolution_service + self._run_metrics_reader = run_metrics_reader @staticmethod def _build_bootstrap_intake_prompt() -> str: @@ -114,6 +129,35 @@ async def list_sessions( mode=mode, ) + async def list_session_history( + self, + user_id: str, + knowledge_graph_id: str, + mode: ExtractionSessionMode, + ) -> list[ExtractionSessionHistoryRecord]: + sessions = await self._repository.list_by_scope( + user_id=user_id, + knowledge_graph_id=knowledge_graph_id, + mode=mode, + ) + if not sessions: + return [] + + metrics_by_session: dict[str, list[ExtractionSessionRunMetric]] = {} + if self._run_metrics_reader is not None: + metrics_by_session = await self._run_metrics_reader.find_metrics_by_session_ids( + knowledge_graph_id=knowledge_graph_id, + session_ids=[session.id for session in sessions], + ) + + return [ + ExtractionSessionHistoryRecord( + session=session, + run_metrics=metrics_by_session.get(session.id, []), + ) + for session in sessions + ] + async def archive_session(self, session_id: str) -> ExtractionAgentSession | None: session = await self._repository.get_by_id(session_id) if session is None: diff --git a/src/api/extraction/dependencies.py 
b/src/api/extraction/dependencies.py index e720e81d8..2c6853001 100644 --- a/src/api/extraction/dependencies.py +++ b/src/api/extraction/dependencies.py @@ -11,6 +11,7 @@ ) from extraction.infrastructure.repositories import ( ExtractionAgentSessionRepository, + ExtractionSessionRunMetricsReader, ExtractionSkillOverrideRepository, ) from infrastructure.database.dependencies import get_write_session @@ -26,5 +27,5 @@ def get_extraction_agent_session_service( return ExtractionAgentSessionService( repository=ExtractionAgentSessionRepository(session=session), skill_resolution_service=skill_resolution_service, + run_metrics_reader=ExtractionSessionRunMetricsReader(session=session), ) - diff --git a/src/api/extraction/domain/value_objects.py b/src/api/extraction/domain/value_objects.py index 22ec0470c..906c77c22 100644 --- a/src/api/extraction/domain/value_objects.py +++ b/src/api/extraction/domain/value_objects.py @@ -1,5 +1,9 @@ """Value objects for Extraction session lifecycle.""" +from __future__ import annotations + +from dataclasses import dataclass, field +from datetime import datetime from enum import StrEnum @@ -16,3 +20,16 @@ class BootstrapIntakePath(StrEnum): FIRST_PASS_SCHEMA_ATTEMPT = "first_pass_schema_attempt" GUIDED_CO_DESIGN = "guided_co_design" + +@dataclass(frozen=True) +class ExtractionSessionRunMetric: + """Run-level metrics linked to an extraction session.""" + + sync_run_id: str + mutation_log_id: str | None + status: str + started_at: datetime + completed_at: datetime | None = None + token_usage_total: int | None = None + cost_total_usd: float | None = None + operation_counts: dict[str, int] = field(default_factory=dict) diff --git a/src/api/extraction/infrastructure/repositories/__init__.py b/src/api/extraction/infrastructure/repositories/__init__.py index 00204177a..8cf46718b 100644 --- a/src/api/extraction/infrastructure/repositories/__init__.py +++ b/src/api/extraction/infrastructure/repositories/__init__.py @@ -3,9 +3,15 @@ from 
extraction.infrastructure.repositories.agent_session_repository import ( ExtractionAgentSessionRepository, ) +from extraction.infrastructure.repositories.session_run_metrics_reader import ( + ExtractionSessionRunMetricsReader, +) from extraction.infrastructure.repositories.skill_override_repository import ( ExtractionSkillOverrideRepository, ) -__all__ = ["ExtractionAgentSessionRepository", "ExtractionSkillOverrideRepository"] - +__all__ = [ + "ExtractionAgentSessionRepository", + "ExtractionSessionRunMetricsReader", + "ExtractionSkillOverrideRepository", +] diff --git a/src/api/extraction/infrastructure/repositories/session_run_metrics_reader.py b/src/api/extraction/infrastructure/repositories/session_run_metrics_reader.py new file mode 100644 index 000000000..e6888f7da --- /dev/null +++ b/src/api/extraction/infrastructure/repositories/session_run_metrics_reader.py @@ -0,0 +1,82 @@ +"""PostgreSQL reader for extraction session run metrics.""" + +from __future__ import annotations + +from sqlalchemy import text +from sqlalchemy.ext.asyncio import AsyncSession + +from extraction.domain.value_objects import ExtractionSessionRunMetric +from extraction.ports.repositories import IExtractionSessionRunMetricsReader + + +class ExtractionSessionRunMetricsReader(IExtractionSessionRunMetricsReader): + """Resolve sync-run metrics for extraction sessions without Management imports.""" + + def __init__(self, session: AsyncSession) -> None: + self._session = session + + async def find_metrics_by_session_ids( + self, + *, + knowledge_graph_id: str, + session_ids: list[str], + ) -> dict[str, list[ExtractionSessionRunMetric]]: + if not session_ids: + return {} + + stmt = text( + """ + SELECT + sr.id AS sync_run_id, + sr.status, + sr.started_at, + sr.completed_at, + sr.mutation_log_run + FROM data_source_sync_runs sr + JOIN data_sources ds ON ds.id = sr.data_source_id + WHERE ds.knowledge_graph_id = :knowledge_graph_id + AND sr.mutation_log_run IS NOT NULL + AND 
sr.mutation_log_run->>'session_id' = ANY(:session_ids) + ORDER BY sr.started_at DESC + """ + ) + result = await self._session.execute( + stmt, + { + "knowledge_graph_id": knowledge_graph_id, + "session_ids": session_ids, + }, + ) + + metrics_by_session: dict[str, list[ExtractionSessionRunMetric]] = { + session_id: [] for session_id in session_ids + } + for row in result.mappings().all(): + payload = row["mutation_log_run"] or {} + session_id = payload.get("session_id") + if session_id not in metrics_by_session: + continue + metrics_by_session[session_id].append( + ExtractionSessionRunMetric( + sync_run_id=row["sync_run_id"], + mutation_log_id=payload.get("mutation_log_id"), + status=row["status"], + started_at=row["started_at"], + completed_at=row["completed_at"], + token_usage_total=( + int(payload["token_usage_total"]) + if payload.get("token_usage_total") is not None + else None + ), + cost_total_usd=( + float(payload["cost_total_usd"]) + if payload.get("cost_total_usd") is not None + else None + ), + operation_counts={ + str(key): int(value) + for key, value in (payload.get("operation_counts") or {}).items() + }, + ) + ) + return metrics_by_session diff --git a/src/api/extraction/ports/repositories.py b/src/api/extraction/ports/repositories.py index c9c9bb597..03c902fed 100644 --- a/src/api/extraction/ports/repositories.py +++ b/src/api/extraction/ports/repositories.py @@ -5,7 +5,7 @@ from typing import Protocol from extraction.domain.entities.agent_session import ExtractionAgentSession -from extraction.domain.value_objects import ExtractionSessionMode +from extraction.domain.value_objects import ExtractionSessionMode, ExtractionSessionRunMetric class IExtractionAgentSessionRepository(Protocol): @@ -30,6 +30,17 @@ async def list_by_scope( ) -> list[ExtractionAgentSession]: ... 
+class IExtractionSessionRunMetricsReader(Protocol): + """Read-only access to run-level metrics linked to extraction sessions.""" + + async def find_metrics_by_session_ids( + self, + *, + knowledge_graph_id: str, + session_ids: list[str], + ) -> dict[str, list[ExtractionSessionRunMetric]]: ... + + class IExtractionSkillOverrideRepository(Protocol): """Read KG-specific skill override templates.""" diff --git a/src/api/extraction/presentation/models.py b/src/api/extraction/presentation/models.py index 781791ad7..214deb3d4 100644 --- a/src/api/extraction/presentation/models.py +++ b/src/api/extraction/presentation/models.py @@ -7,8 +7,39 @@ from pydantic import BaseModel, Field +from extraction.application.agent_session_service import ExtractionSessionHistoryRecord from extraction.domain.entities.agent_session import ExtractionAgentSession -from extraction.domain.value_objects import BootstrapIntakePath, ExtractionSessionMode +from extraction.domain.value_objects import ( + BootstrapIntakePath, + ExtractionSessionMode, + ExtractionSessionRunMetric, +) + + +class SessionRunMetricResponse(BaseModel): + """Run-level metrics linked to an extraction session.""" + + sync_run_id: str + mutation_log_id: str | None = None + status: str + started_at: datetime + completed_at: datetime | None = None + token_usage_total: int | None = None + cost_total_usd: float | None = None + operation_counts: dict[str, int] = Field(default_factory=dict) + + @classmethod + def from_domain(cls, metric: ExtractionSessionRunMetric) -> "SessionRunMetricResponse": + return cls( + sync_run_id=metric.sync_run_id, + mutation_log_id=metric.mutation_log_id, + status=metric.status, + started_at=metric.started_at, + completed_at=metric.completed_at, + token_usage_total=metric.token_usage_total, + cost_total_usd=metric.cost_total_usd, + operation_counts=dict(metric.operation_counts), + ) class ExtractionSessionResponse(BaseModel): @@ -46,6 +77,50 @@ class ExtractionSessionListResponse(BaseModel): count: int 
+class ExtractionSessionHistoryItemResponse(BaseModel): + """Historical session summary with linked run metrics.""" + + id: str + user_id: str + knowledge_graph_id: str + mode: ExtractionSessionMode + created_at: datetime + updated_at: datetime + archived_at: datetime | None = None + is_active: bool + message_count: int + run_metrics: list[SessionRunMetricResponse] = Field(default_factory=list) + + @classmethod + def from_history_record( + cls, + record: ExtractionSessionHistoryRecord, + ) -> "ExtractionSessionHistoryItemResponse": + session = record.session + return cls( + id=session.id, + user_id=session.user_id, + knowledge_graph_id=session.knowledge_graph_id, + mode=session.mode, + created_at=session.created_at, + updated_at=session.updated_at, + archived_at=session.archived_at, + is_active=session.is_active, + message_count=len(session.message_history), + run_metrics=[ + SessionRunMetricResponse.from_domain(metric) + for metric in record.run_metrics + ], + ) + + +class ExtractionSessionHistoryResponse(BaseModel): + """History response for scoped extraction sessions.""" + + sessions: list[ExtractionSessionHistoryItemResponse] + count: int + + class BootstrapIntakePathSelectionRequest(BaseModel): """Request model for bootstrap intake path selection.""" @@ -54,4 +129,3 @@ class BootstrapIntakePathSelectionRequest(BaseModel): default=None, description="Optional user summary of capabilities and schema goals", ) - diff --git a/src/api/extraction/presentation/routes.py b/src/api/extraction/presentation/routes.py index 1301cd224..0db6810af 100644 --- a/src/api/extraction/presentation/routes.py +++ b/src/api/extraction/presentation/routes.py @@ -11,6 +11,8 @@ from extraction.domain.value_objects import ExtractionSessionMode from extraction.presentation.models import ( BootstrapIntakePathSelectionRequest, + ExtractionSessionHistoryItemResponse, + ExtractionSessionHistoryResponse, ExtractionSessionListResponse, ExtractionSessionResponse, ) @@ -97,6 +99,36 @@ async def 
list_sessions( return ExtractionSessionListResponse(sessions=payload, count=len(payload)) +@router.get( + "/knowledge-graphs/{knowledge_graph_id}/sessions/{mode}/history", + response_model=ExtractionSessionHistoryResponse, +) +async def list_session_history( + knowledge_graph_id: str, + mode: ExtractionSessionMode, + current_user: Annotated[CurrentUser, Depends(get_current_user)], + service: Annotated[ + ExtractionAgentSessionService, Depends(get_extraction_agent_session_service) + ], + authz: Annotated[AuthorizationProvider, Depends(get_spicedb_client)], +) -> ExtractionSessionHistoryResponse: + await _assert_kg_edit_permission( + authz=authz, + current_user=current_user, + knowledge_graph_id=knowledge_graph_id, + ) + history = await service.list_session_history( + user_id=current_user.user_id.value, + knowledge_graph_id=knowledge_graph_id, + mode=mode, + ) + payload = [ + ExtractionSessionHistoryItemResponse.from_history_record(record) + for record in history + ] + return ExtractionSessionHistoryResponse(sessions=payload, count=len(payload)) + + @router.post( "/knowledge-graphs/{knowledge_graph_id}/sessions/{mode}/clear-chat", response_model=ExtractionSessionResponse, diff --git a/src/api/tests/integration/extraction/__init__.py b/src/api/tests/integration/extraction/__init__.py new file mode 100644 index 000000000..c4b79f6e5 --- /dev/null +++ b/src/api/tests/integration/extraction/__init__.py @@ -0,0 +1 @@ +"""Integration tests for Extraction bounded context.""" diff --git a/src/api/tests/integration/extraction/conftest.py b/src/api/tests/integration/extraction/conftest.py new file mode 100644 index 000000000..1ce85faff --- /dev/null +++ b/src/api/tests/integration/extraction/conftest.py @@ -0,0 +1,3 @@ +"""Integration test fixtures for Extraction bounded context.""" + +pytest_plugins = ["tests.integration.management.conftest"] diff --git a/src/api/tests/integration/extraction/test_session_history_retention.py 
b/src/api/tests/integration/extraction/test_session_history_retention.py new file mode 100644 index 000000000..14e1763bb --- /dev/null +++ b/src/api/tests/integration/extraction/test_session_history_retention.py @@ -0,0 +1,192 @@ +"""Integration tests for archived extraction session history and run metadata.""" + +from __future__ import annotations + +from datetime import UTC, datetime + +import pytest +from sqlalchemy import text + +from extraction.application.agent_session_service import ExtractionAgentSessionService +from extraction.domain.value_objects import ExtractionSessionMode +from extraction.infrastructure.repositories import ( + ExtractionAgentSessionRepository, + ExtractionSessionRunMetricsReader, +) +from management.application.services.data_source_service import DataSourceService +from management.application.services.knowledge_graph_service import KnowledgeGraphService +from management.domain.aggregates import KnowledgeGraph +from management.domain.entities.data_source_sync_run import MutationLogRunMetadata +from management.domain.value_objects import EdgeTypeDefinition, NodeTypeDefinition, OntologyConfig +from shared_kernel.datasource_types import DataSourceAdapterType +from tests.fakes.authorization import InMemoryAuthorizationProvider + +pytestmark = pytest.mark.integration + + +@pytest.mark.asyncio +async def test_archived_session_history_retains_linked_run_metadata( + async_session, + clean_management_data: None, + knowledge_graph_repository, + data_source_repository, + data_source_sync_run_repository, + test_tenant: str, + test_workspace: str, +) -> None: + """Clear chat archives sessions while history retrieval keeps run metrics.""" + table_check = await async_session.execute( + text( + """ + SELECT 1 + FROM information_schema.tables + WHERE table_name = 'extraction_agent_sessions' + """ + ) + ) + if table_check.scalar_one_or_none() is None: + pytest.skip("extraction_agent_sessions table is missing in local integration database") + await 
async_session.rollback() + + user_id = "user-integration-session-history" + authz = InMemoryAuthorizationProvider() + + kg_service = KnowledgeGraphService( + session=async_session, + knowledge_graph_repository=knowledge_graph_repository, + data_source_repository=data_source_repository, + sync_run_repository=data_source_sync_run_repository, + secret_store=None, + authz=authz, + scope_to_tenant=test_tenant, + ) + ds_service = DataSourceService( + session=async_session, + data_source_repository=data_source_repository, + knowledge_graph_repository=knowledge_graph_repository, + sync_run_repository=data_source_sync_run_repository, + secret_store=None, + authz=authz, + scope_to_tenant=test_tenant, + ) + + knowledge_graph = KnowledgeGraph.create( + tenant_id=test_tenant, + workspace_id=test_workspace, + name="Session History KG", + description="Archived session history retention", + created_by=user_id, + ) + ontology_config = OntologyConfig( + node_types=( + NodeTypeDefinition(label="Repository"), + NodeTypeDefinition( + label="SeedNode", + prepopulated=True, + prepopulated_instance_count=1, + ), + ), + edge_types=( + EdgeTypeDefinition( + label="CONTAINS", + source_labels=("Repository",), + target_labels=("SeedNode",), + ), + ), + ) + knowledge_graph.set_ontology(ontology_config) + async with async_session.begin(): + await knowledge_graph_repository.save(knowledge_graph) + + await authz.write_relationship( + f"knowledge_graph:{knowledge_graph.id.value}", + "admin", + f"user:{user_id}", + ) + await kg_service.save_ontology( + user_id=user_id, + kg_id=knowledge_graph.id.value, + config=ontology_config, + ) + transitioned = await kg_service.transition_workspace_to_extraction( + user_id=user_id, + kg_id=knowledge_graph.id.value, + ) + assert transitioned.session_pointers.active_extraction_operations_session_id is not None + + session_repo = ExtractionAgentSessionRepository(session=async_session) + metrics_reader = ExtractionSessionRunMetricsReader(session=async_session) + 
session_service = ExtractionAgentSessionService( + repository=session_repo, + run_metrics_reader=metrics_reader, + ) + + active = await session_service.get_or_create_active_session( + user_id=user_id, + knowledge_graph_id=knowledge_graph.id.value, + mode=ExtractionSessionMode.EXTRACTION_OPERATIONS, + ) + session_id = active.id + + data_source = await ds_service.create( + user_id=user_id, + kg_id=knowledge_graph.id.value, + name="History Source", + adapter_type=DataSourceAdapterType.GITHUB, + connection_config={"repo_url": "https://github.com/example/repo"}, + ) + await authz.write_relationship( + f"data_source:{data_source.id.value}", + "manage", + f"user:{user_id}", + ) + sync_run = await ds_service.trigger_sync( + user_id=user_id, + ds_id=data_source.id.value, + ) + sync_run.status = "completed" + sync_run.completed_at = datetime.now(UTC) + sync_run.mutation_log_run = MutationLogRunMetadata( + mutation_log_id="mlog-history-001", + knowledge_graph_id=knowledge_graph.id.value, + session_id=session_id, + actor_id=user_id, + started_at=sync_run.started_at, + completed_at=sync_run.completed_at, + token_usage_total=1024, + cost_total_usd=0.88, + operation_counts={"create_node": 4}, + ) + async with async_session.begin(): + await data_source_sync_run_repository.save(sync_run) + + archived_session = await session_service.clear_chat( + user_id=user_id, + knowledge_graph_id=knowledge_graph.id.value, + mode=ExtractionSessionMode.EXTRACTION_OPERATIONS, + ) + assert archived_session.id != session_id + + history = await session_service.list_session_history( + user_id=user_id, + knowledge_graph_id=knowledge_graph.id.value, + mode=ExtractionSessionMode.EXTRACTION_OPERATIONS, + ) + + assert len(history) == 2 + archived_record = next(item for item in history if item.session.id == session_id) + assert archived_record.session.archived_at is not None + assert archived_record.session.updated_at is not None + assert len(archived_record.run_metrics) == 1 + assert 
archived_record.run_metrics[0].mutation_log_id == "mlog-history-001" + assert archived_record.run_metrics[0].token_usage_total == 1024 + assert archived_record.run_metrics[0].operation_counts == {"create_node": 4} + + still_archived = await session_repo.get_by_id(session_id) + assert still_archived is not None + assert still_archived.archived_at is not None + + runs = await data_source_sync_run_repository.find_by_data_source(data_source.id.value) + assert len(runs) == 1 + assert runs[0].mutation_log_run is not None + assert runs[0].mutation_log_run.session_id == session_id diff --git a/src/api/tests/integration/management/conftest.py b/src/api/tests/integration/management/conftest.py index 8167f93bf..ebf6f1239 100644 --- a/src/api/tests/integration/management/conftest.py +++ b/src/api/tests/integration/management/conftest.py @@ -105,6 +105,7 @@ async def cleanup() -> None: ) ) await async_session.execute(text("DELETE FROM data_source_sync_runs")) + await async_session.execute(text("DELETE FROM extraction_agent_sessions")) await async_session.execute(text("DELETE FROM data_sources")) await async_session.execute(text("DELETE FROM knowledge_graphs")) await async_session.commit() diff --git a/src/api/tests/unit/extraction/application/test_session_history_service.py b/src/api/tests/unit/extraction/application/test_session_history_service.py new file mode 100644 index 000000000..9977f94c7 --- /dev/null +++ b/src/api/tests/unit/extraction/application/test_session_history_service.py @@ -0,0 +1,164 @@ +"""Unit tests for extraction session history with run-level metrics.""" + +from __future__ import annotations + +from dataclasses import replace +from datetime import UTC, datetime + +import pytest + +from extraction.application.agent_session_service import ExtractionAgentSessionService +from extraction.domain.entities.agent_session import ExtractionAgentSession +from extraction.domain.value_objects import ExtractionSessionMode, ExtractionSessionRunMetric +from 
extraction.domain.value_objects import ExtractionSessionMode as Mode + + +class _InMemoryAgentSessionRepository: + def __init__(self) -> None: + self._by_id: dict[str, ExtractionAgentSession] = {} + + async def save(self, session: ExtractionAgentSession) -> None: + self._by_id[session.id] = replace(session) + + async def get_by_id(self, session_id: str) -> ExtractionAgentSession | None: + session = self._by_id.get(session_id) + return replace(session) if session else None + + async def find_active_by_scope( + self, + user_id: str, + knowledge_graph_id: str, + mode: ExtractionSessionMode, + ) -> ExtractionAgentSession | None: + for session in self._by_id.values(): + if ( + session.user_id == user_id + and session.knowledge_graph_id == knowledge_graph_id + and session.mode == mode + and session.archived_at is None + ): + return replace(session) + return None + + async def list_by_scope( + self, + user_id: str, + knowledge_graph_id: str, + mode: ExtractionSessionMode | None = None, + ) -> list[ExtractionAgentSession]: + sessions = [ + replace(session) + for session in self._by_id.values() + if session.user_id == user_id + and session.knowledge_graph_id == knowledge_graph_id + and (mode is None or session.mode == mode) + ] + return sorted(sessions, key=lambda s: s.updated_at, reverse=True) + + +class _InMemoryRunMetricsReader: + def __init__(self) -> None: + self._metrics: dict[str, list[ExtractionSessionRunMetric]] = {} + + def seed(self, session_id: str, metric: ExtractionSessionRunMetric) -> None: + self._metrics.setdefault(session_id, []).append(metric) + + async def find_metrics_by_session_ids( + self, + *, + knowledge_graph_id: str, + session_ids: list[str], + ) -> dict[str, list[ExtractionSessionRunMetric]]: + del knowledge_graph_id + return { + session_id: list(self._metrics.get(session_id, [])) + for session_id in session_ids + } + + +@pytest.mark.asyncio +class TestExtractionSessionHistoryService: + async def 
test_list_session_history_includes_archived_sessions_with_metrics(self): + repo = _InMemoryAgentSessionRepository() + metrics_reader = _InMemoryRunMetricsReader() + service = ExtractionAgentSessionService( + repository=repo, + run_metrics_reader=metrics_reader, + ) + + archived = await service.get_or_create_active_session( + user_id="user-1", + knowledge_graph_id="kg-1", + mode=Mode.EXTRACTION_OPERATIONS, + ) + archived.message_history = [{"role": "user", "content": "hello"}] + archived.updated_at = datetime(2026, 5, 20, 12, 0, tzinfo=UTC) + await repo.save(archived) + metrics_reader.seed( + archived.id, + ExtractionSessionRunMetric( + sync_run_id="run-1", + mutation_log_id="mlog-1", + status="completed", + started_at=datetime(2026, 5, 20, 11, 0, tzinfo=UTC), + completed_at=datetime(2026, 5, 20, 11, 30, tzinfo=UTC), + token_usage_total=512, + cost_total_usd=0.42, + operation_counts={"create_node": 3}, + ), + ) + + await service.clear_chat( + user_id="user-1", + knowledge_graph_id="kg-1", + mode=Mode.EXTRACTION_OPERATIONS, + ) + + history = await service.list_session_history( + user_id="user-1", + knowledge_graph_id="kg-1", + mode=Mode.EXTRACTION_OPERATIONS, + ) + + assert len(history) == 2 + archived_record = next(item for item in history if item.session.archived_at is not None) + assert archived_record.session.id == archived.id + assert archived_record.session.updated_at is not None + assert archived_record.session.archived_at is not None + assert len(archived_record.run_metrics) == 1 + assert archived_record.run_metrics[0].mutation_log_id == "mlog-1" + assert archived_record.run_metrics[0].token_usage_total == 512 + + async def test_clear_chat_retains_archived_sessions_for_history(self): + repo = _InMemoryAgentSessionRepository() + metrics_reader = _InMemoryRunMetricsReader() + service = ExtractionAgentSessionService( + repository=repo, + run_metrics_reader=metrics_reader, + ) + + first = await service.get_or_create_active_session( + user_id="user-1", + 
knowledge_graph_id="kg-1", + mode=Mode.EXTRACTION_OPERATIONS, + ) + await service.clear_chat( + user_id="user-1", + knowledge_graph_id="kg-1", + mode=Mode.EXTRACTION_OPERATIONS, + ) + await service.clear_chat( + user_id="user-1", + knowledge_graph_id="kg-1", + mode=Mode.EXTRACTION_OPERATIONS, + ) + + history = await service.list_session_history( + user_id="user-1", + knowledge_graph_id="kg-1", + mode=Mode.EXTRACTION_OPERATIONS, + ) + + assert len(history) == 3 + assert any(item.session.id == first.id and item.session.archived_at is not None for item in history) + assert sum(1 for item in history if item.session.archived_at is None) == 1 diff --git a/src/api/tests/unit/extraction/presentation/test_routes.py b/src/api/tests/unit/extraction/presentation/test_routes.py index 96a5d1df3..f82a6f5c1 100644 --- a/src/api/tests/unit/extraction/presentation/test_routes.py +++ b/src/api/tests/unit/extraction/presentation/test_routes.py @@ -197,3 +197,30 @@ def test_select_bootstrap_intake_path_persists_choice(self, extraction_client): assert intake["selected_path"] == BootstrapIntakePath.GUIDED_CO_DESIGN.value assert intake["status"] == "path_selected" + def test_session_history_endpoint_returns_archived_sessions_with_run_metrics( + self, extraction_client + ): + client, _ = extraction_client + active = client.get( + "/extraction/knowledge-graphs/kg-123/sessions/extraction_operations/active" + ) + assert active.status_code == status.HTTP_200_OK + archived_id = active.json()["id"] + + client.post( + "/extraction/knowledge-graphs/kg-123/sessions/extraction_operations/clear-chat" + ) + + response = client.get( + "/extraction/knowledge-graphs/kg-123/sessions/extraction_operations/history" + ) + assert response.status_code == status.HTTP_200_OK + payload = response.json() + assert payload["count"] == 2 + archived = next( + row for row in payload["sessions"] if row["id"] == archived_id + ) + assert archived["archived_at"] is not None + assert archived["updated_at"] is not None + 
assert archived["run_metrics"] == [] + diff --git a/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue b/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue index 21eaf07e4..5ad56ad70 100644 --- a/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue +++ b/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue @@ -78,6 +78,27 @@ interface ExtractionSessionResponse { updated_at: string } +interface SessionRunMetricView { + sync_run_id: string + mutation_log_id: string | null + status: string + started_at: string + completed_at: string | null + token_usage_total: number | null + cost_total_usd: number | null + operation_counts: Record<string, number> +} + +interface ExtractionSessionHistoryItem { + id: string + created_at: string + updated_at: string + archived_at: string | null + is_active: boolean + message_count: number + run_metrics: SessionRunMetricView[] +} + const route = useRoute() const { hasTenant, tenantVersion } = useTenant() const { extractErrorMessage } = useErrorHandler() @@ -90,8 +111,10 @@ const loading = ref(false) const validating = ref(false) const transitioning = ref(false) const sessionLoading = ref(false) +const sessionHistoryLoading = ref(false) const clearingChat = ref(false) const extractionSession = ref<ExtractionSessionResponse | null>(null) +const sessionHistory = ref<ExtractionSessionHistoryItem[]>([]) const extractionTab = ref('extraction-jobs') const draftMessage = ref('') const statusProjection = ref<WorkspaceStatusResponse | null>(null) @@ -311,6 +334,24 @@ async function loadExtractionSession() { } } +async function loadSessionHistory() { + if (!kgId.value) return + sessionHistoryLoading.value = true + try { + const response = await apiFetch<{ sessions: ExtractionSessionHistoryItem[] }>( + `/extraction/knowledge-graphs/${kgId.value}/sessions/${sessionMode.value}/history`, + ) + sessionHistory.value = response.sessions + } catch (err) { + sessionHistory.value = [] + toast.error('Failed to load session history', { + 
description: extractErrorMessage(err), + }) + } finally { + sessionHistoryLoading.value = false + } +} + async function validateWorkspace() { if (!kgId.value) return validating.value = true @@ -358,6 +399,7 @@ async function clearChat() { { method: 'POST' }, ) toast.success('Extraction chat cleared') + await loadSessionHistory() } catch (err) { toast.error('Failed to clear chat', { description: extractErrorMessage(err), @@ -391,6 +433,7 @@ watch( (mode) => { if (mode) { loadExtractionSession() + loadSessionHistory() } }, ) @@ -749,6 +792,75 @@ watch( </p> </div> </CardContent> + <CardContent class="space-y-3 border-t pt-4"> + <div class="flex items-center justify-between"> + <p class="text-xs font-medium uppercase tracking-wider text-muted-foreground"> + Session History + </p> + <Button + size="sm" + variant="ghost" + class="h-6 px-2 text-[10px]" + :disabled="sessionHistoryLoading" + @click="loadSessionHistory" + > + Refresh + </Button> + </div> + <div + v-if="sessionHistoryLoading" + class="flex items-center gap-2 text-xs text-muted-foreground" + > + <Loader2 class="size-3.5 animate-spin" /> + Loading session history... + </div> + <div + v-else-if="sessionHistory.length === 0" + class="rounded border border-dashed px-3 py-4 text-xs text-muted-foreground" + > + No archived or active sessions found for this scope yet. + </div> + <div v-else class="space-y-2"> + <div + v-for="entry in sessionHistory" + :key="entry.id" + class="rounded border px-3 py-2 text-xs" + > + <div class="flex flex-wrap items-center justify-between gap-2"> + <p class="font-mono break-all">{{ entry.id }}</p> + <Badge :variant="entry.is_active ? 'default' : 'secondary'"> + {{ entry.is_active ? 
'Active' : 'Archived' }} + </Badge> + </div> + <p class="mt-1 text-muted-foreground"> + Updated {{ new Date(entry.updated_at).toLocaleString() }} + <span v-if="entry.archived_at"> + · Archived {{ new Date(entry.archived_at).toLocaleString() }} + </span> + </p> + <p class="mt-1 text-muted-foreground"> + {{ entry.message_count }} message(s) + · {{ entry.run_metrics.length }} linked run(s) + </p> + <div + v-if="entry.run_metrics.length > 0" + class="mt-2 space-y-1.5 rounded border bg-muted/20 p-2" + > + <div + v-for="metric in entry.run_metrics" + :key="metric.sync_run_id" + class="flex flex-wrap items-center justify-between gap-2" + > + <span class="font-mono">{{ metric.mutation_log_id ?? metric.sync_run_id }}</span> + <span class="text-muted-foreground"> + {{ metric.token_usage_total ?? 0 }} tokens · + ${{ (metric.cost_total_usd ?? 0).toFixed(2) }} + </span> + </div> + </div> + </div> + </div> + </CardContent> </Card> <div class="space-y-4"> diff --git a/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts b/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts index 65555968a..065facfd7 100644 --- a/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts +++ b/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts @@ -72,6 +72,15 @@ describe('Knowledge Graph Manage Workspace - mode-aware controls', () => { expect(manageWorkspaceVue).toContain('active_extraction_operations_session_id') }) + it('loads scoped session history with run metrics after clear chat', () => { + expect(manageWorkspaceVue).toContain('loadSessionHistory') + expect(manageWorkspaceVue).toContain('/sessions/${sessionMode.value}/history') + expect(manageWorkspaceVue).toContain('sessionHistory') + expect(manageWorkspaceVue).toContain('run_metrics') + expect(manageWorkspaceVue).toContain('Session History') + }) + + it('uses shared conversation panel for bootstrap and extraction sessions', () => { expect(manageWorkspaceVue).toContain('SharedConversationPanel') 
expect(manageWorkspaceVue).toContain('sessionMode') @@ -81,7 +90,7 @@ describe('Knowledge Graph Manage Workspace - mode-aware controls', () => { it('supports explicit Clear chat reset for extraction session', () => { expect(manageWorkspaceVue).toContain('clearChat') expect(manageWorkspaceVue).toContain('/sessions/${sessionMode.value}/clear-chat') - expect(manageWorkspaceVue).toContain('Clear chat') + expect(sharedConversationPanelVue).toContain('Clear chat') }) it('provides tabbed lower operations area for extraction workflows', () => { From cef63a1dabb7640f599408d2fce4932c31aa347a Mon Sep 17 00:00:00 2001 From: Austin Redenbaugh <aredenba@redhat.com> Date: Fri, 22 May 2026 12:21:09 -0400 Subject: [PATCH 044/153] feat(management): persist canonical schema via graph type definitions (#733) Route ontology save/read and workspace readiness through KG-scoped mutation-log DEFINE persistence, with JSONB fallback for unmigrated data. Partial for #718: graph schema API and extraction bootstrap still use legacy in-memory type-definition storage pending full migration. 
Co-authored-by: Cursor <cursoragent@cursor.com> --- .../graph/infrastructure/models/__init__.py | 1 + .../models/knowledge_graph_type_definition.py | 32 +++ .../infrastructure/noop_mutation_applier.py | 14 ++ .../postgres_kg_type_definition_store.py | 117 ++++++++++ .../canonical_schema/__init__.py | 1 + .../graph_canonical_schema_repository.py | 122 ++++++++++ .../ontology_mutation_builder.py | 56 +++++ .../canonical_schema/ontology_projection.py | 50 +++++ ...3e4f5a_create_kg_type_definitions_table.py | 63 ++++++ .../services/knowledge_graph_service.py | 40 +++- .../dependencies/knowledge_graph.py | 4 + src/api/management/ports/canonical_schema.py | 24 ++ src/api/management/ports/exceptions.py | 6 + src/api/tests/fakes/canonical_schema.py | 28 +++ .../tests/integration/management/conftest.py | 3 + .../test_canonical_schema_source.py | 209 ++++++++++++++++++ .../test_ontology_mutation_builder.py | 42 ++++ .../test_canonical_schema_service.py | 134 +++++++++++ .../test_knowledge_graph_service.py | 97 ++++---- 19 files changed, 983 insertions(+), 60 deletions(-) create mode 100644 src/api/graph/infrastructure/models/__init__.py create mode 100644 src/api/graph/infrastructure/models/knowledge_graph_type_definition.py create mode 100644 src/api/graph/infrastructure/noop_mutation_applier.py create mode 100644 src/api/graph/infrastructure/postgres_kg_type_definition_store.py create mode 100644 src/api/infrastructure/canonical_schema/__init__.py create mode 100644 src/api/infrastructure/canonical_schema/graph_canonical_schema_repository.py create mode 100644 src/api/infrastructure/canonical_schema/ontology_mutation_builder.py create mode 100644 src/api/infrastructure/canonical_schema/ontology_projection.py create mode 100644 src/api/infrastructure/migrations/versions/fb1c2d3e4f5a_create_kg_type_definitions_table.py create mode 100644 src/api/management/ports/canonical_schema.py create mode 100644 src/api/tests/fakes/canonical_schema.py create mode 100644 
src/api/tests/integration/management/test_canonical_schema_source.py create mode 100644 src/api/tests/unit/infrastructure/canonical_schema/test_ontology_mutation_builder.py create mode 100644 src/api/tests/unit/management/application/test_canonical_schema_service.py diff --git a/src/api/graph/infrastructure/models/__init__.py b/src/api/graph/infrastructure/models/__init__.py new file mode 100644 index 000000000..978cc41f8 --- /dev/null +++ b/src/api/graph/infrastructure/models/__init__.py @@ -0,0 +1 @@ +"""Graph infrastructure SQLAlchemy models.""" diff --git a/src/api/graph/infrastructure/models/knowledge_graph_type_definition.py b/src/api/graph/infrastructure/models/knowledge_graph_type_definition.py new file mode 100644 index 000000000..102cb1546 --- /dev/null +++ b/src/api/graph/infrastructure/models/knowledge_graph_type_definition.py @@ -0,0 +1,32 @@ +"""SQLAlchemy model for KG-scoped graph type definitions.""" + +from __future__ import annotations + +from sqlalchemy import String, UniqueConstraint +from sqlalchemy.dialects.postgresql import JSONB +from sqlalchemy.orm import Mapped, mapped_column + +from infrastructure.database.models import Base + + +class KnowledgeGraphTypeDefinitionModel(Base): + """Persisted type definition for a knowledge graph schema layer.""" + + __tablename__ = "knowledge_graph_type_definitions" + __table_args__ = ( + UniqueConstraint( + "knowledge_graph_id", + "entity_type", + "label", + name="uq_kg_type_definitions_kg_entity_label", + ), + ) + + id: Mapped[str] = mapped_column(String(26), primary_key=True) + knowledge_graph_id: Mapped[str] = mapped_column(String(26), nullable=False, index=True) + entity_type: Mapped[str] = mapped_column(String(16), nullable=False) + label: Mapped[str] = mapped_column(String(255), nullable=False) + description: Mapped[str] = mapped_column(String(2048), nullable=False, default="") + required_properties: Mapped[list] = mapped_column(JSONB, nullable=False, default=list) + optional_properties: 
Mapped[list] = mapped_column(JSONB, nullable=False, default=list) + metadata_json: Mapped[dict | None] = mapped_column("metadata", JSONB, nullable=True) diff --git a/src/api/graph/infrastructure/noop_mutation_applier.py b/src/api/graph/infrastructure/noop_mutation_applier.py new file mode 100644 index 000000000..6832c440f --- /dev/null +++ b/src/api/graph/infrastructure/noop_mutation_applier.py @@ -0,0 +1,14 @@ +"""No-op mutation applier for schema-only DEFINE batches.""" + +from __future__ import annotations + +from graph.domain.value_objects import MutationOperation, MutationResult + + +class NoOpMutationApplier: + """Accept mutation batches without touching the graph database.""" + + def apply_batch(self, operations: list[MutationOperation]) -> MutationResult: + """Report success for schema-only batches.""" + _ = operations + return MutationResult(success=True, operations_applied=0) diff --git a/src/api/graph/infrastructure/postgres_kg_type_definition_store.py b/src/api/graph/infrastructure/postgres_kg_type_definition_store.py new file mode 100644 index 000000000..dedd29209 --- /dev/null +++ b/src/api/graph/infrastructure/postgres_kg_type_definition_store.py @@ -0,0 +1,117 @@ +"""Postgres-backed canonical schema storage for knowledge graphs.""" + +from __future__ import annotations + +from dataclasses import dataclass +from typing import Any + +from sqlalchemy import delete, select +from sqlalchemy.ext.asyncio import AsyncSession +from ulid import ULID + +from graph.domain.value_objects import EntityType, TypeDefinition +from graph.infrastructure.models.knowledge_graph_type_definition import ( + KnowledgeGraphTypeDefinitionModel, +) + + +@dataclass(frozen=True) +class StoredKnowledgeGraphTypeDefinition: + """Canonical type definition row projected for cross-context mapping.""" + + label: str + entity_type: str + description: str + required_properties: tuple[str, ...] + optional_properties: tuple[str, ...] 
+ metadata: dict[str, Any] + + +class PostgresKnowledgeGraphTypeDefinitionStore: + """Async persistence for KG-scoped canonical type definitions.""" + + def __init__(self, session: AsyncSession) -> None: + self._session = session + + async def delete_all_for_kg(self, kg_id: str) -> None: + """Remove all type definitions for a knowledge graph.""" + stmt = delete(KnowledgeGraphTypeDefinitionModel).where( + KnowledgeGraphTypeDefinitionModel.knowledge_graph_id == kg_id + ) + await self._session.execute(stmt) + + async def upsert_type_definition( + self, + *, + kg_id: str, + type_def: TypeDefinition, + metadata: dict[str, Any] | None = None, + ) -> None: + """Insert or replace a single type definition row.""" + entity_type = type_def.entity_type.value + stmt = select(KnowledgeGraphTypeDefinitionModel).where( + KnowledgeGraphTypeDefinitionModel.knowledge_graph_id == kg_id, + KnowledgeGraphTypeDefinitionModel.entity_type == entity_type, + KnowledgeGraphTypeDefinitionModel.label == type_def.label, + ) + result = await self._session.execute(stmt) + existing = result.scalar_one_or_none() + + payload = { + "description": type_def.description, + "required_properties": sorted(type_def.required_properties), + "optional_properties": sorted(type_def.optional_properties), + "metadata_json": metadata, + } + + if existing is None: + model = KnowledgeGraphTypeDefinitionModel( + id=str(ULID()), + knowledge_graph_id=kg_id, + entity_type=entity_type, + label=type_def.label, + **payload, + ) + self._session.add(model) + else: + existing.description = payload["description"] + existing.required_properties = payload["required_properties"] + existing.optional_properties = payload["optional_properties"] + existing.metadata_json = payload["metadata_json"] + + await self._session.flush() + + async def list_for_kg(self, kg_id: str) -> list[StoredKnowledgeGraphTypeDefinition]: + """Return all canonical type definitions for a knowledge graph.""" + stmt = ( + 
select(KnowledgeGraphTypeDefinitionModel) + .where(KnowledgeGraphTypeDefinitionModel.knowledge_graph_id == kg_id) + .order_by( + KnowledgeGraphTypeDefinitionModel.entity_type, + KnowledgeGraphTypeDefinitionModel.label, + ) + ) + result = await self._session.execute(stmt) + return [self._to_stored(row) for row in result.scalars().all()] + + @staticmethod + def to_type_definition(stored: StoredKnowledgeGraphTypeDefinition) -> TypeDefinition: + """Convert a stored projection to a graph TypeDefinition.""" + return TypeDefinition( + label=stored.label, + entity_type=EntityType(stored.entity_type), + description=stored.description, + required_properties=set(stored.required_properties), + optional_properties=set(stored.optional_properties), + ) + + @staticmethod + def _to_stored(model: KnowledgeGraphTypeDefinitionModel) -> StoredKnowledgeGraphTypeDefinition: + return StoredKnowledgeGraphTypeDefinition( + label=model.label, + entity_type=model.entity_type, + description=model.description, + required_properties=tuple(model.required_properties or []), + optional_properties=tuple(model.optional_properties or []), + metadata=model.metadata_json or {}, + ) diff --git a/src/api/infrastructure/canonical_schema/__init__.py b/src/api/infrastructure/canonical_schema/__init__.py new file mode 100644 index 000000000..42ea042a4 --- /dev/null +++ b/src/api/infrastructure/canonical_schema/__init__.py @@ -0,0 +1 @@ +"""Cross-context canonical schema wiring.""" diff --git a/src/api/infrastructure/canonical_schema/graph_canonical_schema_repository.py b/src/api/infrastructure/canonical_schema/graph_canonical_schema_repository.py new file mode 100644 index 000000000..6a023b13c --- /dev/null +++ b/src/api/infrastructure/canonical_schema/graph_canonical_schema_repository.py @@ -0,0 +1,122 @@ +"""Graph-backed implementation of Management canonical schema port.""" + +from __future__ import annotations + +import json +from typing import Any + +from pydantic import ValidationError +from 
sqlalchemy.ext.asyncio import AsyncSession + +from graph.application.services.graph_mutation_service import GraphMutationService +from graph.domain.value_objects import EntityType, MutationOperation +from graph.infrastructure.noop_mutation_applier import NoOpMutationApplier +from graph.infrastructure.postgres_kg_type_definition_store import ( + PostgresKnowledgeGraphTypeDefinitionStore, +) +from graph.infrastructure.type_definition_repository import InMemoryTypeDefinitionRepository +from infrastructure.canonical_schema.ontology_mutation_builder import ( + edge_type_metadata, + node_type_metadata, + ontology_config_to_define_operations, +) +from infrastructure.canonical_schema.ontology_projection import ( + stored_definitions_to_ontology_config, +) +from management.domain.value_objects import OntologyConfig +from management.ports.canonical_schema import ICanonicalSchemaRepository +from management.ports.exceptions import CanonicalSchemaMutationError + + +class _CollectingTypeDefinitionRepository(InMemoryTypeDefinitionRepository): + """In-memory repository used while applying canonical schema mutations.""" + + pass + + +class GraphCanonicalSchemaRepository(ICanonicalSchemaRepository): + """Persist and read canonical schema through mutation-log DEFINE operations.""" + + def __init__(self, session: AsyncSession) -> None: + self._session = session + self._store = PostgresKnowledgeGraphTypeDefinitionStore(session) + + async def get_ontology(self, kg_id: str) -> OntologyConfig | None: + rows = await self._store.list_for_kg(kg_id) + if not rows: + return None + return stored_definitions_to_ontology_config(rows) + + async def replace_ontology(self, kg_id: str, config: OntologyConfig) -> None: + await self._store.delete_all_for_kg(kg_id) + await self._apply_operations( + kg_id, ontology_config_to_define_operations(config), config + ) + + async def apply_mutation_log(self, kg_id: str, jsonl_content: str) -> None: + operations = self._parse_jsonl(jsonl_content) + if not 
operations: + return + + existing = await self.get_ontology(kg_id) or OntologyConfig() + await self._apply_operations(kg_id, operations, existing) + + async def _apply_operations( + self, + kg_id: str, + operations: list[MutationOperation], + config: OntologyConfig, + ) -> None: + metadata_by_key = _metadata_map_for_config(config) + repo = _CollectingTypeDefinitionRepository() + + for row in await self._store.list_for_kg(kg_id): + repo.save(self._store.to_type_definition(row)) + + service = GraphMutationService( + mutation_applier=NoOpMutationApplier(), + type_definition_repository=repo, + ) + result = service.apply_mutations(operations, knowledge_graph_id=kg_id) + if not result.success: + message = "; ".join(result.errors) if result.errors else "mutation failed" + raise CanonicalSchemaMutationError(message) + + for type_def in repo.get_all(): + metadata = metadata_by_key.get((type_def.label, type_def.entity_type.value)) + await self._store.upsert_type_definition( + kg_id=kg_id, + type_def=type_def, + metadata=metadata, + ) + + @staticmethod + def _parse_jsonl(jsonl_content: str) -> list[MutationOperation]: + operations: list[MutationOperation] = [] + for line_num, line in enumerate(jsonl_content.strip().split("\n"), start=1): + stripped = line.strip() + if not stripped: + continue + try: + operations.append(MutationOperation(**json.loads(stripped))) + except json.JSONDecodeError as exc: + raise CanonicalSchemaMutationError( + f"JSON parse error on line {line_num}: {exc}" + ) from exc + except (TypeError, ValidationError) as exc: # TypeError: the line was valid JSON but not an object, so keyword unpacking failed + raise CanonicalSchemaMutationError( + f"Validation error on line {line_num}: {exc}" + ) from exc + return operations + + +def _metadata_map_for_config( + config: OntologyConfig, +) -> dict[tuple[str, str], dict[str, Any]]: + """Build lookup for authoring metadata preserved outside graph TypeDefinition.""" + metadata: dict[tuple[str, str], dict[str, Any]] = {} + for node_type in config.node_types: + metadata[(node_type.label, EntityType.NODE.value)] =
node_type_metadata(node_type) + for edge_type in config.edge_types: + metadata[(edge_type.label, EntityType.EDGE.value)] = edge_type_metadata(edge_type) + return metadata diff --git a/src/api/infrastructure/canonical_schema/ontology_mutation_builder.py b/src/api/infrastructure/canonical_schema/ontology_mutation_builder.py new file mode 100644 index 000000000..fdfec233b --- /dev/null +++ b/src/api/infrastructure/canonical_schema/ontology_mutation_builder.py @@ -0,0 +1,56 @@ +"""Bridge Management ontology configs to graph DEFINE mutation operations.""" + +from __future__ import annotations + +from graph.domain.value_objects import EntityType, MutationOperation, MutationOperationType +from management.domain.value_objects import OntologyConfig + + +def ontology_config_to_define_operations( + config: OntologyConfig, +) -> list[MutationOperation]: + """Convert an ontology config into DEFINE mutation operations.""" + operations: list[MutationOperation] = [] + + for node_type in config.node_types: + operations.append( + MutationOperation( + op=MutationOperationType.DEFINE, + type=EntityType.NODE, + label=node_type.label, + description=node_type.description or node_type.label, + required_properties=set(node_type.required_properties), + optional_properties=set(node_type.optional_properties), + ) + ) + + for edge_type in config.edge_types: + operations.append( + MutationOperation( + op=MutationOperationType.DEFINE, + type=EntityType.EDGE, + label=edge_type.label, + description=edge_type.description or edge_type.label, + required_properties=set(edge_type.properties), + optional_properties=set(), + ) + ) + + return operations + + +def node_type_metadata(node_type) -> dict: + """Serialize node-type authoring metadata for canonical storage.""" + return { + "prepopulated": node_type.prepopulated, + "prepopulated_instance_count": node_type.prepopulated_instance_count, + } + + +def edge_type_metadata(edge_type) -> dict: + """Serialize edge-type authoring metadata for canonical 
storage.""" + return { + "source_labels": list(edge_type.source_labels), + "target_labels": list(edge_type.target_labels), + "properties": list(edge_type.properties), + } diff --git a/src/api/infrastructure/canonical_schema/ontology_projection.py b/src/api/infrastructure/canonical_schema/ontology_projection.py new file mode 100644 index 000000000..e8e89101f --- /dev/null +++ b/src/api/infrastructure/canonical_schema/ontology_projection.py @@ -0,0 +1,50 @@ +"""Map stored canonical schema rows to Management ontology configs.""" + +from __future__ import annotations + +from graph.infrastructure.postgres_kg_type_definition_store import ( + StoredKnowledgeGraphTypeDefinition, +) +from management.domain.value_objects import ( + EdgeTypeDefinition, + NodeTypeDefinition, + OntologyConfig, +) + + +def stored_definitions_to_ontology_config( + stored_definitions: list[StoredKnowledgeGraphTypeDefinition], +) -> OntologyConfig: + """Project graph-native type definitions to Management OntologyConfig.""" + node_types: list[NodeTypeDefinition] = [] + edge_types: list[EdgeTypeDefinition] = [] + + for stored in stored_definitions: + if stored.entity_type == "node": + node_types.append( + NodeTypeDefinition( + label=stored.label, + description=stored.description, + required_properties=stored.required_properties, + optional_properties=stored.optional_properties, + prepopulated=bool(stored.metadata.get("prepopulated", False)), + prepopulated_instance_count=int( + stored.metadata.get("prepopulated_instance_count", 0) + ), + ) + ) + elif stored.entity_type == "edge": + edge_types.append( + EdgeTypeDefinition( + label=stored.label, + description=stored.description, + source_labels=tuple(stored.metadata.get("source_labels", [])), + target_labels=tuple(stored.metadata.get("target_labels", [])), + properties=tuple(stored.metadata.get("properties", stored.required_properties)), # rows stored without metadata still carry the edge properties in required_properties + ) + ) + + return OntologyConfig( + node_types=tuple(node_types), + edge_types=tuple(edge_types), + ) diff --git
a/src/api/infrastructure/migrations/versions/fb1c2d3e4f5a_create_kg_type_definitions_table.py b/src/api/infrastructure/migrations/versions/fb1c2d3e4f5a_create_kg_type_definitions_table.py new file mode 100644 index 000000000..2d0b2a2f5 --- /dev/null +++ b/src/api/infrastructure/migrations/versions/fb1c2d3e4f5a_create_kg_type_definitions_table.py @@ -0,0 +1,63 @@ +"""Create knowledge_graph_type_definitions table for canonical schema storage. + +Revision ID: fb1c2d3e4f5a +Revises: fa0b1c2d3e4f +Create Date: 2026-05-22 10:00:00.000000 +""" + +from __future__ import annotations + +import sqlalchemy as sa +from alembic import op +from sqlalchemy.dialects import postgresql + +revision = "fb1c2d3e4f5a" +down_revision = "fa0b1c2d3e4f" +branch_labels = None +depends_on = None + + +def upgrade() -> None: + """Create table for graph-native canonical type definitions.""" + op.create_table( + "knowledge_graph_type_definitions", + sa.Column("id", sa.String(length=26), nullable=False), + sa.Column("knowledge_graph_id", sa.String(length=26), nullable=False), + sa.Column("entity_type", sa.String(length=16), nullable=False), + sa.Column("label", sa.String(length=255), nullable=False), + sa.Column("description", sa.String(length=2048), nullable=False, server_default=""), + sa.Column( + "required_properties", + postgresql.JSONB(astext_type=sa.Text()), + nullable=False, + server_default=sa.text("'[]'::jsonb"), + ), + sa.Column( + "optional_properties", + postgresql.JSONB(astext_type=sa.Text()), + nullable=False, + server_default=sa.text("'[]'::jsonb"), + ), + sa.Column("metadata", postgresql.JSONB(astext_type=sa.Text()), nullable=True), + sa.PrimaryKeyConstraint("id"), + sa.UniqueConstraint( + "knowledge_graph_id", + "entity_type", + "label", + name="uq_kg_type_definitions_kg_entity_label", + ), + ) + op.create_index( + "ix_knowledge_graph_type_definitions_knowledge_graph_id", + "knowledge_graph_type_definitions", + ["knowledge_graph_id"], + ) + + +def downgrade() -> None: + """Drop 
canonical type definition table.""" + op.drop_index( + "ix_knowledge_graph_type_definitions_knowledge_graph_id", + table_name="knowledge_graph_type_definitions", + ) + op.drop_table("knowledge_graph_type_definitions") diff --git a/src/api/management/application/services/knowledge_graph_service.py b/src/api/management/application/services/knowledge_graph_service.py index 303b43bf4..ea9101d26 100644 --- a/src/api/management/application/services/knowledge_graph_service.py +++ b/src/api/management/application/services/knowledge_graph_service.py @@ -41,6 +41,7 @@ IDataSourceSyncRunRepository, IKnowledgeGraphRepository, ) +from management.ports.canonical_schema import ICanonicalSchemaRepository from management.ports.secret_store import ISecretStoreRepository from shared_kernel.authorization.protocols import AuthorizationProvider from shared_kernel.authorization.types import ( @@ -69,6 +70,7 @@ def __init__( data_source_repository: IDataSourceRepository | None = None, sync_run_repository: IDataSourceSyncRunRepository | None = None, secret_store: ISecretStoreRepository | None = None, + canonical_schema_repository: ICanonicalSchemaRepository | None = None, ) -> None: """Initialize KnowledgeGraphService with dependencies. 
@@ -80,6 +82,7 @@ def __init__( probe: Optional domain probe for observability data_source_repository: Optional DS repository for cascade delete secret_store: Optional secret store for credential cleanup on cascade delete + canonical_schema_repository: Optional graph-native canonical schema store """ self._session = session self._kg_repo = knowledge_graph_repository @@ -89,6 +92,7 @@ def __init__( self._ds_repo = data_source_repository self._sync_run_repo = sync_run_repository self._secret_store = secret_store + self._canonical_schema_repo = canonical_schema_repository def _compute_next_run_at_utc( self, @@ -764,7 +768,7 @@ async def get_ontology( if not has_view: return None - return await self._kg_repo.get_ontology(kg_id) + return await self._resolve_canonical_ontology(kg_id) async def save_ontology( self, @@ -809,22 +813,33 @@ async def save_ontology( if kg is None or kg.tenant_id != self._scope_to_tenant: raise KnowledgeGraphNotFoundError(f"Knowledge graph {kg_id} not found") - await self._kg_repo.save_ontology(kg_id, config) + if self._canonical_schema_repo is not None: + await self._canonical_schema_repo.replace_ontology(kg_id, config) + else: + await self._kg_repo.save_ontology(kg_id, config) await self._session.commit() return config + async def _resolve_canonical_ontology(self, kg_id: str) -> OntologyConfig | None: + """Load canonical schema from graph-native storage with JSONB fallback.""" + if self._canonical_schema_repo is not None: + canonical = await self._canonical_schema_repo.get_ontology(kg_id) + if canonical is not None: + return canonical + return await self._kg_repo.get_ontology(kg_id) + def _evaluate_workspace_readiness( - self, kg: KnowledgeGraph + self, ontology: OntologyConfig | None ) -> WorkspaceReadinessStatus: - """Evaluate transition readiness flags for workspace status projection.""" - node_type_count = len(kg.ontology.node_types) if kg.ontology else 0 - edge_type_count = len(kg.ontology.edge_types) if kg.ontology else 0 + """Evaluate 
transition readiness flags from canonical schema state.""" + node_type_count = len(ontology.node_types) if ontology else 0 + edge_type_count = len(ontology.edge_types) if ontology else 0 prepopulated_without_instances: tuple[str, ...] = () - if kg.ontology is not None: + if ontology is not None: prepopulated_without_instances = tuple( node_type.label - for node_type in kg.ontology.node_types + for node_type in ontology.node_types if node_type.prepopulated and node_type.prepopulated_instance_count <= 0 ) @@ -870,7 +885,8 @@ async def get_workspace_status( if not has_view: return None - readiness = self._evaluate_workspace_readiness(kg) + ontology = await self._resolve_canonical_ontology(kg_id) + readiness = self._evaluate_workspace_readiness(ontology) transition_eligible = ( kg.workspace_mode == WorkspaceMode.SCHEMA_BOOTSTRAP and readiness.is_ready ) @@ -915,7 +931,8 @@ async def validate_workspace( if kg is None or kg.tenant_id != self._scope_to_tenant: raise KnowledgeGraphNotFoundError(f"Knowledge graph {kg_id} not found") - readiness = self._evaluate_workspace_readiness(kg) + ontology = await self._resolve_canonical_ontology(kg_id) + readiness = self._evaluate_workspace_readiness(ontology) transition_eligible = ( kg.workspace_mode == WorkspaceMode.SCHEMA_BOOTSTRAP and readiness.is_ready ) @@ -945,7 +962,8 @@ async def transition_workspace_to_extraction( if kg is None or kg.tenant_id != self._scope_to_tenant: raise KnowledgeGraphNotFoundError(f"Knowledge graph {kg_id} not found") - readiness = self._evaluate_workspace_readiness(kg) + ontology = await self._resolve_canonical_ontology(kg_id) + readiness = self._evaluate_workspace_readiness(ontology) if not readiness.is_ready: joined_reasons = "; ".join(readiness.blocking_reasons) raise ValueError( diff --git a/src/api/management/dependencies/knowledge_graph.py b/src/api/management/dependencies/knowledge_graph.py index bd298cd9d..03ab069f4 100644 --- a/src/api/management/dependencies/knowledge_graph.py +++ 
b/src/api/management/dependencies/knowledge_graph.py @@ -25,6 +25,9 @@ FernetSecretStore, KnowledgeGraphRepository, ) +from infrastructure.canonical_schema.graph_canonical_schema_repository import ( + GraphCanonicalSchemaRepository, +) from shared_kernel.authorization.protocols import AuthorizationProvider @@ -62,4 +65,5 @@ def get_knowledge_graph_service( authz=authz, scope_to_tenant=current_user.tenant_id.value, probe=DefaultKnowledgeGraphServiceProbe(), + canonical_schema_repository=GraphCanonicalSchemaRepository(session), ) diff --git a/src/api/management/ports/canonical_schema.py b/src/api/management/ports/canonical_schema.py new file mode 100644 index 000000000..46e2a513d --- /dev/null +++ b/src/api/management/ports/canonical_schema.py @@ -0,0 +1,24 @@ +"""Port for graph-native canonical schema access in Management.""" + +from __future__ import annotations + +from typing import Protocol, runtime_checkable + +from management.domain.value_objects import OntologyConfig + + +@runtime_checkable +class ICanonicalSchemaRepository(Protocol): + """Read/write canonical schema state stored as graph type definitions.""" + + async def get_ontology(self, kg_id: str) -> OntologyConfig | None: + """Return canonical schema for a knowledge graph, if any exists.""" + ... + + async def replace_ontology(self, kg_id: str, config: OntologyConfig) -> None: + """Replace canonical schema via mutation-log DEFINE operations.""" + ... + + async def apply_mutation_log(self, kg_id: str, jsonl_content: str) -> None: + """Apply additive schema/entity mutations from JSONL content.""" + ... 
diff --git a/src/api/management/ports/exceptions.py b/src/api/management/ports/exceptions.py index a001125e4..9f9840c75 100644 --- a/src/api/management/ports/exceptions.py +++ b/src/api/management/ports/exceptions.py @@ -48,3 +48,9 @@ class UnauthorizedError(Exception): """ pass + + +class CanonicalSchemaMutationError(Exception): + """Raised when canonical schema mutation-log application fails.""" + + pass diff --git a/src/api/tests/fakes/canonical_schema.py b/src/api/tests/fakes/canonical_schema.py new file mode 100644 index 000000000..d99be1c19 --- /dev/null +++ b/src/api/tests/fakes/canonical_schema.py @@ -0,0 +1,28 @@ +"""In-memory fake for ICanonicalSchemaRepository.""" + +from __future__ import annotations + +from management.domain.value_objects import OntologyConfig + + +class InMemoryCanonicalSchemaRepository: + """Stores canonical schema per knowledge graph for unit tests.""" + + def __init__(self) -> None: + self._store: dict[str, OntologyConfig] = {} + self.replaced: list[tuple[str, OntologyConfig]] = [] + self.applied_logs: list[tuple[str, str]] = [] + + async def get_ontology(self, kg_id: str) -> OntologyConfig | None: + return self._store.get(kg_id) + + async def replace_ontology(self, kg_id: str, config: OntologyConfig) -> None: + self.replaced.append((kg_id, config)) + self._store[kg_id] = config + + async def apply_mutation_log(self, kg_id: str, jsonl_content: str) -> None: + self.applied_logs.append((kg_id, jsonl_content)) + + def seed(self, kg_id: str, config: OntologyConfig) -> None: + """Preload canonical schema for a knowledge graph.""" + self._store[kg_id] = config diff --git a/src/api/tests/integration/management/conftest.py b/src/api/tests/integration/management/conftest.py index ebf6f1239..3d40e75f3 100644 --- a/src/api/tests/integration/management/conftest.py +++ b/src/api/tests/integration/management/conftest.py @@ -107,6 +107,9 @@ async def cleanup() -> None: await async_session.execute(text("DELETE FROM data_source_sync_runs")) await 
async_session.execute(text("DELETE FROM extraction_agent_sessions")) await async_session.execute(text("DELETE FROM data_sources")) + await async_session.execute( + text("DELETE FROM knowledge_graph_type_definitions") + ) await async_session.execute(text("DELETE FROM knowledge_graphs")) await async_session.commit() except ProgrammingError: diff --git a/src/api/tests/integration/management/test_canonical_schema_source.py b/src/api/tests/integration/management/test_canonical_schema_source.py new file mode 100644 index 000000000..ff7c706ad --- /dev/null +++ b/src/api/tests/integration/management/test_canonical_schema_source.py @@ -0,0 +1,209 @@ +"""Integration tests for canonical graph-native schema storage.""" + +from __future__ import annotations + +import json + +import pytest +from sqlalchemy import text + +from graph.domain.value_objects import EntityType, MutationOperationType +from infrastructure.canonical_schema.graph_canonical_schema_repository import ( + GraphCanonicalSchemaRepository, +) +from management.application.services.knowledge_graph_service import KnowledgeGraphService +from management.domain.aggregates import KnowledgeGraph +from management.domain.value_objects import EdgeTypeDefinition, NodeTypeDefinition, OntologyConfig +from tests.fakes.authorization import InMemoryAuthorizationProvider + +pytestmark = pytest.mark.integration + + +async def _table_exists(async_session, table_name: str) -> bool: + result = await async_session.execute( + text( + """ + SELECT 1 + FROM information_schema.tables + WHERE table_name = :table_name + """ + ), + {"table_name": table_name}, + ) + return result.scalar_one_or_none() is not None + + +@pytest.mark.asyncio +async def test_bootstrap_schema_persisted_in_canonical_store_and_readiness( + async_session, + clean_management_data: None, + knowledge_graph_repository, + test_tenant: str, + test_workspace: str, +) -> None: + """Bootstrap ontology flows through mutation-log DEFINE path into canonical store.""" + if not 
await _table_exists(async_session, "knowledge_graph_type_definitions"): + pytest.skip("knowledge_graph_type_definitions table is missing") + + await async_session.rollback() + + user_id = "user-canonical-schema-001" + authz = InMemoryAuthorizationProvider() + canonical_repo = GraphCanonicalSchemaRepository(async_session) + kg_service = KnowledgeGraphService( + session=async_session, + knowledge_graph_repository=knowledge_graph_repository, + authz=authz, + scope_to_tenant=test_tenant, + canonical_schema_repository=canonical_repo, + ) + + knowledge_graph = KnowledgeGraph.create( + tenant_id=test_tenant, + workspace_id=test_workspace, + name="Canonical Schema KG", + description="Bootstrap canonical schema", + created_by=user_id, + ) + ontology_config = OntologyConfig( + node_types=( + NodeTypeDefinition(label="Repository"), + NodeTypeDefinition( + label="SeedNode", + prepopulated=True, + prepopulated_instance_count=1, + ), + ), + edge_types=( + EdgeTypeDefinition( + label="CONTAINS", + source_labels=("Repository",), + target_labels=("SeedNode",), + ), + ), + ) + + async with async_session.begin(): + await knowledge_graph_repository.save(knowledge_graph) + + await authz.write_relationship( + f"knowledge_graph:{knowledge_graph.id.value}", + "admin", + f"user:{user_id}", + ) + + await kg_service.save_ontology( + user_id=user_id, + kg_id=knowledge_graph.id.value, + config=ontology_config, + ) + + canonical = await canonical_repo.get_ontology(knowledge_graph.id.value) + assert canonical is not None + assert {node.label for node in canonical.node_types} == {"Repository", "SeedNode"} + + row_count = await async_session.execute( + text( + """ + SELECT COUNT(*) AS count + FROM knowledge_graph_type_definitions + WHERE knowledge_graph_id = :kg_id + """ + ), + {"kg_id": knowledge_graph.id.value}, + ) + assert row_count.scalar_one() == 3 + + status = await kg_service.get_workspace_status( + user_id=user_id, + kg_id=knowledge_graph.id.value, + ) + assert status is not None + assert 
status.transition_eligible is True + + +@pytest.mark.asyncio +async def test_additive_schema_evolution_in_extraction_mode( + async_session, + clean_management_data: None, + knowledge_graph_repository, + test_tenant: str, + test_workspace: str, +) -> None: + """Extraction mode accepts additive DEFINE mutations via mutation log.""" + if not await _table_exists(async_session, "knowledge_graph_type_definitions"): + pytest.skip("knowledge_graph_type_definitions table is missing") + + await async_session.rollback() + + user_id = "user-canonical-schema-002" + authz = InMemoryAuthorizationProvider() + canonical_repo = GraphCanonicalSchemaRepository(async_session) + kg_service = KnowledgeGraphService( + session=async_session, + knowledge_graph_repository=knowledge_graph_repository, + authz=authz, + scope_to_tenant=test_tenant, + canonical_schema_repository=canonical_repo, + ) + + knowledge_graph = KnowledgeGraph.create( + tenant_id=test_tenant, + workspace_id=test_workspace, + name="Schema Evolution KG", + description="Additive schema evolution", + created_by=user_id, + ) + bootstrap_config = OntologyConfig( + node_types=(NodeTypeDefinition(label="Repository"),), + edge_types=( + EdgeTypeDefinition( + label="CONTAINS", + source_labels=("Repository",), + target_labels=("Repository",), + ), + ), + ) + + async with async_session.begin(): + await knowledge_graph_repository.save(knowledge_graph) + + await authz.write_relationship( + f"knowledge_graph:{knowledge_graph.id.value}", + "admin", + f"user:{user_id}", + ) + await kg_service.save_ontology( + user_id=user_id, + kg_id=knowledge_graph.id.value, + config=bootstrap_config, + ) + await kg_service.transition_workspace_to_extraction( + user_id=user_id, + kg_id=knowledge_graph.id.value, + ) + + additive_define = { + "op": MutationOperationType.DEFINE.value, + "type": EntityType.NODE.value, + "label": "Service", + "description": "A deployable service", + "required_properties": ["slug", "name"], + "optional_properties": [], + } + 
await canonical_repo.apply_mutation_log( + knowledge_graph.id.value, + json.dumps(additive_define), + ) + await async_session.commit() + + canonical = await canonical_repo.get_ontology(knowledge_graph.id.value) + assert canonical is not None + assert {node.label for node in canonical.node_types} == {"Repository", "Service"} + + status = await kg_service.get_workspace_status( + user_id=user_id, + kg_id=knowledge_graph.id.value, + ) + assert status is not None + assert status.workspace_mode.value == "extraction_operations" diff --git a/src/api/tests/unit/infrastructure/canonical_schema/test_ontology_mutation_builder.py b/src/api/tests/unit/infrastructure/canonical_schema/test_ontology_mutation_builder.py new file mode 100644 index 000000000..24602750c --- /dev/null +++ b/src/api/tests/unit/infrastructure/canonical_schema/test_ontology_mutation_builder.py @@ -0,0 +1,42 @@ +"""Unit tests for ontology to DEFINE mutation conversion.""" + +from __future__ import annotations + +from infrastructure.canonical_schema.ontology_mutation_builder import ( + ontology_config_to_define_operations, +) +from management.domain.value_objects import ( + EdgeTypeDefinition, + NodeTypeDefinition, + OntologyConfig, +) + + +class TestOntologyConfigToDefineOperations: + def test_converts_node_and_edge_types(self): + config = OntologyConfig( + node_types=(NodeTypeDefinition(label="Repository", description="Repo"),), + edge_types=( + EdgeTypeDefinition( + label="CONTAINS", + description="Contains relationship", + source_labels=("Repository",), + target_labels=("Repository",), + properties=("weight",), + ), + ), + ) + + operations = ontology_config_to_define_operations(config) + + assert len(operations) == 2 + node_op = operations[0] + edge_op = operations[1] + assert node_op.op == "DEFINE" + assert node_op.type == "node" + assert node_op.label == "Repository" + assert node_op.description == "Repo" + assert edge_op.op == "DEFINE" + assert edge_op.type == "edge" + assert edge_op.label == 
"CONTAINS" + assert edge_op.required_properties == {"weight"} diff --git a/src/api/tests/unit/management/application/test_canonical_schema_service.py b/src/api/tests/unit/management/application/test_canonical_schema_service.py new file mode 100644 index 000000000..07f41aae7 --- /dev/null +++ b/src/api/tests/unit/management/application/test_canonical_schema_service.py @@ -0,0 +1,134 @@ +"""Unit tests for canonical schema integration in KnowledgeGraphService.""" + +from __future__ import annotations + +import pytest + +from management.application.services.knowledge_graph_service import KnowledgeGraphService +from management.domain.value_objects import ( + EdgeTypeDefinition, + NodeTypeDefinition, + OntologyConfig, +) +from tests.fakes.authorization import InMemoryAuthorizationProvider +from tests.fakes.canonical_schema import InMemoryCanonicalSchemaRepository +from tests.fakes.management import ( + InMemoryDataSourceRepository, + InMemoryKnowledgeGraphRepository, + InMemorySecretStoreRepository, + RecordingKnowledgeGraphServiceProbe, +) +from tests.unit.management.application.test_knowledge_graph_service import ( + _grant_kg_edit, + _grant_kg_view, + _make_kg, +) + + +@pytest.fixture +def canonical_schema_repo(): + return InMemoryCanonicalSchemaRepository() + + +@pytest.fixture +def service_with_canonical( + mock_session, kg_repo, authz, canonical_schema_repo, tenant_id +): + return KnowledgeGraphService( + session=mock_session, + knowledge_graph_repository=kg_repo, + data_source_repository=InMemoryDataSourceRepository(), + secret_store=InMemorySecretStoreRepository(), + authz=authz, + scope_to_tenant=tenant_id, + probe=RecordingKnowledgeGraphServiceProbe(), + canonical_schema_repository=canonical_schema_repo, + ) + + +@pytest.fixture +def mock_session(): + from unittest.mock import AsyncMock, MagicMock + + session = MagicMock() + session.commit = AsyncMock() + session.rollback = AsyncMock() + return session + + +@pytest.fixture +def kg_repo(): + return 
InMemoryKnowledgeGraphRepository() + + +@pytest.fixture +def authz(): + return InMemoryAuthorizationProvider() + + +@pytest.fixture +def tenant_id(): + return "tenant-123" + + +@pytest.fixture +def user_id(): + return "user-456" + + +class TestKnowledgeGraphServiceCanonicalSchema: + @pytest.mark.asyncio + async def test_save_ontology_writes_to_canonical_repository( + self, service_with_canonical, canonical_schema_repo, authz, kg_repo, user_id + ): + kg = _make_kg() + kg_repo.seed(kg) + await _grant_kg_edit(authz, kg.id.value, user_id) + config = OntologyConfig( + node_types=(NodeTypeDefinition(label="Repository"),), + edge_types=( + EdgeTypeDefinition( + label="CONTAINS", + source_labels=("Repository",), + target_labels=("Repository",), + ), + ), + ) + + await service_with_canonical.save_ontology( + user_id=user_id, + kg_id=kg.id.value, + config=config, + ) + + assert len(canonical_schema_repo.replaced) == 1 + assert canonical_schema_repo.replaced[0][0] == kg.id.value + + @pytest.mark.asyncio + async def test_workspace_readiness_uses_canonical_schema( + self, service_with_canonical, canonical_schema_repo, authz, kg_repo, user_id + ): + kg = _make_kg() + kg_repo.seed(kg) + canonical_schema_repo.seed( + kg.id.value, + OntologyConfig( + node_types=(NodeTypeDefinition(label="Repository"),), + edge_types=( + EdgeTypeDefinition( + label="CONTAINS", + source_labels=("Repository",), + target_labels=("Repository",), + ), + ), + ), + ) + await _grant_kg_view(authz, kg.id.value, user_id) + + result = await service_with_canonical.get_workspace_status( + user_id=user_id, + kg_id=kg.id.value, + ) + + assert result is not None + assert result.transition_eligible is True diff --git a/src/api/tests/unit/management/application/test_knowledge_graph_service.py b/src/api/tests/unit/management/application/test_knowledge_graph_service.py index b90c9bf2f..1d7e63e8e 100644 --- a/src/api/tests/unit/management/application/test_knowledge_graph_service.py +++ 
b/src/api/tests/unit/management/application/test_knowledge_graph_service.py @@ -156,6 +156,13 @@ def _make_kg( return kg +async def _seed_stored_ontology(kg, kg_repo, config: OntologyConfig) -> None: + """Attach ontology to aggregate and persisted JSONB fallback store.""" + kg.set_ontology(config) + kg_repo.seed(kg) + await kg_repo.save_ontology(kg.id.value, config) + + def _make_ds( ds_id: str = "ds-001", kg_id: str = "kg-001", @@ -445,19 +452,17 @@ async def test_workspace_status_includes_mode_readiness_and_session_pointers( ): """Should project mode/readiness flags and default null session pointers.""" kg = _make_kg() - kg.set_ontology( - OntologyConfig( - node_types=(NodeTypeDefinition(label="Repository"),), - edge_types=( - EdgeTypeDefinition( - label="CONTAINS", - source_labels=("Repository",), - target_labels=("Repository",), - ), + ontology_config = OntologyConfig( + node_types=(NodeTypeDefinition(label="Repository"),), + edge_types=( + EdgeTypeDefinition( + label="CONTAINS", + source_labels=("Repository",), + target_labels=("Repository",), ), - ) + ), ) - kg_repo.seed(kg) + await _seed_stored_ontology(kg, kg_repo, ontology_config) await _grant_kg_view(authz, kg.id.value, user_id) result = await service.get_workspace_status(user_id=user_id, kg_id=kg.id.value) @@ -501,25 +506,23 @@ async def test_workspace_status_fails_for_prepopulated_type_without_instances( ): """Should block transition when prepopulated type has zero instances.""" kg = _make_kg() - kg.set_ontology( - OntologyConfig( - node_types=( - NodeTypeDefinition( - label="Repository", - prepopulated=True, - prepopulated_instance_count=0, - ), + ontology_config = OntologyConfig( + node_types=( + NodeTypeDefinition( + label="Repository", + prepopulated=True, + prepopulated_instance_count=0, ), - edge_types=( - EdgeTypeDefinition( - label="CONTAINS", - source_labels=("Repository",), - target_labels=("Repository",), - ), + ), + edge_types=( + EdgeTypeDefinition( + label="CONTAINS", + 
source_labels=("Repository",), + target_labels=("Repository",), ), - ) + ), ) - kg_repo.seed(kg) + await _seed_stored_ontology(kg, kg_repo, ontology_config) await _grant_kg_view(authz, kg.id.value, user_id) result = await service.get_workspace_status(user_id=user_id, kg_id=kg.id.value) @@ -562,19 +565,17 @@ async def test_transition_workspace_requires_edit_permission( self, service, authz, kg_repo, user_id ): kg = _make_kg() - kg.set_ontology( - OntologyConfig( - node_types=(NodeTypeDefinition(label="Repository"),), - edge_types=( - EdgeTypeDefinition( - label="CONTAINS", - source_labels=("Repository",), - target_labels=("Repository",), - ), + ontology_config = OntologyConfig( + node_types=(NodeTypeDefinition(label="Repository"),), + edge_types=( + EdgeTypeDefinition( + label="CONTAINS", + source_labels=("Repository",), + target_labels=("Repository",), ), - ) + ), ) - kg_repo.seed(kg) + await _seed_stored_ontology(kg, kg_repo, ontology_config) await _grant_kg_view(authz, kg.id.value, user_id) with pytest.raises(UnauthorizedError): @@ -588,19 +589,17 @@ async def test_transition_workspace_changes_mode_and_creates_session_pointer( self, service, authz, kg_repo, user_id ): kg = _make_kg() - kg.set_ontology( - OntologyConfig( - node_types=(NodeTypeDefinition(label="Repository"),), - edge_types=( - EdgeTypeDefinition( - label="CONTAINS", - source_labels=("Repository",), - target_labels=("Repository",), - ), + ontology_config = OntologyConfig( + node_types=(NodeTypeDefinition(label="Repository"),), + edge_types=( + EdgeTypeDefinition( + label="CONTAINS", + source_labels=("Repository",), + target_labels=("Repository",), ), - ) + ), ) - kg_repo.seed(kg) + await _seed_stored_ontology(kg, kg_repo, ontology_config) await _grant_kg_edit(authz, kg.id.value, user_id) result = await service.transition_workspace_to_extraction( From bb655db8027ccd29ac3a9652b5b13efe60ede136 Mon Sep 17 00:00:00 2001 From: Austin Redenbaugh <aredenba@redhat.com> Date: Fri, 22 May 2026 12:38:25 -0400 
Subject: [PATCH 045/153] feat(dev-ui): KG Manage accessibility and state contracts (#725) (#729) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat(dev-ui): add graph management parity with shared chat and hybrid panel Align Graph Management step with KG-MANAGE-006–011/016 using a persistent conversation surface, three-mode switcher, and status rail with mode-specific detail. Co-authored-by: Cursor <cursoragent@cursor.com> * feat(dev-ui): add KG manage accessibility and state contracts (#725) Enforce keyboard chat/send behavior, section-specific loading and error UX, and forbidden-action messaging so manage steps stay predictable and accessible. Co-authored-by: Cursor <cursoragent@cursor.com> --------- Co-authored-by: Cursor <cursoragent@cursor.com> --- .../extraction/SharedConversationPanel.vue | 110 +- .../pages/knowledge-graphs/[kgId]/manage.vue | 1088 +++++++++++------ src/dev-ui/app/tests/kgManageState.test.ts | 190 +++ .../knowledge-graph-manage-workspace.test.ts | 286 ++++- src/dev-ui/app/utils/kgGraphManagement.ts | 167 +++ src/dev-ui/app/utils/kgManageState.ts | 180 +++ src/dev-ui/vitest.config.ts | 5 + 7 files changed, 1569 insertions(+), 457 deletions(-) create mode 100644 src/dev-ui/app/tests/kgManageState.test.ts create mode 100644 src/dev-ui/app/utils/kgGraphManagement.ts create mode 100644 src/dev-ui/app/utils/kgManageState.ts diff --git a/src/dev-ui/app/components/extraction/SharedConversationPanel.vue b/src/dev-ui/app/components/extraction/SharedConversationPanel.vue index ca3593441..bbe2edcf5 100644 --- a/src/dev-ui/app/components/extraction/SharedConversationPanel.vue +++ b/src/dev-ui/app/components/extraction/SharedConversationPanel.vue @@ -1,9 +1,9 @@ <script setup lang="ts"> import { computed, nextTick, ref, watch } from 'vue' -import { Loader2, RefreshCw } from 'lucide-vue-next' +import { Loader2, RefreshCw, SendHorizontal } from 'lucide-vue-next' import { Button } from '@/components/ui/button' 
import { Card, CardContent, CardHeader, CardTitle, CardDescription } from '@/components/ui/card' -import { Input } from '@/components/ui/input' +import { Textarea } from '@/components/ui/textarea' import { AlertDialog, AlertDialogAction, @@ -14,6 +14,7 @@ import { AlertDialogHeader, AlertDialogTitle, } from '@/components/ui/alert-dialog' +import { handleChatInputKeydown } from '@/utils/kgManageState' interface ConversationEntry { role?: string @@ -31,18 +32,33 @@ const props = withDefaults(defineProps<{ session: ConversationSession | null loading?: boolean clearing?: boolean + sending?: boolean draftMessage?: string activityLines?: string[] + inputPlaceholder?: string + sessionStatusLabel?: string + inputDisabled?: boolean + inputDisabledReason?: string | null + forbidden?: boolean + forbiddenReason?: string | null }>(), { loading: false, clearing: false, + sending: false, draftMessage: '', activityLines: () => [], + inputPlaceholder: 'Describe what you want to do in this graph management session…', + sessionStatusLabel: 'No active session', + inputDisabled: false, + inputDisabledReason: null, + forbidden: false, + forbiddenReason: null, }) const emit = defineEmits<{ refresh: [] clearChat: [] + sendMessage: [message: string] 'update:draftMessage': [value: string] }>() @@ -56,6 +72,18 @@ const combinedTimelineLength = computed( () => messageHistory.value.length + activityTimeline.value.length, ) +const chatInputDisabled = computed( + () => props.loading || props.clearing || props.sending || props.inputDisabled || props.forbidden, +) + +const chatInputHelp = computed(() => { + if (props.forbidden) { + return props.forbiddenReason ?? 'Chat is unavailable because you lack permission for this action.' + } + if (props.inputDisabledReason) return props.inputDisabledReason + return 'Press Enter to send. Shift+Enter adds a new line.' 
+}) + watch(combinedTimelineLength, async () => { await nextTick() if (timelineRef.value) { @@ -67,17 +95,43 @@ function confirmClearChat() { clearConfirmOpen.value = false emit('clearChat') } + +function sendDraftMessage() { + const trimmed = props.draftMessage.trim() + if (!trimmed || chatInputDisabled.value) return + emit('sendMessage', trimmed) + emit('update:draftMessage', '') +} + +function onChatInputKeydown(event: KeyboardEvent) { + handleChatInputKeydown(event, sendDraftMessage) +} </script> <template> <Card> <CardHeader> - <CardTitle class="text-base">Conversation</CardTitle> - <CardDescription> - Shared conversation feed for {{ modeLabel }} with server-side session resume. - </CardDescription> + <div class="flex flex-wrap items-start justify-between gap-2"> + <div> + <CardTitle class="text-base">Conversation</CardTitle> + <CardDescription> + Shared conversation feed for {{ modeLabel }} with server-side session resume. + </CardDescription> + </div> + <p class="text-xs text-muted-foreground"> + Session: <span class="font-medium text-foreground">{{ sessionStatusLabel }}</span> + </p> + </div> </CardHeader> <CardContent class="space-y-3"> + <div + v-if="forbidden" + class="rounded border border-destructive/40 bg-destructive/5 px-3 py-2 text-xs text-destructive" + role="alert" + > + {{ forbiddenReason ?? 'You do not have permission to use graph management chat for this knowledge graph.' }} + </div> + <div class="flex items-center justify-between"> <p class="text-xs text-muted-foreground">No local cache: conversation state is server-side only.</p> <Button size="sm" variant="ghost" class="h-7 px-2 text-[11px]" :disabled="loading" @click="emit('refresh')"> @@ -117,21 +171,39 @@ function confirmClearChat() { v-if="messageHistory.length === 0 && activityTimeline.length === 0" class="text-xs text-muted-foreground" > - No messages yet. Use validate/transition actions to drive session activity. + No messages yet. 
Send a prompt or use validate/transition actions to drive session activity. </p> </div> - <div class="flex items-center gap-2"> - <Input - :model-value="draftMessage" - disabled - placeholder="NDJSON streaming send/receive wiring will attach here." - @update:model-value="(value) => emit('update:draftMessage', value)" - /> - <Button variant="outline" :disabled="clearing || loading" @click="clearConfirmOpen = true"> - <Loader2 v-if="clearing" class="mr-1.5 size-3.5 animate-spin" /> - Clear chat - </Button> + <div class="space-y-2"> + <div class="flex items-start gap-2"> + <Textarea + :model-value="draftMessage" + :disabled="chatInputDisabled" + :placeholder="inputPlaceholder" + class="min-h-20" + aria-label="Graph management chat input" + @update:model-value="(value) => emit('update:draftMessage', value)" + @keydown="onChatInputKeydown" + /> + <Button + variant="default" + class="shrink-0" + :disabled="chatInputDisabled || !draftMessage.trim()" + :title="chatInputHelp" + @click="sendDraftMessage" + > + <Loader2 v-if="sending" class="size-3.5 animate-spin" /> + <SendHorizontal v-else class="size-3.5" /> + </Button> + </div> + <div class="flex flex-wrap items-center justify-between gap-2"> + <p class="text-[11px] text-muted-foreground">{{ chatInputHelp }}</p> + <Button variant="outline" :disabled="clearing || loading || forbidden" @click="clearConfirmOpen = true"> + <Loader2 v-if="clearing" class="mr-1.5 size-3.5 animate-spin" /> + Clear chat + </Button> + </div> </div> </CardContent> </Card> @@ -141,7 +213,7 @@ function confirmClearChat() { <AlertDialogHeader> <AlertDialogTitle>Clear conversation?</AlertDialogTitle> <AlertDialogDescription> - This starts a fresh server-side session timeline for the current mode. + This starts a fresh server-side session timeline while keeping the selected graph management mode. 
</AlertDialogDescription> </AlertDialogHeader> <AlertDialogFooter> diff --git a/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue b/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue index 5ad56ad70..70b8b528d 100644 --- a/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue +++ b/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue @@ -6,8 +6,21 @@ import { Button } from '@/components/ui/button' import { Badge } from '@/components/ui/badge' import { Card, CardContent, CardHeader, CardTitle, CardDescription } from '@/components/ui/card' import { Separator } from '@/components/ui/separator' -import { Tabs, TabsList, TabsTrigger, TabsContent } from '@/components/ui/tabs' import SharedConversationPanel from '@/components/extraction/SharedConversationPanel.vue' +import { + GRAPH_MANAGEMENT_INPUT_PLACEHOLDERS, + GRAPH_MANAGEMENT_MODE_LABELS, + GRAPH_MANAGEMENT_MODE_ORDER, + buildGraphManagementRailItems, + buildGraphManagementStepUrl, + filterRailItemsForMode, + parseGraphManagementModeQuery, + resolveDefaultGraphManagementMode, + resolveRailSelectionForMode, + resolveSharedSessionMode, + type GraphManagementMode, + type GraphManagementRailItemId, +} from '@/utils/kgGraphManagement' import { buildDataSourcesStepUrl, buildMaintainStepUrl, @@ -19,6 +32,15 @@ import { stepStatusTintClass, type WorkspaceStepId, } from '@/utils/kgManageWorkspace' +import { + appendLocalChatMessage, + buildTransitionRestrictionReason, + handleActivatableKeydown, + isForbiddenHttpError, + resolveForbiddenReason, + resolveSectionState, + shouldApplyMutationResult, +} from '@/utils/kgManageState' interface WorkspaceReadinessStatus { has_minimum_entity_types: boolean @@ -108,19 +130,28 @@ const kgIdentity = ref<KnowledgeGraphIdentity | null>(null) const dataSourceCount = ref(0) const maintenanceReadyCount = ref(0) const loading = ref(false) +const workspaceLoadError = ref<string | null>(null) +const workspaceForbidden = ref(false) +const workspaceForbiddenReason = ref<string | 
null>(null) const validating = ref(false) const transitioning = ref(false) const sessionLoading = ref(false) const sessionHistoryLoading = ref(false) +const sessionLoadError = ref<string | null>(null) +const sessionForbidden = ref(false) +const sessionForbiddenReason = ref<string | null>(null) const clearingChat = ref(false) +const sendingChat = ref(false) const extractionSession = ref<ExtractionSessionResponse | null>(null) const sessionHistory = ref<ExtractionSessionHistoryItem[]>([]) -const extractionTab = ref('extraction-jobs') const draftMessage = ref('') const statusProjection = ref<WorkspaceStatusResponse | null>(null) const mutationLogLoading = ref(false) +const mutationLogLoadError = ref<string | null>(null) const mutationLogRuns = ref<MutationLogRunView[]>([]) const selectedMutationLogRunId = ref<string | null>(null) +const graphManagementMode = ref<GraphManagementMode>('initial-schema-design') +const selectedRailItemId = ref<GraphManagementRailItemId | null>(null) const activeStep = computed(() => parseManageStepQuery(route.query.step)) const showOverview = computed(() => activeStep.value === null) @@ -146,10 +177,54 @@ const modeLabel = computed(() => : 'Schema Bootstrap', ) -const sessionMode = computed<'schema_bootstrap' | 'extraction_operations'>(() => - statusProjection.value?.workspace_mode === 'extraction_operations' - ? 'extraction_operations' - : 'schema_bootstrap', +const stepBadgeLabel = computed(() => { + if (activeStep.value === 'graph-management') { + return graphManagementModeLabel.value + } + return modeLabel.value +}) + +const sharedSessionMode = computed<'schema_bootstrap' | 'extraction_operations'>(() => + resolveSharedSessionMode( + statusProjection.value?.workspace_mode ?? 
'schema_bootstrap', + ), +) + +const graphManagementModeLabel = computed( + () => GRAPH_MANAGEMENT_MODE_LABELS[graphManagementMode.value], +) + +const graphManagementInputPlaceholder = computed( + () => GRAPH_MANAGEMENT_INPUT_PLACEHOLDERS[graphManagementMode.value], +) + +const sessionStatusLabel = computed(() => { + if (sessionLoading.value) return 'Loading session' + if (clearingChat.value) return 'Resetting chat' + if (extractionSession.value?.id) { + return `Active · ${extractionSession.value.id.slice(0, 8)}` + } + return 'No active session' +}) + +const graphManagementRailItems = computed(() => { + if (!statusProjection.value) return [] + return buildGraphManagementRailItems({ + workspaceMode: statusProjection.value.workspace_mode, + transitionEligible: statusProjection.value.transition_eligible, + blockingReasonCount: statusProjection.value.readiness.blocking_reasons.length, + prepopulatedGapCount: statusProjection.value.readiness.prepopulated_types_without_instances.length, + sessionUpdatedAt: extractionSession.value?.updated_at ?? null, + hasActiveSession: Boolean(extractionSession.value?.id), + }) +}) + +const visibleRailItems = computed(() => + filterRailItemsForMode(graphManagementRailItems.value, graphManagementMode.value), +) + +const selectedRailItem = computed(() => + visibleRailItems.value.find((item) => item.id === selectedRailItemId.value) ?? null, ) const canTransition = computed(() => @@ -157,6 +232,47 @@ const canTransition = computed(() => && statusProjection.value?.transition_eligible === true, ) +const transitionRestrictionReason = computed(() => + buildTransitionRestrictionReason( + canTransition.value, + statusProjection.value?.readiness.blocking_reasons ?? 
[], + ), +) + +const workspaceOverviewState = computed(() => + resolveSectionState({ + section: 'workspace-overview', + loading: loading.value, + error: workspaceLoadError.value, + forbidden: workspaceForbidden.value, + forbiddenReason: workspaceForbiddenReason.value, + }), +) + +const mutationLogsSectionState = computed(() => + resolveSectionState({ + section: 'mutation-logs', + loading: mutationLogLoading.value, + error: mutationLogLoadError.value, + forbidden: workspaceForbidden.value, + forbiddenReason: workspaceForbiddenReason.value, + empty: !mutationLogLoading.value + && !mutationLogLoadError.value + && mutationLogRuns.value.length === 0, + emptyActionLabel: 'Refresh runs', + }), +) + +const graphManagementSectionState = computed(() => + resolveSectionState({ + section: 'graph-management', + loading: sessionLoading.value, + error: sessionLoadError.value, + forbidden: sessionForbidden.value, + forbiddenReason: sessionForbiddenReason.value, + }), +) + const selectedMutationLogRun = computed(() => mutationLogRuns.value.find((run) => run.id === selectedMutationLogRunId.value) ?? 
null, ) @@ -256,15 +372,30 @@ function returnToWorkspaceOverview() { async function loadWorkspaceStatus() { if (!hasTenant.value || !kgId.value) return loading.value = true + workspaceLoadError.value = null try { statusProjection.value = await apiFetch<WorkspaceStatusResponse>( `/management/knowledge-graphs/${kgId.value}/workspace-status`, ) + workspaceForbidden.value = false + workspaceForbiddenReason.value = null } catch (err) { - statusProjection.value = null - toast.error('Failed to load knowledge graph workspace', { - description: extractErrorMessage(err), - }) + if (isForbiddenHttpError(err)) { + workspaceForbidden.value = true + workspaceForbiddenReason.value = resolveForbiddenReason( + err, + 'You do not have permission to view this knowledge graph workspace.', + ) + statusProjection.value = null + } else { + workspaceForbidden.value = false + workspaceForbiddenReason.value = null + statusProjection.value = null + workspaceLoadError.value = extractErrorMessage(err) + toast.error('Failed to load knowledge graph workspace', { + description: workspaceLoadError.value, + }) + } } finally { loading.value = false } @@ -273,6 +404,7 @@ async function loadWorkspaceStatus() { async function loadMutationLogRuns() { if (!hasTenant.value || !kgId.value) return mutationLogLoading.value = true + mutationLogLoadError.value = null try { const dataSources = await apiFetch<DataSourceRef[]>( `/management/knowledge-graphs/${kgId.value}/data-sources`, @@ -307,28 +439,50 @@ async function loadMutationLogRuns() { selectedMutationLogRunId.value = collected[0]?.id ?? 
null } } catch (err) { + if (isForbiddenHttpError(err)) { + mutationLogLoadError.value = resolveForbiddenReason( + err, + 'You do not have permission to view mutation logs for this graph.', + ) + } else { + mutationLogLoadError.value = extractErrorMessage(err) + toast.error('Failed to load mutation log runs', { + description: mutationLogLoadError.value, + }) + } mutationLogRuns.value = [] selectedMutationLogRunId.value = null - toast.error('Failed to load mutation log runs', { - description: extractErrorMessage(err), - }) } finally { mutationLogLoading.value = false } } async function loadExtractionSession() { - if (!kgId.value) return + if (!kgId.value || activeStep.value !== 'graph-management') return sessionLoading.value = true + sessionLoadError.value = null try { extractionSession.value = await apiFetch<ExtractionSessionResponse>( - `/extraction/knowledge-graphs/${kgId.value}/sessions/${sessionMode.value}/active`, + `/extraction/knowledge-graphs/${kgId.value}/sessions/${sharedSessionMode.value}/active`, ) + sessionForbidden.value = false + sessionForbiddenReason.value = null } catch (err) { extractionSession.value = null - toast.error('Failed to load extraction conversation', { - description: extractErrorMessage(err), - }) + if (isForbiddenHttpError(err)) { + sessionForbidden.value = true + sessionForbiddenReason.value = resolveForbiddenReason( + err, + 'You do not have permission to manage this knowledge graph.', + ) + } else { + sessionForbidden.value = false + sessionForbiddenReason.value = null + sessionLoadError.value = extractErrorMessage(err) + toast.error('Failed to load extraction conversation', { + description: sessionLoadError.value, + }) + } } finally { sessionLoading.value = false } @@ -339,7 +493,7 @@ async function loadSessionHistory() { sessionHistoryLoading.value = true try { const response = await apiFetch<{ sessions: ExtractionSessionHistoryItem[] }>( - `/extraction/knowledge-graphs/${kgId.value}/sessions/${sessionMode.value}/history`, + 
`/extraction/knowledge-graphs/${kgId.value}/sessions/${sharedSessionMode.value}/history`, ) sessionHistory.value = response.sessions } catch (err) { @@ -352,8 +506,83 @@ async function loadSessionHistory() { } } +function syncGraphManagementState() { + if (activeStep.value !== 'graph-management') return + const fromQuery = parseGraphManagementModeQuery(route.query.gm_mode) + graphManagementMode.value = fromQuery + ?? resolveDefaultGraphManagementMode( + statusProjection.value?.workspace_mode ?? 'schema_bootstrap', + ) + selectedRailItemId.value = resolveRailSelectionForMode( + selectedRailItemId.value, + graphManagementMode.value, + graphManagementRailItems.value, + ) +} + +function setGraphManagementMode(mode: GraphManagementMode) { + graphManagementMode.value = mode + selectedRailItemId.value = resolveRailSelectionForMode( + selectedRailItemId.value, + mode, + graphManagementRailItems.value, + ) + navigateTo(buildGraphManagementStepUrl(kgId.value, mode), { replace: true }) +} + +function selectRailItem(itemId: GraphManagementRailItemId) { + selectedRailItemId.value = itemId +} + +function onRailKeydown(event: KeyboardEvent, itemId: GraphManagementRailItemId) { + handleActivatableKeydown(event, () => selectRailItem(itemId)) +} + +function onStepActionKeydown(event: KeyboardEvent, stepId: WorkspaceStepId) { + handleActivatableKeydown(event, () => openWorkspaceStep(stepId)) +} + +function onModeSwitchKeydown(event: KeyboardEvent, mode: GraphManagementMode) { + handleActivatableKeydown(event, () => setGraphManagementMode(mode)) +} + +function selectMutationLogRun(runId: string) { + selectedMutationLogRunId.value = runId +} + +function onMutationRunKeydown(event: KeyboardEvent, runId: string) { + handleActivatableKeydown(event, () => selectMutationLogRun(runId)) +} + +function sendChatMessage(message: string) { + if (sessionForbidden.value || !shouldApplyMutationResult(sessionForbidden.value)) { + toast.error('Chat unavailable', { + description: 
sessionForbiddenReason.value + ?? 'You do not have permission to send messages for this knowledge graph.', + }) + return + } + + sendingChat.value = true + try { + const nextHistory = appendLocalChatMessage(extractionSession.value, message) + extractionSession.value = { + ...(extractionSession.value ?? { + id: 'local-session', + runtime_context: {}, + updated_at: new Date().toISOString(), + }), + message_history: nextHistory, + updated_at: new Date().toISOString(), + } + draftMessage.value = '' + } finally { + sendingChat.value = false + } +} + async function validateWorkspace() { - if (!kgId.value) return + if (!kgId.value || workspaceForbidden.value) return validating.value = true try { statusProjection.value = await apiFetch<WorkspaceStatusResponse>( @@ -362,17 +591,26 @@ async function validateWorkspace() { ) toast.success('Workspace validation complete') } catch (err) { - toast.error('Validation failed', { - description: extractErrorMessage(err), - }) + if (isForbiddenHttpError(err)) { + workspaceForbidden.value = true + workspaceForbiddenReason.value = resolveForbiddenReason( + err, + 'You do not have permission to validate this workspace.', + ) + } else { + toast.error('Validation failed', { + description: extractErrorMessage(err), + }) + } } finally { validating.value = false } } async function transitionToExtraction() { - if (!kgId.value || !canTransition.value) return + if (!kgId.value || !canTransition.value || workspaceForbidden.value) return transitioning.value = true + const previousStatus = statusProjection.value try { statusProjection.value = await apiFetch<WorkspaceStatusResponse>( `/management/knowledge-graphs/${kgId.value}/workspace/transition-to-extraction`, @@ -381,9 +619,18 @@ async function transitionToExtraction() { toast.success('Workspace transitioned to extraction operations') await loadExtractionSession() } catch (err) { - toast.error('Transition failed', { - description: extractErrorMessage(err), - }) + statusProjection.value = 
previousStatus + if (isForbiddenHttpError(err)) { + workspaceForbidden.value = true + workspaceForbiddenReason.value = resolveForbiddenReason( + err, + 'You do not have permission to transition this workspace.', + ) + } else { + toast.error('Transition failed', { + description: extractErrorMessage(err), + }) + } } finally { transitioning.value = false } @@ -391,11 +638,11 @@ async function transitionToExtraction() { async function clearChat() { // Clear chat resets the active extraction session for this knowledge graph. - if (!kgId.value) return + if (!kgId.value || sessionForbidden.value) return clearingChat.value = true try { extractionSession.value = await apiFetch<ExtractionSessionResponse>( - `/extraction/knowledge-graphs/${kgId.value}/sessions/${sessionMode.value}/clear-chat`, + `/extraction/knowledge-graphs/${kgId.value}/sessions/${sharedSessionMode.value}/clear-chat`, { method: 'POST' }, ) toast.success('Extraction chat cleared') @@ -422,6 +669,13 @@ watch(tenantVersion, () => { extractionSession.value = null dataSourceCount.value = 0 maintenanceReadyCount.value = 0 + workspaceLoadError.value = null + workspaceForbidden.value = false + workspaceForbiddenReason.value = null + mutationLogLoadError.value = null + sessionLoadError.value = null + sessionForbidden.value = false + sessionForbiddenReason.value = null loadKgIdentity() loadWorkspaceStatus() loadOverviewMetrics() @@ -430,8 +684,19 @@ watch(tenantVersion, () => { watch( () => statusProjection.value?.workspace_mode, - (mode) => { - if (mode) { + () => { + if (activeStep.value === 'graph-management') { + syncGraphManagementState() + loadExtractionSession() + } + }, +) + +watch( + () => [activeStep.value, route.query.gm_mode] as const, + () => { + if (activeStep.value === 'graph-management') { + syncGraphManagementState() loadExtractionSession() loadSessionHistory() } @@ -445,14 +710,17 @@ watch( <div class="space-y-1"> <div class="flex items-center gap-2"> <h1 class="text-2xl font-semibold 
tracking-tight">{{ graphHeaderTitle }}</h1> - <Badge v-if="!showOverview" variant="secondary">{{ modeLabel }}</Badge> + <Badge v-if="!showOverview" variant="secondary">{{ stepBadgeLabel }}</Badge> </div> <p class="text-sm text-muted-foreground"> <template v-if="showOverview"> Project workspace for knowledge graph {{ kgId }}. </template> + <template v-else-if="activeStep === 'graph-management'"> + Conversation-first graph management with shared session and mode-specific workspace panels. + </template> <template v-else> - Validate readiness and move from schema bootstrap to extraction operations. + Knowledge-graph scoped mutation run visibility and run metrics. </template> </p> </div> @@ -472,9 +740,34 @@ watch( Select a tenant to manage this workspace. </div> - <div v-else-if="loading" class="flex items-center gap-2 text-sm text-muted-foreground"> + <div + v-else-if="workspaceOverviewState.phase === 'loading'" + class="flex items-center gap-2 text-sm text-muted-foreground" + role="status" + > <Loader2 class="size-4 animate-spin" /> - Loading workspace status... 
+ {{ workspaceOverviewState.message }} + </div> + + <div + v-else-if="workspaceOverviewState.phase === 'forbidden'" + class="rounded-lg border border-destructive/40 bg-destructive/5 p-6 text-sm" + role="alert" + > + <p class="font-medium text-destructive">{{ workspaceOverviewState.title }}</p> + <p class="mt-1 text-muted-foreground">{{ workspaceOverviewState.message }}</p> + </div> + + <div + v-else-if="workspaceOverviewState.phase === 'error'" + class="rounded-lg border border-dashed p-6 text-sm" + role="alert" + > + <p class="font-medium">{{ workspaceOverviewState.title }}</p> + <p class="mt-1 text-muted-foreground">{{ workspaceOverviewState.message }}</p> + <Button class="mt-3" size="sm" variant="outline" @click="loadWorkspaceStatus"> + Retry workspace load + </Button> </div> <template v-else-if="statusProjection"> @@ -517,7 +810,9 @@ watch( <Button class="w-full" variant="outline" + tabindex="0" @click="openWorkspaceStep(card.id)" + @keydown="onStepActionKeydown($event, card.id)" > {{ card.actionLabel }} </Button> @@ -527,7 +822,26 @@ watch( </section> <section v-else-if="activeStep === 'mutation-logs'" class="space-y-4"> - <Card> + <div + v-if="mutationLogsSectionState.phase === 'forbidden'" + class="rounded-lg border border-destructive/40 bg-destructive/5 p-4 text-sm" + role="alert" + > + <p class="font-medium text-destructive">{{ mutationLogsSectionState.title }}</p> + <p class="mt-1 text-muted-foreground">{{ mutationLogsSectionState.message }}</p> + </div> + <div + v-else-if="mutationLogsSectionState.phase === 'error'" + class="rounded-lg border border-dashed p-4 text-sm" + role="alert" + > + <p class="font-medium">{{ mutationLogsSectionState.title }}</p> + <p class="mt-1 text-muted-foreground">{{ mutationLogsSectionState.message }}</p> + <Button class="mt-3" size="sm" variant="outline" @click="loadMutationLogRuns"> + Retry mutation log load + </Button> + </div> + <Card v-else> <CardHeader> <CardTitle class="text-base">MutationLogs</CardTitle> 
<CardDescription> @@ -542,20 +856,29 @@ watch( Refresh </Button> </div> - <div v-if="mutationLogLoading" class="flex items-center gap-2 px-3 py-4 text-xs text-muted-foreground"> + <div v-if="mutationLogLoading" class="flex items-center gap-2 px-3 py-4 text-xs text-muted-foreground" role="status"> <Loader2 class="size-3.5 animate-spin" /> - Loading mutation runs... + {{ mutationLogsSectionState.message }} </div> - <div v-else-if="mutationLogRuns.length === 0" class="px-3 py-4 text-xs text-muted-foreground"> - No mutation log runs found for this knowledge graph yet. + <div + v-else-if="mutationLogRuns.length === 0" + class="space-y-2 px-3 py-4 text-xs text-muted-foreground" + > + <p>{{ mutationLogsSectionState.message }}</p> + <Button size="sm" variant="outline" @click="loadMutationLogRuns"> + {{ mutationLogsSectionState.actionLabel ?? 'Refresh runs' }} + </Button> </div> <div v-else class="max-h-64 overflow-auto p-2 space-y-1.5"> <button v-for="run in mutationLogRuns" :key="run.id" - class="w-full rounded border px-2 py-1.5 text-left text-xs transition-colors" + type="button" + tabindex="0" + class="w-full rounded border px-2 py-1.5 text-left text-xs transition-colors focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring" :class="selectedMutationLogRunId === run.id ? 'border-primary bg-primary/5' : 'hover:bg-muted/40'" - @click="selectedMutationLogRunId = run.id" + @click="selectMutationLogRun(run.id)" + @keydown="onMutationRunKeydown($event, run.id)" > <p class="font-medium truncate">{{ run.data_source_name }}</p> <p class="text-muted-foreground truncate">{{ new Date(run.started_at).toLocaleString() }}</p> @@ -635,394 +958,355 @@ watch( </Card> </section> - <section v-else class="space-y-6"> - <Card> - <CardHeader> - <CardTitle class="text-base">Mode & Transition Controls</CardTitle> - <CardDescription> - Validate current readiness and transition when eligible. 
- </CardDescription> - </CardHeader> - <CardContent class="flex flex-wrap gap-2"> - <Button variant="outline" :disabled="validating || transitioning" @click="validateWorkspace"> - <Loader2 v-if="validating" class="mr-1.5 size-3.5 animate-spin" /> - <CheckCircle2 v-else class="mr-1.5 size-3.5" /> - Validate + <section v-else-if="activeStep === 'graph-management'" class="space-y-4"> + <div + v-if="graphManagementSectionState.phase === 'error'" + class="rounded-lg border border-dashed p-4 text-sm" + role="alert" + > + <p class="font-medium">{{ graphManagementSectionState.title }}</p> + <p class="mt-1 text-muted-foreground">{{ graphManagementSectionState.message }}</p> + <Button class="mt-3" size="sm" variant="outline" @click="loadExtractionSession"> + Retry session load </Button> - <Button - :disabled="!canTransition || transitioning || validating" - @click="transitionToExtraction" - > - <Loader2 v-if="transitioning" class="mr-1.5 size-3.5 animate-spin" /> - <PlayCircle v-else class="mr-1.5 size-3.5" /> - Go to Extraction/Mutations - </Button> - <Badge :variant="canTransition ? 'default' : 'secondary'"> - {{ canTransition ? 'Transition eligible' : 'Transition blocked' }} - </Badge> - </CardContent> - </Card> - - <Card> - <CardHeader> - <CardTitle class="text-base">Readiness Results</CardTitle> - <CardDescription> - Bootstrap readiness requirements from workspace validation. - </CardDescription> - </CardHeader> - <CardContent class="space-y-4 text-sm"> - <div class="rounded border p-3"> - <p class="mb-2 text-xs font-medium uppercase tracking-wider text-muted-foreground"> - Bootstrap Progress Checklist - </p> - <div class="space-y-2"> - <div - v-for="item in progressChecklist" - :key="item.id" - class="rounded border px-3 py-2" - > - <div class="flex items-center justify-between"> - <p class="font-medium">{{ item.label }}</p> - <Badge :variant="item.passed ? 'default' : 'destructive'"> - {{ item.passed ? 
'Pass' : 'Fail' }} - </Badge> - </div> - <p class="mt-1 text-xs text-muted-foreground"> - {{ item.passed ? item.passDetail : item.failDetail }} - </p> - </div> - </div> - </div> - - <div class="flex items-center justify-between rounded border px-3 py-2"> - <span>Has minimum entity types</span> - <Badge :variant="statusProjection.readiness.has_minimum_entity_types ? 'default' : 'destructive'"> - {{ statusProjection.readiness.has_minimum_entity_types ? 'Yes' : 'No' }} - </Badge> - </div> - <div class="flex items-center justify-between rounded border px-3 py-2"> - <span>Has minimum relationship types</span> - <Badge :variant="statusProjection.readiness.has_minimum_relationship_types ? 'default' : 'destructive'"> - {{ statusProjection.readiness.has_minimum_relationship_types ? 'Yes' : 'No' }} - </Badge> - </div> - <div class="flex items-center justify-between rounded border px-3 py-2"> - <span>Prepopulated types ready</span> - <Badge :variant="statusProjection.readiness.prepopulated_types_ready ? 'default' : 'destructive'"> - {{ statusProjection.readiness.prepopulated_types_ready ? 'Yes' : 'No' }} - </Badge> - </div> + </div> - <div class="rounded border p-3"> - <p class="mb-2 text-xs font-medium uppercase tracking-wider text-muted-foreground"> - Validation Diagnostics - </p> + <Card class="graph-management-controls"> + <CardHeader class="pb-3"> + <CardTitle class="text-base">Graph Management</CardTitle> + <CardDescription> + Shared chat session with mode-specific assistant framing and workspace panels. 
+ </CardDescription> + </CardHeader> + <CardContent class="space-y-3"> <div - v-if="statusProjection.readiness.prepopulated_types_without_instances.length > 0" - class="rounded border border-amber-400/60 bg-amber-50/60 p-2 text-xs dark:border-amber-800 dark:bg-amber-950/20" + class="flex flex-wrap gap-2" + role="tablist" + aria-label="Graph management modes" > - <p class="font-medium text-amber-800 dark:text-amber-300"> - Prepopulated types missing instances - </p> - <ul class="mt-1 list-disc space-y-1 pl-4 text-muted-foreground"> - <li - v-for="typeLabel in statusProjection.readiness.prepopulated_types_without_instances" - :key="typeLabel" - > - {{ typeLabel }} - </li> - </ul> - </div> - - <div v-if="statusProjection.readiness.blocking_reasons.length > 0" class="mt-2 rounded border border-destructive/50 p-3"> - <p class="mb-1 text-xs font-medium text-destructive flex items-center gap-1.5"> - <ShieldAlert class="size-3.5" /> - Blocking reasons - </p> - <ul class="list-disc pl-4 text-xs text-muted-foreground space-y-1"> - <li v-for="reason in statusProjection.readiness.blocking_reasons" :key="reason"> - {{ reason }} - </li> - </ul> + <Button + v-for="mode in GRAPH_MANAGEMENT_MODE_ORDER" + :key="mode" + size="sm" + role="tab" + :aria-selected="graphManagementMode === mode" + tabindex="0" + :variant="graphManagementMode === mode ? 'default' : 'outline'" + @click="setGraphManagementMode(mode)" + @keydown="onModeSwitchKeydown($event, mode)" + > + {{ GRAPH_MANAGEMENT_MODE_LABELS[mode] }} + </Button> </div> - <p - v-else-if="statusProjection.readiness.prepopulated_types_without_instances.length === 0" - class="text-xs text-muted-foreground" - > - No validation diagnostics are currently blocking transition. 
- </p> - </div> - - <div class="rounded border p-3"> - <p class="mb-2 text-xs font-medium uppercase tracking-wider text-muted-foreground"> - Next Steps - </p> - <ul class="list-disc pl-4 text-xs text-muted-foreground space-y-1"> - <li v-for="step in nextSteps" :key="step">{{ step }}</li> - </ul> - </div> - - </CardContent> - </Card> - - <Card> - <CardHeader> - <CardTitle class="text-base">Session Pointers</CardTitle> - <CardDescription> - Active and recent extraction session references for this knowledge graph. - </CardDescription> - </CardHeader> - <CardContent class="grid gap-2 md:grid-cols-3 text-xs"> - <div class="rounded border px-3 py-2"> - <p class="text-muted-foreground">Active schema bootstrap session</p> - <p class="font-mono break-all mt-1"> - {{ statusProjection.session_pointers.active_schema_bootstrap_session_id ?? 'None' }} - </p> - </div> - <div class="rounded border px-3 py-2"> - <p class="text-muted-foreground">Active extraction operations session</p> - <p class="font-mono break-all mt-1"> - {{ statusProjection.session_pointers.active_extraction_operations_session_id ?? 'None' }} - </p> - </div> - <div class="rounded border px-3 py-2"> - <p class="text-muted-foreground">Most recent completed session</p> - <p class="font-mono break-all mt-1"> - {{ statusProjection.session_pointers.most_recent_completed_session_id ?? 'None' }} - </p> - </div> - </CardContent> - <CardContent class="space-y-3 border-t pt-4"> - <div class="flex items-center justify-between"> - <p class="text-xs font-medium uppercase tracking-wider text-muted-foreground"> - Session History - </p> - <Button - size="sm" - variant="ghost" - class="h-6 px-2 text-[10px]" - :disabled="sessionHistoryLoading" - @click="loadSessionHistory" - > - Refresh - </Button> - </div> - <div - v-if="sessionHistoryLoading" - class="flex items-center gap-2 text-xs text-muted-foreground" - > - <Loader2 class="size-3.5 animate-spin" /> - Loading session history... 
- </div> - <div - v-else-if="sessionHistory.length === 0" - class="rounded border border-dashed px-3 py-4 text-xs text-muted-foreground" - > - No archived or active sessions found for this scope yet. - </div> - <div v-else class="space-y-2"> - <div - v-for="entry in sessionHistory" - :key="entry.id" - class="rounded border px-3 py-2 text-xs" - > - <div class="flex flex-wrap items-center justify-between gap-2"> - <p class="font-mono break-all">{{ entry.id }}</p> - <Badge :variant="entry.is_active ? 'default' : 'secondary'"> - {{ entry.is_active ? 'Active' : 'Archived' }} - </Badge> - </div> - <p class="mt-1 text-muted-foreground"> - Updated {{ new Date(entry.updated_at).toLocaleString() }} - <span v-if="entry.archived_at"> - · Archived {{ new Date(entry.archived_at).toLocaleString() }} - </span> - </p> - <p class="mt-1 text-muted-foreground"> - {{ entry.message_count }} message(s) - · {{ entry.run_metrics.length }} linked run(s) - </p> - <div - v-if="entry.run_metrics.length > 0" - class="mt-2 space-y-1.5 rounded border bg-muted/20 p-2" + <div class="flex flex-wrap items-center gap-2"> + <Badge variant="outline">{{ sessionStatusLabel }}</Badge> + <Button + variant="outline" + size="sm" + :disabled="validating || transitioning || workspaceForbidden" + :title="workspaceForbiddenReason ?? undefined" + @click="validateWorkspace" > - <div - v-for="metric in entry.run_metrics" - :key="metric.sync_run_id" - class="flex flex-wrap items-center justify-between gap-2" - > - <span class="font-mono">{{ metric.mutation_log_id ?? metric.sync_run_id }}</span> - <span class="text-muted-foreground"> - {{ metric.token_usage_total ?? 0 }} tokens · - ${{ (metric.cost_total_usd ?? 0).toFixed(2) }} - </span> - </div> - </div> + <Loader2 v-if="validating" class="mr-1.5 size-3.5 animate-spin" /> + <CheckCircle2 v-else class="mr-1.5 size-3.5" /> + Validate + </Button> + <Badge :variant="canTransition ? 'default' : 'secondary'"> + {{ canTransition ? 
'Transition eligible' : 'Transition blocked' }} + </Badge> </div> - </div> - </CardContent> - </Card> + </CardContent> + </Card> - <div class="space-y-4"> <SharedConversationPanel v-model:draft-message="draftMessage" - :mode-label="modeLabel" + :mode-label="graphManagementModeLabel" + :input-placeholder="graphManagementInputPlaceholder" + :session-status-label="sessionStatusLabel" :session="extractionSession" :loading="sessionLoading" :clearing="clearingChat" + :sending="sendingChat" :activity-lines="sessionActivityLines" + :forbidden="sessionForbidden" + :forbidden-reason="sessionForbiddenReason" + :input-disabled="workspaceForbidden" + :input-disabled-reason="workspaceForbiddenReason" @refresh="loadExtractionSession" @clear-chat="clearChat" + @send-message="sendChatMessage" /> - <Card v-if="statusProjection.workspace_mode === 'extraction_operations'"> - <CardHeader> - <CardTitle class="text-base">Operations Workspace</CardTitle> - <CardDescription> - Tabbed controls for extraction jobs, manual mutations, and run/log visibility. 
- </CardDescription> - </CardHeader> - <CardContent> - <Tabs v-model="extractionTab" class="w-full"> - <TabsList class="grid w-full grid-cols-3"> - <TabsTrigger value="extraction-jobs">Extraction Jobs</TabsTrigger> - <TabsTrigger value="manual-mutations">Manual Mutations</TabsTrigger> - <TabsTrigger value="run-logs">Run/Logs</TabsTrigger> - </TabsList> - <TabsContent value="extraction-jobs" class="mt-3 space-y-2 text-sm"> + <div class="grid gap-4 xl:grid-cols-[280px_1fr]"> + <div + class="graph-management-rail rounded border" + role="listbox" + aria-label="Graph management status and artifacts" + > + <div class="border-b px-3 py-2"> + <p class="text-xs font-medium text-muted-foreground">Status & artifacts</p> + </div> + <div class="space-y-1.5 p-2"> + <button + v-for="item in visibleRailItems" + :key="item.id" + type="button" + role="option" + :aria-selected="selectedRailItemId === item.id" + tabindex="0" + class="w-full rounded border px-2 py-2 text-left text-xs transition-colors focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring" + :class="[ + stepStatusTintClass(item.status), + selectedRailItemId === item.id ? 'border-primary ring-1 ring-primary/30' : 'hover:bg-muted/40', + ]" + @click="selectRailItem(item.id)" + @keydown="onRailKeydown($event, item.id)" + > + <div class="flex items-center justify-between gap-2"> + <p class="font-medium">{{ item.label }}</p> + <Badge variant="outline" class="text-[10px]">{{ item.status }}</Badge> + </div> + <p class="mt-1 text-muted-foreground">{{ item.detailHint }}</p> + <p class="mt-1 text-[10px] text-muted-foreground">Updated {{ item.lastUpdated }}</p> + </button> + </div> + </div> + + <Card class="graph-management-detail"> + <CardHeader class="pb-3"> + <CardTitle class="text-base"> + {{ selectedRailItem?.label ?? 
'Workspace detail' }} + </CardTitle> + <CardDescription> + Mode: + <span class="font-medium text-foreground">{{ graphManagementModeLabel }}</span> + </CardDescription> + </CardHeader> + <CardContent class="space-y-4 text-sm"> + <template v-if="selectedRailItemId === 'schema-readiness'"> + <div class="rounded border p-3"> + <p class="mb-2 text-xs font-medium uppercase tracking-wider text-muted-foreground"> + Bootstrap Progress Checklist + </p> + <div class="space-y-2"> + <div + v-for="item in progressChecklist" + :key="item.id" + class="rounded border px-3 py-2" + > + <div class="flex items-center justify-between"> + <p class="font-medium">{{ item.label }}</p> + <Badge :variant="item.passed ? 'default' : 'destructive'"> + {{ item.passed ? 'Pass' : 'Fail' }} + </Badge> + </div> + <p class="mt-1 text-xs text-muted-foreground"> + {{ item.passed ? item.passDetail : item.failDetail }} + </p> + </div> + </div> + </div> + <div class="flex flex-wrap gap-2"> + <Button variant="outline" :disabled="validating || transitioning || workspaceForbidden" @click="validateWorkspace"> + <Loader2 v-if="validating" class="mr-1.5 size-3.5 animate-spin" /> + <CheckCircle2 v-else class="mr-1.5 size-3.5" /> + Validate + </Button> + <Button + :disabled="!canTransition || transitioning || validating || workspaceForbidden" + :title="transitionRestrictionReason ?? 
undefined" + @click="transitionToExtraction" + > + <Loader2 v-if="transitioning" class="mr-1.5 size-3.5 animate-spin" /> + <PlayCircle v-else class="mr-1.5 size-3.5" /> + Go to Extraction/Mutations + </Button> + </div> + </template> + + <template v-else-if="selectedRailItemId === 'validation-diagnostics'"> + <div class="rounded border p-3"> + <p class="mb-2 text-xs font-medium uppercase tracking-wider text-muted-foreground"> + Validation Diagnostics + </p> + <div + v-if="statusProjection.readiness.prepopulated_types_without_instances.length > 0" + class="rounded border border-amber-400/60 bg-amber-50/60 p-2 text-xs dark:border-amber-800 dark:bg-amber-950/20" + > + <p class="font-medium text-amber-800 dark:text-amber-300"> + Prepopulated types missing instances + </p> + <ul class="mt-1 list-disc space-y-1 pl-4 text-muted-foreground"> + <li + v-for="typeLabel in statusProjection.readiness.prepopulated_types_without_instances" + :key="typeLabel" + > + {{ typeLabel }} + </li> + </ul> + </div> + <div + v-if="statusProjection.readiness.blocking_reasons.length > 0" + class="mt-2 rounded border border-destructive/50 p-3" + > + <p class="mb-1 flex items-center gap-1.5 text-xs font-medium text-destructive"> + <ShieldAlert class="size-3.5" /> + Blocking reasons + </p> + <ul class="list-disc space-y-1 pl-4 text-xs text-muted-foreground"> + <li v-for="reason in statusProjection.readiness.blocking_reasons" :key="reason"> + {{ reason }} + </li> + </ul> + </div> + <p + v-else-if="statusProjection.readiness.prepopulated_types_without_instances.length === 0" + class="text-xs text-muted-foreground" + > + No validation diagnostics are currently blocking transition. 
+ </p> + </div> + <div class="rounded border p-3"> + <p class="mb-2 text-xs font-medium uppercase tracking-wider text-muted-foreground"> + Next Steps + </p> + <ul class="list-disc space-y-1 pl-4 text-xs text-muted-foreground"> + <li v-for="step in nextSteps" :key="step">{{ step }}</li> + </ul> + </div> + </template> + + <template v-else-if="selectedRailItemId === 'session-pointers'"> + <div class="grid gap-2 md:grid-cols-3 text-xs"> + <div class="rounded border px-3 py-2"> + <p class="text-muted-foreground">Active schema bootstrap session</p> + <p class="mt-1 break-all font-mono"> + {{ statusProjection.session_pointers.active_schema_bootstrap_session_id ?? 'None' }} + </p> + </div> + <div class="rounded border px-3 py-2"> + <p class="text-muted-foreground">Active extraction operations session</p> + <p class="mt-1 break-all font-mono"> + {{ statusProjection.session_pointers.active_extraction_operations_session_id ?? 'None' }} + </p> + </div> + <div class="rounded border px-3 py-2"> + <p class="text-muted-foreground">Most recent completed session</p> + <p class="mt-1 break-all font-mono"> + {{ statusProjection.session_pointers.most_recent_completed_session_id ?? 'None' }} + </p> + </div> + </div> + <div class="space-y-3 border-t pt-3"> + <div class="flex items-center justify-between"> + <p class="text-xs font-medium uppercase tracking-wider text-muted-foreground"> + Session History + </p> + <Button + size="sm" + variant="ghost" + class="h-6 px-2 text-[10px]" + :disabled="sessionHistoryLoading" + @click="loadSessionHistory" + > + Refresh + </Button> + </div> + <div + v-if="sessionHistoryLoading" + class="flex items-center gap-2 text-xs text-muted-foreground" + > + <Loader2 class="size-3.5 animate-spin" /> + Loading session history... + </div> + <div + v-else-if="sessionHistory.length === 0" + class="rounded border border-dashed px-3 py-4 text-xs text-muted-foreground" + > + No archived or active sessions found for this scope yet. 
+ </div> + <div v-else class="space-y-2"> + <div + v-for="entry in sessionHistory" + :key="entry.id" + class="rounded border px-3 py-2 text-xs" + > + <div class="flex flex-wrap items-center justify-between gap-2"> + <p class="font-mono break-all">{{ entry.id }}</p> + <Badge :variant="entry.is_active ? 'default' : 'secondary'"> + {{ entry.is_active ? 'Active' : 'Archived' }} + </Badge> + </div> + <p class="mt-1 text-muted-foreground"> + Updated {{ new Date(entry.updated_at).toLocaleString() }} + <span v-if="entry.archived_at"> + · Archived {{ new Date(entry.archived_at).toLocaleString() }} + </span> + </p> + <p class="mt-1 text-muted-foreground"> + {{ entry.message_count }} message(s) + · {{ entry.run_metrics.length }} linked run(s) + </p> + <div + v-if="entry.run_metrics.length > 0" + class="mt-2 space-y-1.5 rounded border bg-muted/20 p-2" + > + <div + v-for="metric in entry.run_metrics" + :key="metric.sync_run_id" + class="flex flex-wrap items-center justify-between gap-2" + > + <span class="font-mono">{{ metric.mutation_log_id ?? metric.sync_run_id }}</span> + <span class="text-muted-foreground"> + {{ metric.token_usage_total ?? 0 }} tokens · + ${{ (metric.cost_total_usd ?? 0).toFixed(2) }} + </span> + </div> + </div> + </div> + </div> + </div> + </template> + + <template v-else-if="graphManagementMode === 'extraction-jobs'"> <p class="text-muted-foreground"> Trigger extraction and maintenance controls from the data sources operations panel. 
</p> - <Button size="sm" variant="outline" @click="navigateTo('/data-sources')"> - Open Data Source Operations - </Button> - </TabsContent> - <TabsContent value="manual-mutations" class="mt-3 space-y-2 text-sm"> + <div class="flex flex-wrap gap-2"> + <Button + size="sm" + variant="outline" + @click="navigateTo(buildDataSourcesStepUrl(kgId))" + > + Open Data Source Operations + </Button> + <Button + size="sm" + variant="outline" + @click="navigateTo(buildMaintainStepUrl(kgId))" + > + Open Maintain Step + </Button> + </div> + </template> + + <template v-else-if="graphManagementMode === 'one-off-mutations'"> <p class="text-muted-foreground"> Open the mutation editor scoped to this knowledge graph for minor direct edits. </p> <Button size="sm" @click="navigateTo(`/graph/mutations?kg_id=${kgId}&view=editor`)"> Open Manual Mutations </Button> - </TabsContent> - <TabsContent value="run-logs" class="mt-3 space-y-2 text-sm"> - <p class="text-muted-foreground"> - Review sync run history, maintenance outcomes, and operational logs. - </p> - <Button size="sm" variant="outline" @click="navigateTo('/data-sources')"> - Open Run and Log Views - </Button> - <Card class="mt-2"> - <CardHeader> - <CardTitle class="text-sm">MutationLog Browser</CardTitle> - <CardDescription> - Knowledge-graph scoped mutation runs with per-entry operation previews and run metrics. - </CardDescription> - </CardHeader> - <CardContent class="grid gap-3 xl:grid-cols-[280px_1fr]"> - <div class="rounded border"> - <div class="flex items-center justify-between border-b px-3 py-2"> - <p class="text-xs font-medium text-muted-foreground">Runs</p> - <Button size="sm" variant="ghost" class="h-6 px-2 text-[10px]" @click="loadMutationLogRuns"> - Refresh - </Button> - </div> - <div v-if="mutationLogLoading" class="flex items-center gap-2 px-3 py-4 text-xs text-muted-foreground"> - <Loader2 class="size-3.5 animate-spin" /> - Loading mutation runs... 
- </div> - <div v-else-if="mutationLogRuns.length === 0" class="px-3 py-4 text-xs text-muted-foreground"> - No mutation log runs found for this knowledge graph yet. - </div> - <div v-else class="max-h-64 overflow-auto p-2 space-y-1.5"> - <button - v-for="run in mutationLogRuns" - :key="run.id" - class="w-full rounded border px-2 py-1.5 text-left text-xs transition-colors" - :class="selectedMutationLogRunId === run.id ? 'border-primary bg-primary/5' : 'hover:bg-muted/40'" - @click="selectedMutationLogRunId = run.id" - > - <p class="font-medium truncate">{{ run.data_source_name }}</p> - <p class="text-muted-foreground truncate">{{ new Date(run.started_at).toLocaleString() }}</p> - <div class="mt-1 flex items-center justify-between"> - <Badge variant="outline" class="text-[10px]">{{ run.status }}</Badge> - <span class="font-mono text-[10px] text-muted-foreground">{{ run.mutation_log_id }}</span> - </div> - </button> - </div> - </div> + </template> - <div v-if="selectedMutationLogRun" class="space-y-3 rounded border p-3"> - <div class="flex flex-wrap items-center gap-2"> - <Badge>{{ selectedMutationLogRun.status }}</Badge> - <p class="text-xs text-muted-foreground"> - Data source: - <span class="font-medium text-foreground">{{ selectedMutationLogRun.data_source_name }}</span> - </p> - </div> - <div class="grid gap-2 sm:grid-cols-2"> - <div class="rounded border px-3 py-2 text-xs"> - <p class="text-muted-foreground">MutationLog</p> - <p class="mt-1 font-mono break-all">{{ selectedMutationLogRun.mutation_log_id }}</p> - </div> - <div class="rounded border px-3 py-2 text-xs"> - <p class="text-muted-foreground">Session</p> - <p class="mt-1 font-mono break-all">{{ selectedMutationLogRun.session_id ?? 
'None' }}</p> - </div> - <div class="rounded border px-3 py-2 text-xs"> - <p class="text-muted-foreground">Started</p> - <p class="mt-1">{{ new Date(selectedMutationLogRun.started_at).toLocaleString() }}</p> - </div> - <div class="rounded border px-3 py-2 text-xs"> - <p class="text-muted-foreground">Completed</p> - <p class="mt-1"> - {{ selectedMutationLogRun.completed_at ? new Date(selectedMutationLogRun.completed_at).toLocaleString() : 'In progress' }} - </p> - </div> - </div> - <div class="grid gap-2 sm:grid-cols-2"> - <div class="rounded border px-3 py-2 text-xs"> - <p class="text-muted-foreground flex items-center gap-1.5"> - <Coins class="size-3.5" /> - Token usage - </p> - <p class="mt-1 font-medium">{{ (selectedMutationLogRun.token_usage_total ?? 0).toLocaleString() }}</p> - </div> - <div class="rounded border px-3 py-2 text-xs"> - <p class="text-muted-foreground flex items-center gap-1.5"> - <DollarSign class="size-3.5" /> - Cost (USD) - </p> - <p class="mt-1 font-medium">${{ (selectedMutationLogRun.cost_total_usd ?? 0).toFixed(2) }}</p> - </div> - </div> - <div class="rounded border p-3"> - <p class="mb-2 text-xs font-medium text-muted-foreground">Per-entry operation previews</p> - <div v-if="Object.keys(selectedMutationLogRun.operation_counts).length === 0" class="text-xs text-muted-foreground"> - No operation class counts recorded for this run. - </div> - <div v-else class="space-y-1.5"> - <div - v-for="([opClass, count]) in Object.entries(selectedMutationLogRun.operation_counts)" - :key="opClass" - class="flex items-center justify-between rounded border px-2 py-1.5 text-xs" - > - <span class="font-mono">{{ opClass }}</span> - <Badge variant="secondary">{{ count }}</Badge> - </div> - </div> - </div> - </div> - <div v-else class="rounded border border-dashed p-6 text-sm text-muted-foreground"> - Select a mutation run to view summary and per-entry previews. 
- </div> - </CardContent> - </Card> - </TabsContent> - </Tabs> - </CardContent> - </Card> - </div> + <template v-else> + <p class="text-xs text-muted-foreground"> + Select a status or artifact item to inspect mode-specific workspace content. + </p> + </template> + </CardContent> + </Card> + </div> </section> </template> </div> diff --git a/src/dev-ui/app/tests/kgManageState.test.ts b/src/dev-ui/app/tests/kgManageState.test.ts new file mode 100644 index 000000000..64fb1fd74 --- /dev/null +++ b/src/dev-ui/app/tests/kgManageState.test.ts @@ -0,0 +1,190 @@ +import { describe, it, expect, vi } from 'vitest' +import { + SECTION_STATE_MESSAGES, + appendLocalChatMessage, + buildTransitionRestrictionReason, + handleActivatableKeydown, + handleChatInputKeydown, + isForbiddenHttpError, + resolveForbiddenReason, + resolveSectionState, + shouldApplyMutationResult, +} from '../utils/kgManageState' + +describe('KG-MANAGE-017 - chat input keyboard contract', () => { + it('sends on Enter without Shift', () => { + const onSend = vi.fn() + const preventDefault = vi.fn() + const result = handleChatInputKeydown( + { key: 'Enter', shiftKey: false, preventDefault }, + onSend, + ) + + expect(result).toBe('send') + expect(preventDefault).toHaveBeenCalledOnce() + expect(onSend).toHaveBeenCalledOnce() + }) + + it('inserts newline on Shift+Enter without sending', () => { + const onSend = vi.fn() + const preventDefault = vi.fn() + const result = handleChatInputKeydown( + { key: 'Enter', shiftKey: true, preventDefault }, + onSend, + ) + + expect(result).toBe('newline') + expect(preventDefault).not.toHaveBeenCalled() + expect(onSend).not.toHaveBeenCalled() + }) + + it('ignores non-Enter keys', () => { + const onSend = vi.fn() + const preventDefault = vi.fn() + const result = handleChatInputKeydown( + { key: 'a', shiftKey: false, preventDefault }, + onSend, + ) + + expect(result).toBe('ignored') + expect(onSend).not.toHaveBeenCalled() + }) +}) + +describe('KG-MANAGE-018 - keyboard operable step 
and rail actions', () => { + it('activates step actions on Enter', () => { + const onActivate = vi.fn() + const preventDefault = vi.fn() + const handled = handleActivatableKeydown( + { key: 'Enter', preventDefault }, + onActivate, + ) + + expect(handled).toBe(true) + expect(preventDefault).toHaveBeenCalledOnce() + expect(onActivate).toHaveBeenCalledOnce() + }) + + it('activates step actions on Space', () => { + const onActivate = vi.fn() + const preventDefault = vi.fn() + const handled = handleActivatableKeydown( + { key: ' ', preventDefault }, + onActivate, + ) + + expect(handled).toBe(true) + expect(onActivate).toHaveBeenCalledOnce() + }) + + it('ignores unrelated keys for activatable controls', () => { + const onActivate = vi.fn() + const handled = handleActivatableKeydown( + { key: 'Tab', preventDefault: vi.fn() }, + onActivate, + ) + + expect(handled).toBe(false) + expect(onActivate).not.toHaveBeenCalled() + }) +}) + +describe('KG-MANAGE-019 - section-specific loading, empty, and error states', () => { + it('uses step-specific loading messages', () => { + const overview = resolveSectionState({ + section: 'workspace-overview', + loading: true, + }) + const mutationLogs = resolveSectionState({ + section: 'mutation-logs', + loading: true, + }) + + expect(overview.message).toBe(SECTION_STATE_MESSAGES['workspace-overview'].loading) + expect(mutationLogs.message).toBe(SECTION_STATE_MESSAGES['mutation-logs'].loading) + expect(overview.message).not.toBe(mutationLogs.message) + }) + + it('returns actionable empty states with optional next-step labels', () => { + const state = resolveSectionState({ + section: 'mutation-logs', + empty: true, + emptyActionLabel: 'Refresh runs', + }) + + expect(state.phase).toBe('empty') + expect(state.message).toBe(SECTION_STATE_MESSAGES['mutation-logs'].empty) + expect(state.actionLabel).toBe('Refresh runs') + }) + + it('surfaces section-specific error messaging', () => { + const state = resolveSectionState({ + section: 
'graph-management', + error: 'Session service unavailable', + }) + + expect(state.phase).toBe('error') + expect(state.message).toBe('Session service unavailable') + }) +}) + +describe('KG-MANAGE-020 - forbidden and disabled action restrictions', () => { + it('detects forbidden HTTP errors', () => { + expect(isForbiddenHttpError({ statusCode: 403 })).toBe(true) + expect(isForbiddenHttpError(new Error('Forbidden'))).toBe(true) + expect(isForbiddenHttpError({ statusCode: 404 })).toBe(false) + }) + + it('builds explicit forbidden section messaging', () => { + const state = resolveSectionState({ + section: 'graph-management', + forbidden: true, + forbiddenReason: 'You do not have permission to perform this action', + }) + + expect(state.phase).toBe('forbidden') + expect(state.message).toBe('You do not have permission to perform this action') + }) + + it('explains why transition is disabled', () => { + expect( + buildTransitionRestrictionReason(false, ['Missing entity types']), + ).toBe('Transition blocked: Missing entity types') + expect(buildTransitionRestrictionReason(true, [])).toBeNull() + }) + + it('blocks mutation result application when forbidden', () => { + expect(shouldApplyMutationResult(true)).toBe(false) + expect(shouldApplyMutationResult(false)).toBe(true) + }) + + it('extracts forbidden reasons from API errors', () => { + expect( + resolveForbiddenReason( + { data: { detail: 'You do not have permission to perform this action' } }, + 'Access restricted', + ), + ).toBe('You do not have permission to perform this action') + }) +}) + +describe('KG-MANAGE-017 - local chat send helper', () => { + it('appends trimmed user messages to session history', () => { + const history = appendLocalChatMessage( + { message_history: [{ role: 'assistant', content: 'Hello' }] }, + ' Define schema ', + ) + + expect(history).toHaveLength(2) + expect(history[1]).toEqual({ role: 'user', content: 'Define schema' }) + }) + + it('ignores blank chat submissions', () => { + const 
history = appendLocalChatMessage( + { message_history: [{ role: 'assistant', content: 'Hello' }] }, + ' ', + ) + + expect(history).toHaveLength(1) + }) +}) diff --git a/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts b/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts index 065facfd7..e020cd2ec 100644 --- a/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts +++ b/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts @@ -13,6 +13,18 @@ import { resolveStepDestination, stepStatusTintClass, } from '../utils/kgManageWorkspace' +import { + GRAPH_MANAGEMENT_MODE_LABELS, + GRAPH_MANAGEMENT_MODE_ORDER, + buildGraphManagementRailItems, + buildGraphManagementStepUrl, + filterRailItemsForMode, + isRailItemValidInMode, + parseGraphManagementModeQuery, + resolveDefaultGraphManagementMode, + resolveRailSelectionForMode, + resolveSharedSessionMode, +} from '../utils/kgGraphManagement' const manageWorkspaceVue = readFileSync( resolve(__dirname, '../pages/knowledge-graphs/[kgId]/manage.vue'), @@ -42,7 +54,7 @@ const baseWorkspaceStatus = { }, } -describe('Knowledge Graph Manage Workspace - mode-aware controls', () => { +describe('Knowledge Graph Manage Workspace - graph management controls', () => { it('loads workspace status projection from management API', () => { expect(manageWorkspaceVue).toContain('/workspace-status') expect(manageWorkspaceVue).toContain('loadWorkspaceStatus') @@ -60,50 +72,18 @@ describe('Knowledge Graph Manage Workspace - mode-aware controls', () => { expect(manageWorkspaceVue).toContain('Go to Extraction/Mutations') }) - it('renders readiness result blocks and blocking reasons list', () => { - expect(manageWorkspaceVue).toContain('Readiness Results') - expect(manageWorkspaceVue).toContain('blocking_reasons') - expect(manageWorkspaceVue).toContain('prepopulated_types_ready') - }) - - it('renders session pointer references for bootstrap and extraction modes', () => { - expect(manageWorkspaceVue).toContain('Session 
Pointers') - expect(manageWorkspaceVue).toContain('active_schema_bootstrap_session_id') - expect(manageWorkspaceVue).toContain('active_extraction_operations_session_id') - }) - it('loads scoped session history with run metrics after clear chat', () => { expect(manageWorkspaceVue).toContain('loadSessionHistory') - expect(manageWorkspaceVue).toContain('/sessions/${sessionMode.value}/history') + expect(manageWorkspaceVue).toContain('/sessions/${sharedSessionMode.value}/history') expect(manageWorkspaceVue).toContain('sessionHistory') expect(manageWorkspaceVue).toContain('run_metrics') expect(manageWorkspaceVue).toContain('Session History') }) - - - it('uses shared conversation panel for bootstrap and extraction sessions', () => { - expect(manageWorkspaceVue).toContain('SharedConversationPanel') - expect(manageWorkspaceVue).toContain('sessionMode') - expect(manageWorkspaceVue).toContain('/sessions/${sessionMode.value}/active') - }) - - it('supports explicit Clear chat reset for extraction session', () => { - expect(manageWorkspaceVue).toContain('clearChat') - expect(manageWorkspaceVue).toContain('/sessions/${sessionMode.value}/clear-chat') - expect(sharedConversationPanelVue).toContain('Clear chat') - }) - - it('provides tabbed lower operations area for extraction workflows', () => { - expect(manageWorkspaceVue).toContain('Operations Workspace') - expect(manageWorkspaceVue).toContain('TabsTrigger value="extraction-jobs"') - expect(manageWorkspaceVue).toContain('TabsTrigger value="manual-mutations"') - expect(manageWorkspaceVue).toContain('TabsTrigger value="run-logs"') - }) }) describe('Knowledge Graph Manage Workspace - mutation log browser', () => { - it('renders mutation log browser card and scoped run listing', () => { - expect(manageWorkspaceVue).toContain('MutationLog Browser') + it('renders mutation log step with scoped run listing', () => { + expect(manageWorkspaceVue).toContain('MutationLogs') expect(manageWorkspaceVue).toContain('loadMutationLogRuns') 
expect(manageWorkspaceVue).toContain('/management/knowledge-graphs/${kgId.value}/data-sources') }) @@ -327,4 +307,238 @@ describe('Shared conversation panel - extraction UX contract', () => { expect(sharedConversationPanelVue).toContain('timelineRef') expect(sharedConversationPanelVue).toContain('scrollTop = timelineRef.value.scrollHeight') }) + + it('accepts mode-aware input placeholder and session status props', () => { + expect(sharedConversationPanelVue).toContain('inputPlaceholder') + expect(sharedConversationPanelVue).toContain('sessionStatusLabel') + }) +}) + +describe('KG-MANAGE-006 - graph management conversation-first layout', () => { + it('renders graph management step with shared conversation panel', () => { + expect(manageWorkspaceVue).toContain("activeStep === 'graph-management'") + expect(manageWorkspaceVue).toContain('SharedConversationPanel') + expect(manageWorkspaceVue).toContain('graph-management-controls') + }) + + it('uses one shared session endpoint across UI mode changes', () => { + expect(manageWorkspaceVue).toContain('sharedSessionMode') + expect(manageWorkspaceVue).toContain('/sessions/${sharedSessionMode.value}/active') + expect(manageWorkspaceVue).not.toContain('watch(graphManagementMode') + }) +}) + +describe('KG-MANAGE-007 - graph management modes', () => { + it('supports the three canonical graph management modes', () => { + for (const mode of GRAPH_MANAGEMENT_MODE_ORDER) { + expect(GRAPH_MANAGEMENT_MODE_LABELS[mode]).toBeTruthy() + expect(manageWorkspaceVue).toContain(mode) + } + expect(manageWorkspaceVue).toContain('graphManagementMode') + expect(manageWorkspaceVue).toContain('parseGraphManagementModeQuery') + }) + + it('defaults mode from workspace lifecycle state', () => { + expect(resolveDefaultGraphManagementMode('schema_bootstrap')).toBe('initial-schema-design') + expect(resolveDefaultGraphManagementMode('extraction_operations')).toBe('extraction-jobs') + }) + + it('updates chat placeholder by mode without changing session 
scope', () => { + expect(manageWorkspaceVue).toContain('graphManagementInputPlaceholder') + expect(manageWorkspaceVue).toContain('GRAPH_MANAGEMENT_INPUT_PLACEHOLDERS') + }) +}) + +describe('KG-MANAGE-008 - hybrid lower panel shared rail', () => { + it('renders persistent status and artifact rail with keyboard selection', () => { + expect(manageWorkspaceVue).toContain('graph-management-rail') + expect(manageWorkspaceVue).toContain('buildGraphManagementRailItems') + expect(manageWorkspaceVue).toContain('role="listbox"') + expect(manageWorkspaceVue).toContain('@keydown') + }) + + it('builds rail items with status and last-updated metadata', () => { + const items = buildGraphManagementRailItems({ + workspaceMode: 'schema_bootstrap', + transitionEligible: false, + blockingReasonCount: 1, + prepopulatedGapCount: 0, + sessionUpdatedAt: '2026-05-22T12:00:00Z', + hasActiveSession: true, + }) + + expect(items.every((item) => item.status && item.lastUpdated && item.label)).toBe(true) + expect(items.find((item) => item.id === 'session-pointers')?.modes).toEqual( + GRAPH_MANAGEMENT_MODE_ORDER, + ) + }) +}) + +describe('KG-MANAGE-009 - hybrid lower panel mode-specific detail', () => { + it('renders mode-specific detail panel content regions', () => { + expect(manageWorkspaceVue).toContain('graph-management-detail') + expect(manageWorkspaceVue).toContain('selectedRailItemId') + expect(manageWorkspaceVue).toContain("selectedRailItemId === 'schema-readiness'") + expect(manageWorkspaceVue).toContain("graphManagementMode === 'extraction-jobs'") + expect(manageWorkspaceVue).toContain("graphManagementMode === 'one-off-mutations'") + }) + + it('filters rail items to the active mode', () => { + const items = buildGraphManagementRailItems({ + workspaceMode: 'extraction_operations', + transitionEligible: true, + blockingReasonCount: 0, + prepopulatedGapCount: 0, + sessionUpdatedAt: null, + hasActiveSession: true, + }) + + expect(filterRailItemsForMode(items, 'extraction-jobs').map((item) 
=> item.id)).toContain( + 'extraction-jobs-setup', + ) + expect(filterRailItemsForMode(items, 'one-off-mutations').map((item) => item.id)).toContain( + 'mutation-authoring', + ) + }) +}) + +describe('KG-MANAGE-010 - schema design parity behavior', () => { + it('exposes schema readiness and validation detail in initial schema design mode', () => { + expect(manageWorkspaceVue).toContain('progressChecklist') + expect(manageWorkspaceVue).toContain('Bootstrap Progress Checklist') + expect(manageWorkspaceVue).toContain('blocking_reasons') + expect(manageWorkspaceVue).toContain('prepopulated_types_without_instances') + }) + + it('keeps validate and transition controls available for schema design work', () => { + expect(manageWorkspaceVue).toContain('validateWorkspace') + expect(manageWorkspaceVue).toContain('transitionToExtraction') + expect(manageWorkspaceVue).toContain('canTransition') + }) +}) + +describe('KG-MANAGE-011 - session reset behavior', () => { + it('supports explicit clear chat reset on the shared session', () => { + expect(manageWorkspaceVue).toContain('clearChat') + expect(manageWorkspaceVue).toContain('/sessions/${sharedSessionMode.value}/clear-chat') + expect(sharedConversationPanelVue).toContain('Clear chat') + }) + + it('keeps graph management mode unchanged after clear chat', () => { + const clearChatBlock = manageWorkspaceVue.match( + /async function clearChat\(\) \{[\s\S]*?\n\}/, + )?.[0] ?? 
'' + expect(clearChatBlock).toContain('clearChat') + expect(clearChatBlock).not.toContain('graphManagementMode') + }) +}) + +describe('KG-MANAGE-016 - graph management top controls', () => { + it('renders mode switcher, session status, and validation affordance without scrolling', () => { + expect(manageWorkspaceVue).toContain('graph-management-controls') + expect(manageWorkspaceVue).toContain('graphManagementModeLabel') + expect(manageWorkspaceVue).toContain('sessionStatusLabel') + expect(manageWorkspaceVue).toContain('validateWorkspace') + expect(manageWorkspaceVue).toContain('Clear chat') + }) + + it('maps shared session mode from workspace lifecycle without UI mode coupling', () => { + expect(resolveSharedSessionMode('schema_bootstrap')).toBe('schema_bootstrap') + expect(resolveSharedSessionMode('extraction_operations')).toBe('extraction_operations') + }) + + it('preserves rail selection across mode changes when still valid', () => { + const items = buildGraphManagementRailItems({ + workspaceMode: 'extraction_operations', + transitionEligible: true, + blockingReasonCount: 0, + prepopulatedGapCount: 0, + sessionUpdatedAt: '2026-05-22T12:00:00Z', + hasActiveSession: true, + }) + + expect( + resolveRailSelectionForMode('session-pointers', 'extraction-jobs', items), + ).toBe('session-pointers') + expect( + isRailItemValidInMode('schema-readiness', 'extraction-jobs', items), + ).toBe(false) + expect( + resolveRailSelectionForMode('schema-readiness', 'extraction-jobs', items), + ).toBe('session-pointers') + }) + + it('builds graph management URLs with mode query for keyboard navigation', () => { + expect(buildGraphManagementStepUrl('kg-abc', 'one-off-mutations')).toBe( + '/knowledge-graphs/kg-abc/manage?step=graph-management&gm_mode=one-off-mutations', + ) + expect(parseGraphManagementModeQuery('initial-schema-design')).toBe('initial-schema-design') + }) +}) + +describe('KG-MANAGE-017 - chat input keyboard contract', () => { + it('wires Enter-to-send and Shift+Enter 
newline handling in shared conversation panel', () => { + expect(sharedConversationPanelVue).toContain('handleChatInputKeydown') + expect(sharedConversationPanelVue).toContain('@keydown="onChatInputKeydown"') + expect(sharedConversationPanelVue).toContain('Shift+Enter adds a new line') + expect(sharedConversationPanelVue).toContain("emit('sendMessage'") + expect(manageWorkspaceVue).toContain('@send-message="sendChatMessage"') + }) +}) + +describe('KG-MANAGE-018 - keyboard operable step and rail actions', () => { + it('supports keyboard activation for step card primary actions', () => { + expect(manageWorkspaceVue).toContain('onStepActionKeydown') + expect(manageWorkspaceVue).toContain('handleActivatableKeydown') + expect(manageWorkspaceVue).toContain('@keydown="onStepActionKeydown($event, card.id)"') + }) + + it('supports keyboard activation for graph management rail selection', () => { + expect(manageWorkspaceVue).toContain('onRailKeydown') + expect(manageWorkspaceVue).toContain('role="listbox"') + expect(manageWorkspaceVue).toContain('tabindex="0"') + expect(manageWorkspaceVue).toContain('@keydown="onRailKeydown($event, item.id)"') + }) + + it('exposes keyboard-reachable graph management mode switch tabs', () => { + expect(manageWorkspaceVue).toContain('role="tablist"') + expect(manageWorkspaceVue).toContain('onModeSwitchKeydown') + expect(manageWorkspaceVue).toContain('@keydown="onModeSwitchKeydown($event, mode)"') + }) +}) + +describe('KG-MANAGE-019 - section-specific loading, empty, and error states', () => { + it('uses section state contracts for workspace, graph management, and mutation logs', () => { + expect(manageWorkspaceVue).toContain('resolveSectionState') + expect(manageWorkspaceVue).toContain('workspaceOverviewState') + expect(manageWorkspaceVue).toContain('graphManagementSectionState') + expect(manageWorkspaceVue).toContain('mutationLogsSectionState') + expect(manageWorkspaceVue).toContain('Retry workspace load') + 
expect(manageWorkspaceVue).toContain('Retry mutation log load') + expect(manageWorkspaceVue).toContain('Retry session load') + }) + + it('renders actionable empty states for mutation log runs', () => { + expect(manageWorkspaceVue).toContain('mutationLogsSectionState.actionLabel') + expect(manageWorkspaceVue).toContain('Refresh runs') + }) +}) + +describe('KG-MANAGE-020 - forbidden and disabled action restrictions', () => { + it('detects forbidden responses and surfaces explicit restriction messaging', () => { + expect(manageWorkspaceVue).toContain('isForbiddenHttpError') + expect(manageWorkspaceVue).toContain('workspaceForbiddenReason') + expect(manageWorkspaceVue).toContain('sessionForbiddenReason') + expect(manageWorkspaceVue).toContain('role="alert"') + expect(manageWorkspaceVue).toContain(':forbidden="sessionForbidden"') + expect(sharedConversationPanelVue).toContain('forbidden?: boolean') + expect(sharedConversationPanelVue).toContain('v-if="forbidden"') + }) + + it('explains disabled transition actions and avoids partial updates on forbidden', () => { + expect(manageWorkspaceVue).toContain('transitionRestrictionReason') + expect(manageWorkspaceVue).toContain('buildTransitionRestrictionReason') + expect(manageWorkspaceVue).toContain('shouldApplyMutationResult') + expect(manageWorkspaceVue).toContain('statusProjection.value = previousStatus') + }) }) diff --git a/src/dev-ui/app/utils/kgGraphManagement.ts b/src/dev-ui/app/utils/kgGraphManagement.ts new file mode 100644 index 000000000..203c6ce7b --- /dev/null +++ b/src/dev-ui/app/utils/kgGraphManagement.ts @@ -0,0 +1,167 @@ +import type { StepStatusLabel } from './kgManageWorkspace' + +export type GraphManagementMode = + | 'initial-schema-design' + | 'extraction-jobs' + | 'one-off-mutations' + +export type GraphManagementRailItemId = + | 'schema-readiness' + | 'validation-diagnostics' + | 'session-pointers' + | 'extraction-jobs-setup' + | 'mutation-authoring' + +export const GRAPH_MANAGEMENT_MODE_ORDER: 
GraphManagementMode[] = [ + 'initial-schema-design', + 'extraction-jobs', + 'one-off-mutations', +] + +export const GRAPH_MANAGEMENT_MODE_LABELS: Record<GraphManagementMode, string> = { + 'initial-schema-design': 'Initial Schema Design', + 'extraction-jobs': 'Extraction Jobs', + 'one-off-mutations': 'One-off Mutations', +} + +export const GRAPH_MANAGEMENT_INPUT_PLACEHOLDERS: Record<GraphManagementMode, string> = { + 'initial-schema-design': + 'Describe schema goals, entity types, or relationship constraints for this knowledge graph…', + 'extraction-jobs': + 'Ask about extraction job setup, sync runs, or maintenance execution for this graph…', + 'one-off-mutations': + 'Author or preview one-off graph mutations scoped to this knowledge graph…', +} + +export interface GraphManagementRailItem { + id: GraphManagementRailItemId + label: string + status: StepStatusLabel + lastUpdated: string + detailHint: string + modes: GraphManagementMode[] +} + +export interface GraphManagementRailInputs { + workspaceMode: 'schema_bootstrap' | 'extraction_operations' + transitionEligible: boolean + blockingReasonCount: number + prepopulatedGapCount: number + sessionUpdatedAt: string | null + hasActiveSession: boolean +} + +export function parseGraphManagementModeQuery(mode: unknown): GraphManagementMode | null { + if ( + mode === 'initial-schema-design' + || mode === 'extraction-jobs' + || mode === 'one-off-mutations' + ) { + return mode + } + return null +} + +export function resolveDefaultGraphManagementMode( + workspaceMode: 'schema_bootstrap' | 'extraction_operations', +): GraphManagementMode { + return workspaceMode === 'extraction_operations' ? 'extraction-jobs' : 'initial-schema-design' +} + +export function resolveSharedSessionMode( + workspaceMode: 'schema_bootstrap' | 'extraction_operations', +): 'schema_bootstrap' | 'extraction_operations' { + return workspaceMode === 'extraction_operations' ? 
'extraction_operations' : 'schema_bootstrap' +} + +export function buildGraphManagementRailItems( + input: GraphManagementRailInputs, +): GraphManagementRailItem[] { + const sessionStamp = input.sessionUpdatedAt ?? 'Not loaded' + const readinessStatus: StepStatusLabel = input.blockingReasonCount > 0 + ? 'needs_attention' + : input.transitionEligible + ? 'ready' + : 'in_progress' + + return [ + { + id: 'schema-readiness', + label: 'Schema readiness', + status: readinessStatus, + lastUpdated: sessionStamp, + detailHint: 'Bootstrap checklist, validate, and transition controls.', + modes: ['initial-schema-design'], + }, + { + id: 'validation-diagnostics', + label: 'Validation diagnostics', + status: input.prepopulatedGapCount > 0 || input.blockingReasonCount > 0 + ? 'needs_attention' + : 'ready', + lastUpdated: sessionStamp, + detailHint: 'Blocking reasons and prepopulated type gaps.', + modes: ['initial-schema-design'], + }, + { + id: 'session-pointers', + label: 'Session pointers', + status: input.hasActiveSession ? 'ready' : 'in_progress', + lastUpdated: sessionStamp, + detailHint: 'Active bootstrap, extraction, and completed session references.', + modes: GRAPH_MANAGEMENT_MODE_ORDER, + }, + { + id: 'extraction-jobs-setup', + label: 'Extraction jobs setup', + status: input.workspaceMode === 'extraction_operations' ? 'ready' : 'blocked', + lastUpdated: sessionStamp, + detailHint: 'Job setup, execution controls, and run context.', + modes: ['extraction-jobs'], + }, + { + id: 'mutation-authoring', + label: 'Mutation authoring', + status: input.workspaceMode === 'extraction_operations' ? 
'ready' : 'blocked', + lastUpdated: sessionStamp, + detailHint: 'One-off mutation preview and submit context.', + modes: ['one-off-mutations'], + }, + ] +} + +export function filterRailItemsForMode( + items: GraphManagementRailItem[], + mode: GraphManagementMode, +): GraphManagementRailItem[] { + return items.filter((item) => item.modes.includes(mode)) +} + +export function isRailItemValidInMode( + itemId: GraphManagementRailItemId, + mode: GraphManagementMode, + items: GraphManagementRailItem[], +): boolean { + const item = items.find((candidate) => candidate.id === itemId) + return item?.modes.includes(mode) ?? false +} + +export function resolveRailSelectionForMode( + selectedId: GraphManagementRailItemId | null, + mode: GraphManagementMode, + items: GraphManagementRailItem[], +): GraphManagementRailItemId | null { + const modeItems = filterRailItemsForMode(items, mode) + if (modeItems.length === 0) return null + if (selectedId && isRailItemValidInMode(selectedId, mode, items)) { + return selectedId + } + return modeItems[0]?.id ?? 
null +} + +export function buildGraphManagementStepUrl( + kgId: string, + mode: GraphManagementMode, +): string { + return `/knowledge-graphs/${encodeURIComponent(kgId)}/manage?step=graph-management&gm_mode=${mode}` +} diff --git a/src/dev-ui/app/utils/kgManageState.ts b/src/dev-ui/app/utils/kgManageState.ts new file mode 100644 index 000000000..2d567463d --- /dev/null +++ b/src/dev-ui/app/utils/kgManageState.ts @@ -0,0 +1,180 @@ +export type ManageSectionId = + | 'workspace-overview' + | 'graph-management' + | 'mutation-logs' + | 'data-sources' + | 'maintain' + +export type SectionPhase = 'loading' | 'empty' | 'error' | 'ready' | 'forbidden' + +export interface SectionStateContract { + phase: SectionPhase + title: string + message: string + actionLabel?: string +} + +export const SECTION_STATE_MESSAGES: Record< + ManageSectionId, + { loading: string; empty: string; error: string; forbidden: string } +> = { + 'workspace-overview': { + loading: 'Loading workspace overview and step readiness…', + empty: 'Workspace overview is unavailable until status loads.', + error: 'Could not load workspace overview for this knowledge graph.', + forbidden: 'You do not have permission to view this workspace overview.', + }, + 'graph-management': { + loading: 'Loading graph management session and workspace panels…', + empty: 'Graph management is ready, but no session activity is loaded yet.', + error: 'Could not load graph management session data.', + forbidden: 'You do not have permission to manage this knowledge graph.', + }, + 'mutation-logs': { + loading: 'Loading mutation log runs for this knowledge graph…', + empty: 'No mutation log runs recorded for this knowledge graph yet.', + error: 'Could not load mutation log runs for this knowledge graph.', + forbidden: 'You do not have permission to view mutation logs for this graph.', + }, + 'data-sources': { + loading: 'Loading data source readiness for this knowledge graph…', + empty: 'Connect a data source to continue workspace 
setup.', + error: 'Could not load data sources for this knowledge graph.', + forbidden: 'You do not have permission to view data sources for this graph.', + }, + maintain: { + loading: 'Loading maintenance readiness for tracked sources…', + empty: 'No tracked source changes are ready for maintenance.', + error: 'Could not load maintenance readiness for this knowledge graph.', + forbidden: 'You do not have permission to run maintenance for this graph.', + }, +} + +export function isForbiddenHttpError(err: unknown): boolean { + if (err && typeof err === 'object') { + const fetchErr = err as { statusCode?: number; status?: number } + const status = fetchErr.statusCode ?? fetchErr.status + if (status === 403) return true + } + if (err instanceof Error) { + const message = err.message.toLowerCase() + return message.includes('forbidden') || message.includes('403') + } + return false +} + +export function resolveForbiddenReason( + err: unknown, + fallback: string, +): string { + if (err instanceof Error && err.message.trim()) { + return err.message + } + if (err && typeof err === 'object') { + const fetchErr = err as { data?: { detail?: unknown } } + if (typeof fetchErr.data?.detail === 'string' && fetchErr.data.detail.trim()) { + return fetchErr.data.detail + } + } + return fallback +} + +export function resolveSectionState(input: { + section: ManageSectionId + loading?: boolean + error?: string | null + forbidden?: boolean + forbiddenReason?: string | null + empty?: boolean + emptyActionLabel?: string +}): SectionStateContract { + const defaults = SECTION_STATE_MESSAGES[input.section] + + if (input.forbidden) { + return { + phase: 'forbidden', + title: 'Access restricted', + message: input.forbiddenReason?.trim() || defaults.forbidden, + } + } + + if (input.loading) { + return { + phase: 'loading', + title: 'Loading', + message: defaults.loading, + } + } + + if (input.error) { + return { + phase: 'error', + title: 'Unable to load section', + message: input.error, + } + 
} + + if (input.empty) { + return { + phase: 'empty', + title: 'Nothing to show yet', + message: defaults.empty, + actionLabel: input.emptyActionLabel, + } + } + + return { + phase: 'ready', + title: 'Ready', + message: '', + } +} + +export function handleChatInputKeydown( + event: Pick<KeyboardEvent, 'key' | 'shiftKey' | 'preventDefault'>, + onSend: () => void, +): 'send' | 'newline' | 'ignored' { + if (event.key !== 'Enter') return 'ignored' + if (event.shiftKey) return 'newline' + event.preventDefault() + onSend() + return 'send' +} + +export function handleActivatableKeydown( + event: Pick<KeyboardEvent, 'key' | 'preventDefault'>, + onActivate: () => void, +): boolean { + if (event.key === 'Enter' || event.key === ' ') { + event.preventDefault() + onActivate() + return true + } + return false +} + +export function buildTransitionRestrictionReason( + canTransition: boolean, + blockingReasons: string[], +): string | null { + if (canTransition) return null + if (blockingReasons.length > 0) { + return `Transition blocked: ${blockingReasons.join('; ')}` + } + return 'Transition blocked until schema bootstrap readiness requirements are met.' +} + +export function shouldApplyMutationResult(forbidden: boolean): boolean { + return !forbidden +} + +export function appendLocalChatMessage( + session: { message_history: Array<{ role?: string; content?: string; message?: string }> } | null, + content: string, +): Array<{ role?: string; content?: string; message?: string }> { + const trimmed = content.trim() + if (!trimmed) return session?.message_history ?? [] + const history = [...(session?.message_history ?? 
[])] + history.push({ role: 'user', content: trimmed }) + return history +} diff --git a/src/dev-ui/vitest.config.ts b/src/dev-ui/vitest.config.ts index 37e491654..9537e4cf4 100644 --- a/src/dev-ui/vitest.config.ts +++ b/src/dev-ui/vitest.config.ts @@ -7,6 +7,11 @@ export default defineConfig({ test: { environment: 'happy-dom', globals: true, + exclude: [ + '**/node_modules/**', + '**/dist/**', + '**/local_modules/**', + ], }, resolve: { alias: { From bd226ec447d4243574c45ab9f7cc4a89bbea57a2 Mon Sep 17 00:00:00 2001 From: Austin Redenbaugh <aredenba@redhat.com> Date: Fri, 22 May 2026 12:38:29 -0400 Subject: [PATCH 046/153] feat(extraction): add docker/podman-backed workload runtime adapters (#732) Introduce container runtime ports and CLI adapters for sticky session and ephemeral worker lifecycle, with settings, factory wiring, and integration tests for local development execution. Co-authored-by: Cursor <cursoragent@cursor.com> --- compose.dev.yaml | 4 + env/api.env | 6 +- src/api/extraction/dependencies.py | 21 ++ src/api/extraction/infrastructure/__init__.py | 13 +- .../container_workload_runtime.py | 236 ++++++++++++++++++ .../workload_runtime_factory.py | 58 +++++ .../workload_runtime_settings.py | 48 ++++ .../container_runtime/__init__.py | 19 ++ .../container_runtime/cli_runtime.py | 87 +++++++ .../container_runtime/factory.py | 30 +++ .../shared_kernel/container_runtime/ports.py | 51 ++++ src/api/tests/integration/conftest.py | 4 + .../tests/integration/extraction/conftest.py | 36 +++ .../test_container_workload_runtime.py | 174 +++++++++++++ .../test_container_workload_runtime.py | 181 ++++++++++++++ .../test_workload_runtime_factory.py | 42 ++++ .../test_workload_runtime_settings.py | 18 ++ .../container_runtime/test_cli_runtime.py | 80 ++++++ 18 files changed, 1106 insertions(+), 2 deletions(-) create mode 100644 src/api/extraction/infrastructure/container_workload_runtime.py create mode 100644 
src/api/extraction/infrastructure/workload_runtime_factory.py create mode 100644 src/api/extraction/infrastructure/workload_runtime_settings.py create mode 100644 src/api/shared_kernel/container_runtime/__init__.py create mode 100644 src/api/shared_kernel/container_runtime/cli_runtime.py create mode 100644 src/api/shared_kernel/container_runtime/factory.py create mode 100644 src/api/shared_kernel/container_runtime/ports.py create mode 100644 src/api/tests/integration/extraction/test_container_workload_runtime.py create mode 100644 src/api/tests/unit/extraction/infrastructure/test_container_workload_runtime.py create mode 100644 src/api/tests/unit/extraction/infrastructure/test_workload_runtime_factory.py create mode 100644 src/api/tests/unit/extraction/infrastructure/test_workload_runtime_settings.py create mode 100644 src/api/tests/unit/shared_kernel/container_runtime/test_cli_runtime.py diff --git a/compose.dev.yaml b/compose.dev.yaml index e70679ff7..ab8e5bf51 100644 --- a/compose.dev.yaml +++ b/compose.dev.yaml @@ -5,10 +5,14 @@ services: user: "${UID}:${GID}" environment: UV_CACHE_DIR: /tmp/uv-cache + KARTOGRAPH_EXTRACTION_RUNTIME_BACKEND: container + KARTOGRAPH_EXTRACTION_RUNTIME_CONTAINER_ENGINE: auto volumes: # Mount the entire app directory (minus venv) for hot-reload - ./src/api:/app:z - /app/.venv + # Allow API process to launch sibling extraction runtime containers locally + - /var/run/docker.sock:/var/run/docker.sock command: - /bin/bash - -c diff --git a/env/api.env b/env/api.env index c909d14cf..781143af9 100644 --- a/env/api.env +++ b/env/api.env @@ -12,4 +12,8 @@ KARTOGRAPH_CORS_ORIGINS=["http://localhost:3000"] KARTOGRAPH_IAM_BOOTSTRAP_ADMIN_USERNAMES='["alice"]' KARTOGRAPH_IAM_SINGLE_TENANT_MODE=false # Generate with uv run python -c "from cryptography.fernet import Fernet; print(Fernet.generate_key().decode())" -KARTOGRAPH_MGMT_ENCRYPTION_KEY="vwN4rUcH-KL-UyJsL8hc6apftRUTovwec6L2M5uF5OE=" \ No newline at end of file 
@lru_cache
def get_sticky_session_runtime_manager() -> IStickySessionRuntimeManager:
    """Return configured sticky session runtime manager.

    The function takes no arguments and is wrapped in ``lru_cache``, so the
    factory runs on the first call only and every later call returns that same
    manager instance.
    """
    return create_sticky_session_runtime_manager()


@lru_cache
def get_ephemeral_extraction_worker_launcher() -> IEphemeralExtractionWorkerLauncher:
    """Return configured ephemeral extraction worker launcher.

    Cached the same way as the sticky manager provider: one launcher instance is
    built on first use and reused afterwards.
    """
    return create_ephemeral_extraction_worker_launcher()
"""Container-backed extraction workload runtime adapters."""

from __future__ import annotations

import re
from dataclasses import replace
from datetime import UTC, datetime, timedelta

from ulid import ULID

from extraction.ports.runtime import (
    EphemeralWorkerLaunchRequest,
    EphemeralWorkerLaunchResult,
    IEphemeralExtractionWorkerLauncher,
    IStickySessionRuntimeManager,
    ScopedWorkloadCredentials,
    StickySessionRuntimeLease,
)
from shared_kernel.container_runtime.ports import (
    ContainerRunSpec,
    ContainerRuntimeError,
    IContainerRuntime,
)

# Every run of characters outside this set is replaced by a single "-" when a
# container name is derived from an identifier.
_CONTAINER_NAME_SAFE = re.compile(r"[^a-zA-Z0-9_.-]+")


def _sanitize_container_name(prefix: str, identifier: str) -> str:
    """Build a container name from ``prefix`` and a free-form ``identifier``.

    Unsafe characters are collapsed to "-", the result is capped at 63
    characters and trailing separators are stripped. An identifier with no
    usable characters falls back to ``"runtime"`` so the name never ends up as
    the bare prefix.
    """
    # NOTE(review): the 63-character cap is kept from the original; presumably
    # it mirrors a hostname/DNS-label limit - confirm.
    cleaned = _CONTAINER_NAME_SAFE.sub("-", identifier).strip("-") or "runtime"
    name = f"{prefix}{cleaned}"
    return name[:63].rstrip("-_.") or f"{prefix}runtime"


def _stop_and_remove(container_runtime: IContainerRuntime, container_id: str) -> None:
    """Stop the container if it is running, then force-remove it."""
    if container_runtime.is_running(container_id):
        container_runtime.stop(container_id)
    container_runtime.remove(container_id, force=True)


class ContainerStickySessionRuntimeManager(IStickySessionRuntimeManager):
    """Sticky runtime manager backed by real container lifecycle operations.

    Leases are tracked in memory, keyed by session id. A lease is dropped only
    after its container was terminated successfully, so a failed termination
    stays visible and is retried on the next call.
    """

    def __init__(
        self,
        *,
        container_runtime: IContainerRuntime,
        sticky_image: str,
        sticky_command: tuple[str, ...],
        session_ttl: timedelta = timedelta(minutes=30),
    ) -> None:
        self._container_runtime = container_runtime
        self._sticky_image = sticky_image
        self._sticky_command = sticky_command
        self._session_ttl = session_ttl
        self._leases: dict[str, StickySessionRuntimeLease] = {}

    def get_or_start_runtime(
        self,
        *,
        session_id: str,
        user_id: str,
        knowledge_graph_id: str,
        mode: str,
    ) -> StickySessionRuntimeLease:
        """Return the session's live lease, or start a fresh container.

        An unexpired lease whose container is still running is refreshed
        (activity timestamp and expiry pushed forward). Otherwise any stale
        container is terminated and a new one is launched.
        """
        # NOTE(review): reuse is keyed on session_id only; user_id,
        # knowledge_graph_id and mode are not compared with the stored lease -
        # confirm that session ids are unique per scope.
        now = datetime.now(UTC)
        existing = self._leases.get(session_id)
        if (
            existing is not None
            and existing.expires_at > now
            and self._container_runtime.is_running(existing.container_id)
        ):
            refreshed = replace(
                existing,
                last_activity_at=now,
                expires_at=now + self._session_ttl,
                status="active",
            )
            self._leases[session_id] = refreshed
            return refreshed

        if existing is not None:
            self._terminate_container(existing.container_id)

        lease = self._start_runtime(
            session_id=session_id,
            user_id=user_id,
            knowledge_graph_id=knowledge_graph_id,
            mode=mode,
            now=now,
        )
        self._leases[session_id] = lease
        return lease

    def reset_runtime(
        self,
        *,
        session_id: str,
        user_id: str,
        knowledge_graph_id: str,
        mode: str,
    ) -> StickySessionRuntimeLease:
        """Terminate the session's container (if any) and start a clean one."""
        existing = self._leases.get(session_id)
        if existing is not None:
            # Terminate first: if this raises, the lease is still tracked and
            # the container is not silently orphaned.
            self._terminate_container(existing.container_id)
            del self._leases[session_id]
        return self.get_or_start_runtime(
            session_id=session_id,
            user_id=user_id,
            knowledge_graph_id=knowledge_graph_id,
            mode=mode,
        )

    def cleanup_expired(self, *, now: datetime) -> list[str]:
        """Terminate every container whose lease expired at or before ``now``.

        All expired leases are attempted even when one termination fails.
        Leases whose termination failed stay tracked so a later call retries
        them.

        Returns:
            Container ids that were terminated.

        Raises:
            ContainerRuntimeError: the first termination failure, re-raised
                after every expired lease has been attempted.
        """
        expired = [
            (session_id, lease)
            for session_id, lease in self._leases.items()
            if lease.expires_at <= now
        ]
        terminated: list[str] = []
        first_error: ContainerRuntimeError | None = None
        for session_id, lease in expired:
            try:
                self._terminate_container(lease.container_id)
            except ContainerRuntimeError as error:
                if first_error is None:
                    first_error = error
                continue
            del self._leases[session_id]
            terminated.append(lease.container_id)
        if first_error is not None:
            raise first_error
        return terminated

    def _start_runtime(
        self,
        *,
        session_id: str,
        user_id: str,
        knowledge_graph_id: str,
        mode: str,
        now: datetime,
    ) -> StickySessionRuntimeLease:
        """Launch the sticky container and describe it as an active lease."""
        container_name = _sanitize_container_name("kartograph-sticky-", session_id)
        launched = self._container_runtime.run(
            ContainerRunSpec(
                image=self._sticky_image,
                name=container_name,
                labels={
                    "kartograph.runtime.kind": "sticky",
                    "kartograph.session_id": session_id,
                    "kartograph.user_id": user_id,
                    "kartograph.knowledge_graph_id": knowledge_graph_id,
                    "kartograph.mode": mode,
                },
                command=self._sticky_command,
            )
        )
        return StickySessionRuntimeLease(
            session_id=session_id,
            container_id=launched.container_id,
            user_id=user_id,
            knowledge_graph_id=knowledge_graph_id,
            mode=mode,
            status="active",
            last_activity_at=now,
            expires_at=now + self._session_ttl,
        )

    def _terminate_container(self, container_id: str) -> None:
        """Stop (when running) and force-remove the given container."""
        _stop_and_remove(self._container_runtime, container_id)


class ContainerEphemeralExtractionWorkerLauncher(IEphemeralExtractionWorkerLauncher):
    """Ephemeral worker launcher backed by real container lifecycle operations."""

    def __init__(
        self,
        *,
        container_runtime: IContainerRuntime,
        worker_image: str,
        worker_command: tuple[str, ...],
    ) -> None:
        self._container_runtime = container_runtime
        self._worker_image = worker_image
        self._worker_command = worker_command
        # worker_id -> (launch request, container id)
        self._active_workers: dict[str, tuple[EphemeralWorkerLaunchRequest, str]] = {}

    @property
    def active_worker_count(self) -> int:
        """Number of workers launched here and not yet completed."""
        return len(self._active_workers)

    def worker_container_id(self, worker_id: str) -> str | None:
        """Return the container id of an active worker, or None if unknown."""
        worker = self._active_workers.get(worker_id)
        if worker is None:
            return None
        return worker[1]

    def launch(
        self,
        *,
        request: EphemeralWorkerLaunchRequest,
        credentials: ScopedWorkloadCredentials,
    ) -> EphemeralWorkerLaunchResult:
        """Validate the credentials, then start a worker container.

        Raises:
            ValueError: when the credentials lack the tenant, knowledge-graph
                or ``workload:extraction`` scope, or are already expired. No
                container is started in that case.
        """
        required_scopes = {
            f"tenant:{request.tenant_id}",
            f"knowledge_graph:{request.knowledge_graph_id}",
            "workload:extraction",
        }
        available_scopes = set(credentials.scopes)
        if not required_scopes.issubset(available_scopes):
            raise ValueError("credentials scope does not satisfy workload requirements")
        if credentials.expires_at <= datetime.now(UTC):
            raise ValueError("credentials are expired")

        worker_id = str(ULID())
        container_name = _sanitize_container_name("kartograph-worker-", worker_id)
        launched = self._container_runtime.run(
            ContainerRunSpec(
                image=self._worker_image,
                name=container_name,
                # The token goes to the worker via its environment only; it is
                # deliberately kept out of the labels and the launch result.
                env={
                    "KARTOGRAPH_WORKLOAD_TOKEN": credentials.token,
                    "KARTOGRAPH_TENANT_ID": request.tenant_id,
                    "KARTOGRAPH_KNOWLEDGE_GRAPH_ID": request.knowledge_graph_id,
                    "KARTOGRAPH_SESSION_ID": request.session_id,
                    "KARTOGRAPH_SYNC_RUN_ID": request.sync_run_id,
                    "KARTOGRAPH_JOB_PACKAGE_ID": request.job_package_id,
                },
                labels={
                    "kartograph.runtime.kind": "ephemeral",
                    "kartograph.worker_id": worker_id,
                    "kartograph.session_id": request.session_id,
                    "kartograph.sync_run_id": request.sync_run_id,
                    "kartograph.job_package_id": request.job_package_id,
                },
                command=self._worker_command,
            )
        )
        self._active_workers[worker_id] = (request, launched.container_id)
        return EphemeralWorkerLaunchResult(
            worker_id=worker_id,
            status="running",
            credentials_expires_at=credentials.expires_at,
        )

    def complete_worker(self, worker_id: str) -> None:
        """Terminate the worker's container and forget the worker.

        Unknown worker ids are ignored. The worker record is dropped only after
        the container was terminated, so a failed termination can be retried.
        """
        worker = self._active_workers.get(worker_id)
        if worker is None:
            return
        _, container_id = worker
        _stop_and_remove(self._container_runtime, container_id)
        del self._active_workers[worker_id]
"""Settings for extraction workload runtime execution."""

from __future__ import annotations

import shlex
from functools import lru_cache
from typing import Literal

from pydantic import Field, field_validator
from pydantic_settings import BaseSettings, SettingsConfigDict


class ExtractionWorkloadRuntimeSettings(BaseSettings):
    """Container and in-memory extraction runtime configuration.

    Values are read from ``KARTOGRAPH_EXTRACTION_RUNTIME_*`` environment
    variables and from a local ``.env`` file; unknown keys are ignored.
    """

    model_config = SettingsConfigDict(
        env_prefix="KARTOGRAPH_EXTRACTION_RUNTIME_",
        env_file=".env",
        env_file_encoding="utf-8",
        extra="ignore",
    )

    backend: Literal["memory", "container"] = Field(default="memory")
    container_engine: Literal["auto", "docker", "podman"] = Field(default="auto")
    sticky_image: str = Field(default="docker.io/library/busybox:1.36")
    worker_image: str = Field(default="docker.io/library/busybox:1.36")
    sticky_command: tuple[str, ...] = Field(default=("sleep", "3600"))
    worker_command: tuple[str, ...] = Field(default=("sleep", "3600"))
    # Bounded to between one minute and one day.
    session_ttl_minutes: int = Field(default=30, ge=1, le=24 * 60)

    @field_validator("sticky_command", "worker_command", mode="before")
    @classmethod
    def _parse_command(cls, value: object) -> tuple[str, ...]:
        """Normalise a command given as a string, list or tuple into a tuple.

        Strings are split with shell-like rules so quoted arguments such as
        ``sh -c 'echo hi'`` stay intact. An empty command is rejected for every
        input form, not just for strings.

        Raises:
            ValueError: the command is empty or the string has unbalanced quotes.
            TypeError: the value is neither a string nor a list/tuple.
        """
        # NOTE(review): pydantic-settings appears to JSON-decode environment
        # values for tuple fields before this validator runs, so the string
        # form may only be reachable when constructing settings in code - confirm.
        if isinstance(value, str):
            parts = tuple(shlex.split(value))
        elif isinstance(value, (tuple, list)):
            parts = tuple(str(part) for part in value)
        else:
            # NOTE(review): pydantic v2 turns ValueError/AssertionError into a
            # ValidationError, but a TypeError presumably propagates as-is -
            # kept unchanged for callers; confirm this is intended.
            raise TypeError("command must be a string or sequence")
        if not parts:
            raise ValueError("command must not be empty")
        return parts


@lru_cache
def get_extraction_workload_runtime_settings() -> ExtractionWorkloadRuntimeSettings:
    """Get cached extraction workload runtime settings.

    The settings object is built on the first call and reused afterwards, so
    later changes to the environment are not picked up within the same process.
    """
    return ExtractionWorkloadRuntimeSettings()
class CliContainerRuntime:
    """Launch and manage containers through a docker-compatible CLI.

    Every operation shells out to ``binary``. Failures, including a binary that
    cannot be executed, surface as ``ContainerRuntimeError``.
    """

    _RUNNING_TEMPLATE: Final[str] = "{{.State.Running}}"
    # Flags whose following argument is KEY=value and may carry a secret.
    _ENV_FLAGS: Final[frozenset[str]] = frozenset({"--env", "-e"})
    _REDACTED: Final[str] = "***"

    def __init__(self, *, binary: str) -> None:
        self._binary = binary

    def run(self, spec: ContainerRunSpec) -> ContainerRunResult:
        """Start a container from ``spec`` and return its identifier.

        Labels and environment entries are emitted in sorted key order so the
        generated command line is deterministic.

        Raises:
            ContainerRuntimeError: the CLI failed or printed no container id.
        """
        command = [self._binary, "run"]
        if spec.detach:
            command.append("--detach")
        if spec.remove_on_exit:
            command.append("--rm")
        if spec.name is not None:
            command.extend(["--name", spec.name])
        for key, value in sorted(spec.labels.items()):
            command.extend(["--label", f"{key}={value}"])
        # NOTE(review): values are passed on the command line, so they are
        # visible in the host's process listing while the CLI runs.
        for key, value in sorted(spec.env.items()):
            command.extend(["--env", f"{key}={value}"])
        command.append(spec.image)
        if spec.command:
            command.extend(spec.command)

        # NOTE(review): the first stdout line is taken as the container id,
        # which presumably only holds for detached runs - confirm.
        output_lines = self._execute(command).splitlines()
        container_id = output_lines[0].strip() if output_lines else ""
        if not container_id:
            raise ContainerRuntimeError(
                f"{self._binary} run returned no container id for image {spec.image}"
            )
        return ContainerRunResult(container_id=container_id, name=spec.name)

    def stop(self, container_id: str, *, timeout_seconds: int = 10) -> None:
        """Stop the container, passing ``timeout_seconds`` to the CLI's ``-t`` flag."""
        self._execute([self._binary, "stop", "-t", str(timeout_seconds), container_id])

    def remove(self, container_id: str, *, force: bool = False) -> None:
        """Remove the container; ``force`` adds the CLI's ``-f`` flag."""
        command = [self._binary, "rm"]
        if force:
            command.append("-f")
        command.append(container_id)
        self._execute(command)

    def is_running(self, container_id: str) -> bool:
        """Return True when ``inspect`` reports the container as running.

        A "no such ..." failure from the CLI is treated as "not running"; any
        other inspect failure raises ``ContainerRuntimeError``.
        """
        result = self._invoke(
            [self._binary, "inspect", "-f", self._RUNNING_TEMPLATE, container_id]
        )
        if result.returncode != 0:
            detail = result.stderr.strip() or result.stdout.strip()
            if "no such" in detail.lower():
                return False
            raise ContainerRuntimeError(
                f"{self._binary} inspect failed: {detail or 'unknown error'}"
            )
        return result.stdout.strip().lower() == "true"

    def _execute(self, command: list[str]) -> str:
        """Run ``command`` and return its stdout, raising on a non-zero exit."""
        result = self._invoke(command)
        if result.returncode != 0:
            detail = result.stderr.strip() or result.stdout.strip() or "unknown error"
            # Environment values (for example workload tokens) must not end up
            # in exception messages or logs.
            shown = " ".join(self._redact_env_values(command[1:]))
            raise ContainerRuntimeError(f"{self._binary} {shown} failed: {detail}")
        return result.stdout

    def _invoke(self, command: list[str]) -> subprocess.CompletedProcess[str]:
        """Run ``command`` capturing text output, without raising on exit codes."""
        try:
            return subprocess.run(
                command,
                capture_output=True,
                text=True,
                check=False,
            )
        except OSError as error:
            # Raised e.g. when the binary is missing or not executable.
            raise ContainerRuntimeError(
                f"unable to execute {self._binary}: {error}"
            ) from error

    @staticmethod
    def _redact_env_values(arguments: list[str]) -> list[str]:
        """Return ``arguments`` with the value of each ``--env KEY=value`` masked.

        Arguments that follow an env flag but carry no ``=`` are kept unchanged.
        """
        redacted: list[str] = []
        mask_next = False
        for argument in arguments:
            if mask_next:
                key, separator, _ = argument.partition("=")
                redacted.append(
                    f"{key}={CliContainerRuntime._REDACTED}" if separator else argument
                )
                mask_next = False
                continue
            mask_next = argument in CliContainerRuntime._ENV_FLAGS
            redacted.append(argument)
        return redacted
class ContainerRuntimeError(RuntimeError):
    """Raised when a container runtime operation fails."""


@dataclass(frozen=True)
class ContainerRunSpec:
    """Launch parameters for a container.

    The dataclass is frozen, so fields cannot be reassigned after construction;
    the ``env`` and ``labels`` dicts themselves remain ordinary mutable dicts.
    """

    # Image reference to run.
    image: str
    # Optional explicit container name; None leaves naming to the backend.
    name: str | None = None
    # Environment variables for the container.
    env: dict[str, str] = field(default_factory=dict)
    # Labels attached to the container.
    labels: dict[str, str] = field(default_factory=dict)
    # Command to run in the container; None means no command is passed.
    command: tuple[str, ...] | None = None
    # Start in the background (default) instead of attached.
    detach: bool = True
    # Ask the backend to delete the container once it exits.
    remove_on_exit: bool = False


@dataclass(frozen=True)
class ContainerRunResult:
    """Result of a successful container launch."""

    # Identifier reported by the backend for the launched container.
    container_id: str
    # Name that was requested for the container, if any.
    name: str | None


class IContainerRuntime(Protocol):
    """Backend-neutral container lifecycle operations."""

    def run(self, spec: ContainerRunSpec) -> ContainerRunResult:
        """Launch a container and return its identifier."""
        ...

    def stop(self, container_id: str, *, timeout_seconds: int = 10) -> None:
        """Stop a running container, allowing ``timeout_seconds`` for shutdown."""
        ...

    def remove(self, container_id: str, *, force: bool = False) -> None:
        """Remove a container; ``force`` asks the backend to remove it regardless of state."""
        ...

    def is_running(self, container_id: str) -> bool:
        """Return True when the container exists and is running."""
        ...
"""Integration test fixtures for Extraction bounded context."""

from __future__ import annotations

import shutil
import subprocess

import pytest

from shared_kernel.container_runtime.factory import create_container_runtime

pytest_plugins = ["tests.integration.management.conftest"]

# Upper bound for the "<engine> info" probe so a hung daemon cannot stall collection.
_ENGINE_PROBE_TIMEOUT_SECONDS = 15


def _engine_available(engine: str) -> bool:
    """Return True when ``engine`` is on PATH and ``<engine> info`` succeeds.

    A binary that cannot be executed, or a probe that does not answer within
    the timeout, counts as "not available" so the tests are skipped rather
    than failing or hanging.
    """
    if shutil.which(engine) is None:
        return False
    try:
        result = subprocess.run(
            [engine, "info"],
            capture_output=True,
            text=True,
            check=False,
            timeout=_ENGINE_PROBE_TIMEOUT_SECONDS,
        )
    except (OSError, subprocess.TimeoutExpired):
        return False
    return result.returncode == 0


@pytest.fixture(scope="session")
def container_runtime_engine() -> str:
    """Return the container engine binary used for integration tests.

    docker is preferred over podman; the requesting test is skipped when
    neither engine is usable.
    """
    for engine in ("docker", "podman"):
        if _engine_available(engine):
            return engine
    pytest.skip("No docker/podman engine available for container runtime tests")


@pytest.fixture
def container_runtime(container_runtime_engine: str):
    """Provide a CLI container runtime for integration tests."""
    return create_container_runtime(container_runtime_engine)
"""Integration tests for container-backed extraction workload runtime adapters."""

from __future__ import annotations

import subprocess
from collections.abc import Iterator
from datetime import UTC, datetime, timedelta

import pytest
from ulid import ULID

from extraction.infrastructure.container_workload_runtime import (
    ContainerEphemeralExtractionWorkerLauncher,
    ContainerStickySessionRuntimeManager,
)
from extraction.infrastructure.workload_runtime import ScopedWorkloadCredentialIssuer
from extraction.ports.runtime import EphemeralWorkerLaunchRequest
from shared_kernel.container_runtime.ports import IContainerRuntime

pytestmark = [pytest.mark.integration, pytest.mark.container_runtime]

BUSYBOX_IMAGE = "docker.io/library/busybox:1.36"

# Later than any lease expiry: passing it to cleanup_expired() terminates every
# container a manager still tracks, which is used as a teardown safety net.
_FAR_FUTURE = datetime.max.replace(tzinfo=UTC)


@pytest.fixture(scope="module", autouse=True)
def ensure_busybox_image(container_runtime_engine: str) -> None:
    """Pull the lightweight image used by runtime integration tests.

    The pull is attempted only when the image is not present locally; the
    module is skipped when the pull fails.
    """
    inspected = subprocess.run(
        [container_runtime_engine, "image", "inspect", BUSYBOX_IMAGE],
        capture_output=True,
        text=True,
        check=False,
    )
    if inspected.returncode == 0:
        return
    pulled = subprocess.run(
        [container_runtime_engine, "pull", BUSYBOX_IMAGE],
        capture_output=True,
        text=True,
        check=False,
    )
    if pulled.returncode != 0:
        pytest.skip(f"Unable to pull test image {BUSYBOX_IMAGE}: {pulled.stderr}")


@pytest.fixture
def sticky_manager(
    container_runtime: IContainerRuntime,
) -> Iterator[ContainerStickySessionRuntimeManager]:
    """Sticky manager whose leftover containers are terminated on teardown."""
    manager = ContainerStickySessionRuntimeManager(
        container_runtime=container_runtime,
        sticky_image=BUSYBOX_IMAGE,
        sticky_command=("sleep", "3600"),
        session_ttl=timedelta(seconds=30),
    )
    yield manager
    # A failed assertion must not leave "sleep 3600" containers running.
    manager.cleanup_expired(now=_FAR_FUTURE)


@pytest.fixture
def worker_launcher(
    container_runtime: IContainerRuntime,
) -> ContainerEphemeralExtractionWorkerLauncher:
    return ContainerEphemeralExtractionWorkerLauncher(
        container_runtime=container_runtime,
        worker_image=BUSYBOX_IMAGE,
        worker_command=("sleep", "3600"),
    )


def _worker_request() -> EphemeralWorkerLaunchRequest:
    """Launch request for tenant-1/kg-1 with a unique session id."""
    return EphemeralWorkerLaunchRequest(
        tenant_id="tenant-1",
        knowledge_graph_id="kg-1",
        session_id=f"integration-session-worker-{ULID()}",
        sync_run_id="sync-1",
        job_package_id="pkg-1",
    )


class TestContainerStickySessionRuntimeIntegration:
    def test_happy_path_reuses_sticky_container_until_reset(
        self,
        sticky_manager: ContainerStickySessionRuntimeManager,
        container_runtime: IContainerRuntime,
    ) -> None:
        first = sticky_manager.get_or_start_runtime(
            session_id=f"integration-session-1-{ULID()}",
            user_id="user-1",
            knowledge_graph_id="kg-1",
            mode="extraction_operations",
        )
        second = sticky_manager.get_or_start_runtime(
            session_id=first.session_id,
            user_id="user-1",
            knowledge_graph_id="kg-1",
            mode="extraction_operations",
        )

        assert first.container_id == second.container_id
        assert container_runtime.is_running(first.container_id)

        rotated = sticky_manager.reset_runtime(
            session_id=first.session_id,
            user_id="user-1",
            knowledge_graph_id="kg-1",
            mode="extraction_operations",
        )

        assert rotated.container_id != first.container_id
        assert not container_runtime.is_running(first.container_id)
        assert container_runtime.is_running(rotated.container_id)

        sticky_manager.cleanup_expired(now=rotated.expires_at + timedelta(seconds=1))
        assert not container_runtime.is_running(rotated.container_id)

    def test_timeout_cleanup_terminates_expired_sticky_container(
        self,
        container_runtime: IContainerRuntime,
    ) -> None:
        manager = ContainerStickySessionRuntimeManager(
            container_runtime=container_runtime,
            sticky_image=BUSYBOX_IMAGE,
            sticky_command=("sleep", "3600"),
            session_ttl=timedelta(seconds=2),
        )
        try:
            lease = manager.get_or_start_runtime(
                session_id=f"integration-session-timeout-{ULID()}",
                user_id="user-1",
                knowledge_graph_id="kg-1",
                mode="schema_bootstrap",
            )
            assert container_runtime.is_running(lease.container_id)

            # Expiry is decided by the injected ``now``, so no real waiting is
            # needed to get past the two-second TTL.
            terminated = manager.cleanup_expired(
                now=lease.last_activity_at + timedelta(seconds=3)
            )

            assert terminated == [lease.container_id]
            assert not container_runtime.is_running(lease.container_id)
        finally:
            manager.cleanup_expired(now=_FAR_FUTURE)


class TestContainerEphemeralWorkerIntegration:
    def test_happy_path_launches_and_completes_worker(
        self,
        worker_launcher: ContainerEphemeralExtractionWorkerLauncher,
        container_runtime: IContainerRuntime,
    ) -> None:
        issuer = ScopedWorkloadCredentialIssuer(default_ttl=timedelta(minutes=5))
        credentials = issuer.issue(tenant_id="tenant-1", knowledge_graph_id="kg-1")

        result = worker_launcher.launch(request=_worker_request(), credentials=credentials)
        container_id = worker_launcher.worker_container_id(result.worker_id)
        try:
            assert container_id is not None
            assert container_runtime.is_running(container_id)
        finally:
            # Always terminate the worker, even when an assertion above failed.
            worker_launcher.complete_worker(result.worker_id)

        assert worker_launcher.active_worker_count == 0
        assert not container_runtime.is_running(container_id)

    def test_failure_path_rejects_bad_credentials_without_launching_container(
        self,
        worker_launcher: ContainerEphemeralExtractionWorkerLauncher,
    ) -> None:
        issuer = ScopedWorkloadCredentialIssuer(default_ttl=timedelta(minutes=5))
        wrong_scope = issuer.issue(tenant_id="tenant-2", knowledge_graph_id="kg-2")

        with pytest.raises(ValueError, match="scope"):
            worker_launcher.launch(request=_worker_request(), credentials=wrong_scope)

        assert worker_launcher.active_worker_count == 0
"""Unit tests for container-backed extraction workload runtime adapters."""

from __future__ import annotations

from datetime import timedelta
from unittest.mock import MagicMock

import pytest

from extraction.infrastructure.container_workload_runtime import (
    ContainerEphemeralExtractionWorkerLauncher,
    ContainerStickySessionRuntimeManager,
)
from extraction.infrastructure.workload_runtime import ScopedWorkloadCredentialIssuer
from extraction.ports.runtime import EphemeralWorkerLaunchRequest
from shared_kernel.container_runtime.ports import ContainerRunResult, ContainerRunSpec


def _sticky_manager(
    runtime: MagicMock, *, session_ttl: timedelta = timedelta(minutes=30)
) -> ContainerStickySessionRuntimeManager:
    """Sticky manager wired to a mocked container runtime."""
    return ContainerStickySessionRuntimeManager(
        container_runtime=runtime,
        sticky_image="busybox:1.36",
        sticky_command=("sleep", "3600"),
        session_ttl=session_ttl,
    )


def _worker_launcher(runtime: MagicMock) -> ContainerEphemeralExtractionWorkerLauncher:
    """Worker launcher wired to a mocked container runtime."""
    return ContainerEphemeralExtractionWorkerLauncher(
        container_runtime=runtime,
        worker_image="busybox:1.36",
        worker_command=("sleep", "3600"),
    )


def _launch_request() -> EphemeralWorkerLaunchRequest:
    """Launch request for tenant-1/kg-1 shared by the launcher tests."""
    return EphemeralWorkerLaunchRequest(
        tenant_id="tenant-1",
        knowledge_graph_id="kg-1",
        session_id="session-1",
        sync_run_id="sync-1",
        job_package_id="pkg-1",
    )


class TestContainerStickySessionRuntimeManager:
    def test_reuses_running_container_for_active_session(self) -> None:
        runtime = MagicMock()
        runtime.is_running.return_value = True
        runtime.run.return_value = ContainerRunResult(
            container_id="container-1",
            name="kartograph-sticky-session-1",
        )
        manager = _sticky_manager(runtime)

        first = manager.get_or_start_runtime(
            session_id="session-1",
            user_id="user-1",
            knowledge_graph_id="kg-1",
            mode="extraction_operations",
        )
        second = manager.get_or_start_runtime(
            session_id="session-1",
            user_id="user-1",
            knowledge_graph_id="kg-1",
            mode="extraction_operations",
        )

        assert first.container_id == second.container_id == "container-1"
        runtime.run.assert_called_once()

    def test_reset_stops_existing_container_and_starts_new_one(self) -> None:
        runtime = MagicMock()
        runtime.is_running.return_value = True
        runtime.run.side_effect = [
            ContainerRunResult(container_id="container-1", name="name-1"),
            ContainerRunResult(container_id="container-2", name="name-2"),
        ]
        manager = _sticky_manager(runtime)
        manager.get_or_start_runtime(
            session_id="session-1",
            user_id="user-1",
            knowledge_graph_id="kg-1",
            mode="schema_bootstrap",
        )

        rotated = manager.reset_runtime(
            session_id="session-1",
            user_id="user-1",
            knowledge_graph_id="kg-1",
            mode="schema_bootstrap",
        )

        assert rotated.container_id == "container-2"
        runtime.stop.assert_called_once_with("container-1")
        runtime.remove.assert_called_once_with("container-1", force=True)

    def test_cleanup_expired_terminates_and_returns_container_ids(self) -> None:
        runtime = MagicMock()
        runtime.is_running.return_value = True
        runtime.run.return_value = ContainerRunResult(
            container_id="container-1",
            name="name-1",
        )
        manager = _sticky_manager(runtime, session_ttl=timedelta(minutes=5))
        lease = manager.get_or_start_runtime(
            session_id="session-1",
            user_id="user-1",
            knowledge_graph_id="kg-1",
            mode="schema_bootstrap",
        )

        terminated = manager.cleanup_expired(now=lease.expires_at + timedelta(seconds=1))

        assert terminated == ["container-1"]
        # The returned id must correspond to a container that was really torn down.
        runtime.stop.assert_called_once_with("container-1")
        runtime.remove.assert_called_once_with("container-1", force=True)


class TestContainerEphemeralExtractionWorkerLauncher:
    def test_launch_starts_worker_container_without_exposing_credentials(self) -> None:
        runtime = MagicMock()
        runtime.run.return_value = ContainerRunResult(
            container_id="worker-container",
            name="kartograph-worker-abc",
        )
        launcher = _worker_launcher(runtime)
        issuer = ScopedWorkloadCredentialIssuer(default_ttl=timedelta(minutes=10))
        credentials = issuer.issue(tenant_id="tenant-1", knowledge_graph_id="kg-1")

        result = launcher.launch(request=_launch_request(), credentials=credentials)

        assert result.worker_id
        assert result.status == "running"
        spec: ContainerRunSpec = runtime.run.call_args.args[0]
        # The token reaches the worker through its environment ...
        assert spec.env["KARTOGRAPH_WORKLOAD_TOKEN"] == credentials.token
        # ... and nowhere else: not in labels, the name, the command or the result.
        assert credentials.token not in spec.labels.values()
        assert credentials.token not in (spec.name or "")
        assert credentials.token not in (spec.command or ())
        assert credentials.token not in repr(result)

    def test_launch_rejects_invalid_credentials(self) -> None:
        runtime = MagicMock()
        launcher = _worker_launcher(runtime)
        issuer = ScopedWorkloadCredentialIssuer(default_ttl=timedelta(minutes=10))
        wrong_scope = issuer.issue(tenant_id="tenant-2", knowledge_graph_id="kg-2")

        with pytest.raises(ValueError, match="scope"):
            launcher.launch(request=_launch_request(), credentials=wrong_scope)

        # Rejection must happen before any container is started or tracked.
        runtime.run.assert_not_called()
        assert launcher.active_worker_count == 0

    def test_complete_worker_terminates_running_container(self) -> None:
        runtime = MagicMock()
        runtime.is_running.return_value = True
        runtime.run.return_value = ContainerRunResult(
            container_id="worker-container",
            name="kartograph-worker-abc",
        )
        launcher = _worker_launcher(runtime)
        issuer = ScopedWorkloadCredentialIssuer(default_ttl=timedelta(minutes=10))
        credentials = issuer.issue(tenant_id="tenant-1", knowledge_graph_id="kg-1")
        result = launcher.launch(request=_launch_request(), credentials=credentials)

        launcher.complete_worker(result.worker_id)

        runtime.stop.assert_called_once_with("worker-container")
        runtime.remove.assert_called_once_with("worker-container", force=True)
        assert launcher.active_worker_count == 0
sticky_image="busybox:1.36", + sticky_command=("sleep", "3600"), + session_ttl=timedelta(minutes=30), + ) + manager.get_or_start_runtime( + session_id="session-1", + user_id="user-1", + knowledge_graph_id="kg-1", + mode="schema_bootstrap", + ) + + rotated = manager.reset_runtime( + session_id="session-1", + user_id="user-1", + knowledge_graph_id="kg-1", + mode="schema_bootstrap", + ) + + assert rotated.container_id == "container-2" + runtime.stop.assert_called_once_with("container-1") + runtime.remove.assert_called_once_with("container-1", force=True) + + def test_cleanup_expired_terminates_and_returns_container_ids(self) -> None: + runtime = MagicMock() + runtime.is_running.return_value = True + runtime.run.return_value = ContainerRunResult( + container_id="container-1", + name="name-1", + ) + manager = ContainerStickySessionRuntimeManager( + container_runtime=runtime, + sticky_image="busybox:1.36", + sticky_command=("sleep", "3600"), + session_ttl=timedelta(minutes=5), + ) + lease = manager.get_or_start_runtime( + session_id="session-1", + user_id="user-1", + knowledge_graph_id="kg-1", + mode="schema_bootstrap", + ) + + terminated = manager.cleanup_expired(now=lease.expires_at + timedelta(seconds=1)) + + assert terminated == ["container-1"] + + +class TestContainerEphemeralExtractionWorkerLauncher: + def test_launch_starts_worker_container_without_exposing_credentials(self) -> None: + runtime = MagicMock() + runtime.run.return_value = ContainerRunResult( + container_id="worker-container", + name="kartograph-worker-abc", + ) + launcher = ContainerEphemeralExtractionWorkerLauncher( + container_runtime=runtime, + worker_image="busybox:1.36", + worker_command=("sleep", "3600"), + ) + issuer = ScopedWorkloadCredentialIssuer(default_ttl=timedelta(minutes=10)) + credentials = issuer.issue(tenant_id="tenant-1", knowledge_graph_id="kg-1") + request = EphemeralWorkerLaunchRequest( + tenant_id="tenant-1", + knowledge_graph_id="kg-1", + session_id="session-1", + 
sync_run_id="sync-1", + job_package_id="pkg-1", + ) + + result = launcher.launch(request=request, credentials=credentials) + + assert result.worker_id + assert result.status == "running" + spec: ContainerRunSpec = runtime.run.call_args.args[0] + assert spec.env["KARTOGRAPH_WORKLOAD_TOKEN"] == credentials.token + + def test_launch_rejects_invalid_credentials(self) -> None: + runtime = MagicMock() + launcher = ContainerEphemeralExtractionWorkerLauncher( + container_runtime=runtime, + worker_image="busybox:1.36", + worker_command=("sleep", "3600"), + ) + issuer = ScopedWorkloadCredentialIssuer(default_ttl=timedelta(minutes=10)) + wrong_scope = issuer.issue(tenant_id="tenant-2", knowledge_graph_id="kg-2") + request = EphemeralWorkerLaunchRequest( + tenant_id="tenant-1", + knowledge_graph_id="kg-1", + session_id="session-1", + sync_run_id="sync-1", + job_package_id="pkg-1", + ) + + with pytest.raises(ValueError, match="scope"): + launcher.launch(request=request, credentials=wrong_scope) + + def test_complete_worker_terminates_running_container(self) -> None: + runtime = MagicMock() + runtime.is_running.return_value = True + runtime.run.return_value = ContainerRunResult( + container_id="worker-container", + name="kartograph-worker-abc", + ) + launcher = ContainerEphemeralExtractionWorkerLauncher( + container_runtime=runtime, + worker_image="busybox:1.36", + worker_command=("sleep", "3600"), + ) + issuer = ScopedWorkloadCredentialIssuer(default_ttl=timedelta(minutes=10)) + credentials = issuer.issue(tenant_id="tenant-1", knowledge_graph_id="kg-1") + request = EphemeralWorkerLaunchRequest( + tenant_id="tenant-1", + knowledge_graph_id="kg-1", + session_id="session-1", + sync_run_id="sync-1", + job_package_id="pkg-1", + ) + result = launcher.launch(request=request, credentials=credentials) + + launcher.complete_worker(result.worker_id) + + runtime.stop.assert_called_once_with("worker-container") + assert launcher.active_worker_count == 0 diff --git 
a/src/api/tests/unit/extraction/infrastructure/test_workload_runtime_factory.py b/src/api/tests/unit/extraction/infrastructure/test_workload_runtime_factory.py new file mode 100644 index 000000000..ac44244f9 --- /dev/null +++ b/src/api/tests/unit/extraction/infrastructure/test_workload_runtime_factory.py @@ -0,0 +1,42 @@ +"""Unit tests for extraction workload runtime factory.""" + +from __future__ import annotations + +from extraction.infrastructure.container_workload_runtime import ( + ContainerEphemeralExtractionWorkerLauncher, + ContainerStickySessionRuntimeManager, +) +from extraction.infrastructure.workload_runtime import ( + InMemoryEphemeralExtractionWorkerLauncher, + InMemoryStickySessionRuntimeManager, +) +from extraction.infrastructure.workload_runtime_factory import ( + create_ephemeral_extraction_worker_launcher, + create_sticky_session_runtime_manager, +) +from extraction.infrastructure.workload_runtime_settings import ( + ExtractionWorkloadRuntimeSettings, +) + + +class TestWorkloadRuntimeFactory: + def test_memory_backend_returns_in_memory_adapters(self) -> None: + settings = ExtractionWorkloadRuntimeSettings(backend="memory") + + sticky = create_sticky_session_runtime_manager(settings) + worker = create_ephemeral_extraction_worker_launcher(settings) + + assert isinstance(sticky, InMemoryStickySessionRuntimeManager) + assert isinstance(worker, InMemoryEphemeralExtractionWorkerLauncher) + + def test_container_backend_returns_container_adapters(self) -> None: + settings = ExtractionWorkloadRuntimeSettings( + backend="container", + container_engine="docker", + ) + + sticky = create_sticky_session_runtime_manager(settings) + worker = create_ephemeral_extraction_worker_launcher(settings) + + assert isinstance(sticky, ContainerStickySessionRuntimeManager) + assert isinstance(worker, ContainerEphemeralExtractionWorkerLauncher) diff --git a/src/api/tests/unit/extraction/infrastructure/test_workload_runtime_settings.py 
b/src/api/tests/unit/extraction/infrastructure/test_workload_runtime_settings.py new file mode 100644 index 000000000..a5902bca3 --- /dev/null +++ b/src/api/tests/unit/extraction/infrastructure/test_workload_runtime_settings.py @@ -0,0 +1,18 @@ +"""Unit tests for extraction workload runtime settings.""" + +from __future__ import annotations + +from extraction.infrastructure.workload_runtime_settings import ( + ExtractionWorkloadRuntimeSettings, +) + + +class TestExtractionWorkloadRuntimeSettings: + def test_parses_command_strings_into_tuple(self) -> None: + settings = ExtractionWorkloadRuntimeSettings( + sticky_command="sleep 3600", + worker_command="sleep 120", + ) + + assert settings.sticky_command == ("sleep", "3600") + assert settings.worker_command == ("sleep", "120") diff --git a/src/api/tests/unit/shared_kernel/container_runtime/test_cli_runtime.py b/src/api/tests/unit/shared_kernel/container_runtime/test_cli_runtime.py new file mode 100644 index 000000000..4e6d4c199 --- /dev/null +++ b/src/api/tests/unit/shared_kernel/container_runtime/test_cli_runtime.py @@ -0,0 +1,80 @@ +"""Unit tests for CLI-backed container runtime.""" + +from __future__ import annotations + +from unittest.mock import MagicMock, patch + +import pytest + +from shared_kernel.container_runtime.cli_runtime import CliContainerRuntime +from shared_kernel.container_runtime.ports import ContainerRunSpec, ContainerRuntimeError + + +class TestCliContainerRuntime: + def test_run_launches_detached_container_with_labels_and_env(self) -> None: + runtime = CliContainerRuntime(binary="docker") + + with patch("shared_kernel.container_runtime.cli_runtime.subprocess.run") as run: + run.return_value = MagicMock(returncode=0, stdout="abc123\n", stderr="") + + result = runtime.run( + ContainerRunSpec( + image="busybox:1.36", + name="kartograph-sticky-session-1", + env={"KARTOGRAPH_WORKLOAD_TOKEN": "secret"}, + labels={ + "kartograph.runtime.kind": "sticky", + "kartograph.session_id": "session-1", + }, + 
command=("sleep", "3600"), + ) + ) + + assert result.container_id == "abc123" + assert result.name == "kartograph-sticky-session-1" + command = run.call_args.args[0] + assert command[0] == "docker" + assert "run" in command + assert "--detach" in command + assert "busybox:1.36" in command + + def test_run_raises_when_cli_fails(self) -> None: + runtime = CliContainerRuntime(binary="docker") + + with patch("shared_kernel.container_runtime.cli_runtime.subprocess.run") as run: + run.return_value = MagicMock( + returncode=125, + stdout="", + stderr="image not found", + ) + + with pytest.raises(ContainerRuntimeError, match="image not found"): + runtime.run(ContainerRunSpec(image="missing:latest")) + + def test_stop_remove_and_is_running_delegate_to_cli(self) -> None: + runtime = CliContainerRuntime(binary="podman") + + with patch("shared_kernel.container_runtime.cli_runtime.subprocess.run") as run: + run.side_effect = [ + MagicMock(returncode=0, stdout="", stderr=""), + MagicMock(returncode=0, stdout="", stderr=""), + MagicMock(returncode=0, stdout="true\n", stderr=""), + ] + + runtime.stop("abc123", timeout_seconds=5) + runtime.remove("abc123", force=True) + assert runtime.is_running("abc123") is True + + assert run.call_args_list[0].args[0][:3] == ["podman", "stop", "-t"] + + def test_is_running_returns_false_for_missing_container(self) -> None: + runtime = CliContainerRuntime(binary="docker") + + with patch("shared_kernel.container_runtime.cli_runtime.subprocess.run") as run: + run.return_value = MagicMock( + returncode=1, + stdout="", + stderr="Error: No such object: abc123", + ) + + assert runtime.is_running("abc123") is False From c77aa8adcc4f873e3a4aaa101609515be65f78a7 Mon Sep 17 00:00:00 2001 From: Austin Redenbaugh <aredenba@redhat.com> Date: Fri, 22 May 2026 12:43:27 -0400 Subject: [PATCH 047/153] feat(manage-ui): harden MutationLogs step with scoped ordering and preview fallback (#728) Add KG-scoped run collection utilities, rich run detail panel separation, 
explicit no-preview fallback, and paginated mutation-log-entries API skeleton for #721 follow-on. Co-authored-by: Cursor <cursoragent@cursor.com> --- .../presentation/data_sources/models.py | 36 ++++ .../presentation/data_sources/routes.py | 67 ++++++++ .../presentation/test_data_sources_routes.py | 56 +++++++ .../pages/knowledge-graphs/[kgId]/manage.vue | 154 ++++++++++++++---- src/dev-ui/app/tests/kgMutationLogs.test.ts | 120 ++++++++++++++ .../knowledge-graph-manage-workspace.test.ts | 49 +++++- src/dev-ui/app/utils/kgMutationLogs.ts | 100 ++++++++++++ 7 files changed, 546 insertions(+), 36 deletions(-) create mode 100644 src/dev-ui/app/tests/kgMutationLogs.test.ts create mode 100644 src/dev-ui/app/utils/kgMutationLogs.ts diff --git a/src/api/management/presentation/data_sources/models.py b/src/api/management/presentation/data_sources/models.py index b2508a79c..903d494aa 100644 --- a/src/api/management/presentation/data_sources/models.py +++ b/src/api/management/presentation/data_sources/models.py @@ -246,6 +246,33 @@ class SyncRunLogsResponse(BaseModel): ) +class MutationLogEntryPreviewResponse(BaseModel): + """Single mutation-log entry preview for a sync run.""" + + line_number: int = Field(..., description="1-based line number in the mutation log") + operation_class: str = Field(..., description="Operation class for this entry") + summary: str = Field(..., description="Human-readable preview summary for this entry") + + +class MutationLogEntryPreviewPageResponse(BaseModel): + """Paginated mutation-log entry previews for a sync run.""" + + entries: list[MutationLogEntryPreviewResponse] = Field( + default_factory=list, + description="Preview entries for the requested page", + ) + total: int = Field(..., description="Total preview entries available for this run") + offset: int = Field(..., description="Zero-based offset of this page") + limit: int = Field(..., description="Maximum entries requested for this page") + preview_available: bool = Field( + ..., + 
description=( + "False when detailed mutation-log entry previews are not yet stored " + "or cannot be retrieved for this run" + ), + ) + + class DiffChangedFileResponse(BaseModel): """Single changed file entry in a commit diff summary.""" @@ -299,6 +326,10 @@ class SyncRunResponse(BaseModel): mutation_log_id: str | None = Field( None, description="Associated mutation log run ID when available" ) + knowledge_graph_id: str | None = Field( + None, + description="Knowledge graph scope for this mutation run when available", + ) session_id: str | None = Field( None, description="Extraction session ID associated with this mutation run" ) @@ -341,6 +372,11 @@ def from_domain(cls, run: DataSourceSyncRun) -> SyncRunResponse: if run.mutation_log_run is not None else None ), + knowledge_graph_id=( + run.mutation_log_run.knowledge_graph_id + if run.mutation_log_run is not None + else None + ), session_id=( run.mutation_log_run.session_id if run.mutation_log_run is not None diff --git a/src/api/management/presentation/data_sources/routes.py b/src/api/management/presentation/data_sources/routes.py index c35ab68c7..dc8c64560 100644 --- a/src/api/management/presentation/data_sources/routes.py +++ b/src/api/management/presentation/data_sources/routes.py @@ -29,6 +29,7 @@ DataSourceWithSyncResponse, RunControlAction, RunControlResponse, + MutationLogEntryPreviewPageResponse, SyncRunLogsResponse, SyncRunResponse, UpdateDataSourceRequest, @@ -704,3 +705,69 @@ async def get_sync_run_logs( status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail="Failed to fetch sync run logs", ) + + +@router.get( + "/data-sources/{ds_id}/sync-runs/{run_id}/mutation-log-entries", + status_code=status.HTTP_200_OK, +) +async def list_mutation_log_entry_previews( + ds_id: str, + run_id: str, + current_user: Annotated[CurrentUser, Depends(get_current_user)], + service: Annotated[DataSourceService, Depends(get_data_source_service)], + sync_run_repo: Annotated[ + IDataSourceSyncRunRepository, 
Depends(get_sync_run_repository) + ], + offset: Annotated[int, Query(ge=0)] = 0, + limit: Annotated[int, Query(ge=1, le=100)] = 20, +) -> MutationLogEntryPreviewPageResponse: + """List paginated mutation-log entry previews for a sync run. + + Returns an empty page with ``preview_available=false`` until mutation-log + storage is wired for per-entry retrieval (#721 follow-on). + """ + try: + ds = await service.get( + user_id=current_user.user_id.value, + ds_id=ds_id, + ) + + if ds is None: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail="Data source not found", + ) + + sync_run = await sync_run_repo.get_by_id(run_id) + + if sync_run is None or sync_run.data_source_id != ds_id: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail="Sync run not found", + ) + + if sync_run.mutation_log_run is None: + return MutationLogEntryPreviewPageResponse( + entries=[], + total=0, + offset=offset, + limit=limit, + preview_available=False, + ) + + return MutationLogEntryPreviewPageResponse( + entries=[], + total=0, + offset=offset, + limit=limit, + preview_available=False, + ) + + except HTTPException: + raise + except Exception: + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail="Failed to fetch mutation log entry previews", + ) diff --git a/src/api/tests/unit/management/presentation/test_data_sources_routes.py b/src/api/tests/unit/management/presentation/test_data_sources_routes.py index 62e1a9f53..b7e613ade 100644 --- a/src/api/tests/unit/management/presentation/test_data_sources_routes.py +++ b/src/api/tests/unit/management/presentation/test_data_sources_routes.py @@ -509,6 +509,7 @@ def test_list_sync_runs_includes_mutation_log_run_preview_fields( assert response.status_code == status.HTTP_200_OK payload = response.json()[0] assert payload["mutation_log_id"] == "mlog-preview-1" + assert payload["knowledge_graph_id"] == sample_data_source.knowledge_graph_id assert payload["session_id"] == 
"sess-preview-1" assert payload["actor_id"] == "actor-preview-1" assert payload["operation_counts"] == { @@ -534,6 +535,61 @@ def test_list_sync_runs_returns_404_when_ds_not_found( mock_sync_run_repo.find_by_data_source.assert_not_called() +class TestMutationLogEntryPreviewRoutes: + """Tests for GET /management/data-sources/{ds_id}/sync-runs/{run_id}/mutation-log-entries.""" + + def test_list_mutation_log_entries_returns_paginated_skeleton_when_storage_unavailable( + self, + test_client: TestClient, + mock_ds_service: AsyncMock, + mock_sync_run_repo: AsyncMock, + sample_data_source: DataSource, + sample_sync_run: DataSourceSyncRun, + ) -> None: + sample_sync_run.mutation_log_run = MutationLogRunMetadata( + mutation_log_id="mlog-preview-1", + knowledge_graph_id=sample_data_source.knowledge_graph_id, + session_id="sess-preview-1", + actor_id="actor-preview-1", + started_at=sample_sync_run.started_at, + operation_counts={"create_node": 3}, + ) + mock_ds_service.get.return_value = sample_data_source + mock_sync_run_repo.get_by_id.return_value = sample_sync_run + + response = test_client.get( + f"/management/data-sources/{sample_data_source.id.value}/sync-runs/" + f"{sample_sync_run.id}/mutation-log-entries?offset=0&limit=20" + ) + + assert response.status_code == status.HTTP_200_OK + payload = response.json() + assert payload == { + "entries": [], + "total": 0, + "offset": 0, + "limit": 20, + "preview_available": False, + } + + def test_list_mutation_log_entries_returns_404_when_run_missing( + self, + test_client: TestClient, + mock_ds_service: AsyncMock, + mock_sync_run_repo: AsyncMock, + sample_data_source: DataSource, + ) -> None: + mock_ds_service.get.return_value = sample_data_source + mock_sync_run_repo.get_by_id.return_value = None + + response = test_client.get( + f"/management/data-sources/{sample_data_source.id.value}/sync-runs/" + "missing-run/mutation-log-entries" + ) + + assert response.status_code == status.HTTP_404_NOT_FOUND + + class 
TestRunControlRoutes: """Tests for POST /management/data-sources/{ds_id}/run-controls/{action}.""" diff --git a/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue b/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue index 70b8b528d..21342aeb1 100644 --- a/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue +++ b/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue @@ -41,6 +41,16 @@ import { resolveSectionState, shouldApplyMutationResult, } from '@/utils/kgManageState' +import { + buildMutationLogEntryPreviewUrl, + collectScopedMutationLogRuns, + hasMutationLogEntryPreviewPage, + MUTATION_LOG_ENTRY_PREVIEW_PAGE_SIZE, + MUTATION_LOG_NO_PREVIEW_MESSAGE, + resolveDefaultSelectedMutationLogRunId, + type MutationLogEntryPreviewPage, + type MutationLogRunRecord, +} from '@/utils/kgMutationLogs' interface WorkspaceReadinessStatus { has_minimum_entity_types: boolean @@ -77,20 +87,8 @@ interface DataSourceRef { tracked_branch_head_commit?: string | null } -interface MutationLogRunView { - id: string - data_source_id: string +interface MutationLogRunView extends MutationLogRunRecord { data_source_name: string - status: string - started_at: string - completed_at: string | null - mutation_log_id: string | null - session_id: string | null - actor_id: string | null - operation_counts: Record<string, number> - token_usage_total: number | null - cost_total_usd: number | null - error: string | null } interface ExtractionSessionResponse { @@ -152,6 +150,9 @@ const mutationLogRuns = ref<MutationLogRunView[]>([]) const selectedMutationLogRunId = ref<string | null>(null) const graphManagementMode = ref<GraphManagementMode>('initial-schema-design') const selectedRailItemId = ref<GraphManagementRailItemId | null>(null) +const mutationLogEntryPreviewLoading = ref(false) +const mutationLogEntryPreviewPage = ref<MutationLogEntryPreviewPage | null>(null) +const mutationLogEntryPreviewOffset = ref(0) const activeStep = computed(() => parseManageStepQuery(route.query.step)) 
const showOverview = computed(() => activeStep.value === null) @@ -410,34 +411,28 @@ async function loadMutationLogRuns() { `/management/knowledge-graphs/${kgId.value}/data-sources`, ) - const collected: MutationLogRunView[] = [] + const runsByDataSourceId: Record<string, MutationLogRunRecord[]> = {} for (const ds of dataSources) { try { - const runs = await apiFetch<MutationLogRunView[]>( + runsByDataSourceId[ds.id] = await apiFetch<MutationLogRunRecord[]>( `/management/data-sources/${ds.id}/sync-runs`, ) - for (const run of runs) { - if (!run.mutation_log_id) continue - collected.push({ - ...run, - data_source_name: ds.name, - }) - } } catch { - // Keep page resilient when one data source run list fails. + runsByDataSourceId[ds.id] = [] } } - collected.sort( - (a, b) => new Date(b.started_at).getTime() - new Date(a.started_at).getTime(), - ) + const collected = collectScopedMutationLogRuns( + kgId.value, + dataSources, + runsByDataSourceId, + ) as MutationLogRunView[] + mutationLogRuns.value = collected - if ( - !selectedMutationLogRunId.value - || !collected.some((run) => run.id === selectedMutationLogRunId.value) - ) { - selectedMutationLogRunId.value = collected[0]?.id ?? 
null - } + selectedMutationLogRunId.value = resolveDefaultSelectedMutationLogRunId( + collected, + selectedMutationLogRunId.value, + ) } catch (err) { if (isForbiddenHttpError(err)) { mutationLogLoadError.value = resolveForbiddenReason( @@ -452,11 +447,48 @@ async function loadMutationLogRuns() { } mutationLogRuns.value = [] selectedMutationLogRunId.value = null + mutationLogEntryPreviewPage.value = null } finally { mutationLogLoading.value = false } } +async function loadMutationLogEntryPreviews(offset = 0) { + const run = selectedMutationLogRun.value + if (!run) { + mutationLogEntryPreviewPage.value = null + mutationLogEntryPreviewOffset.value = 0 + return + } + + mutationLogEntryPreviewLoading.value = true + try { + mutationLogEntryPreviewPage.value = await apiFetch<MutationLogEntryPreviewPage>( + buildMutationLogEntryPreviewUrl( + run.data_source_id, + run.id, + offset, + MUTATION_LOG_ENTRY_PREVIEW_PAGE_SIZE, + ), + ) + mutationLogEntryPreviewOffset.value = offset + } catch (err) { + mutationLogEntryPreviewPage.value = { + entries: [], + total: 0, + offset, + limit: MUTATION_LOG_ENTRY_PREVIEW_PAGE_SIZE, + preview_available: false, + } + mutationLogEntryPreviewOffset.value = offset + toast.error('Failed to load mutation log entry previews', { + description: extractErrorMessage(err), + }) + } finally { + mutationLogEntryPreviewLoading.value = false + } +} + async function loadExtractionSession() { if (!kgId.value || activeStep.value !== 'graph-management') return sessionLoading.value = true @@ -702,6 +734,10 @@ watch( } }, ) + +watch(selectedMutationLogRunId, () => { + loadMutationLogEntryPreviews(0) +}) </script> <template> @@ -891,6 +927,7 @@ watch( </div> <div v-if="selectedMutationLogRun" class="space-y-3 rounded border p-3"> + <p class="text-xs font-medium text-muted-foreground">Run summary</p> <div class="flex flex-wrap items-center gap-2"> <Badge>{{ selectedMutationLogRun.status }}</Badge> <p class="text-xs text-muted-foreground"> @@ -935,7 +972,7 @@ 
watch( </div> </div> <div class="rounded border p-3"> - <p class="mb-2 text-xs font-medium text-muted-foreground">Per-entry operation previews</p> + <p class="mb-2 text-xs font-medium text-muted-foreground">Operation class counts</p> <div v-if="Object.keys(selectedMutationLogRun.operation_counts).length === 0" class="text-xs text-muted-foreground"> No operation class counts recorded for this run. </div> @@ -950,6 +987,57 @@ watch( </div> </div> </div> + <div class="rounded border p-3"> + <div class="mb-2 flex items-center justify-between gap-2"> + <p class="text-xs font-medium text-muted-foreground">Per-entry operation previews</p> + <div + v-if="hasMutationLogEntryPreviewPage(mutationLogEntryPreviewPage)" + class="flex items-center gap-1" + > + <Button + size="sm" + variant="ghost" + class="h-6 px-2 text-[10px]" + :disabled="mutationLogEntryPreviewLoading || mutationLogEntryPreviewOffset === 0" + @click="loadMutationLogEntryPreviews(mutationLogEntryPreviewOffset - MUTATION_LOG_ENTRY_PREVIEW_PAGE_SIZE)" + > + Previous + </Button> + <Button + size="sm" + variant="ghost" + class="h-6 px-2 text-[10px]" + :disabled="mutationLogEntryPreviewLoading || (mutationLogEntryPreviewPage?.offset ?? 0) + (mutationLogEntryPreviewPage?.entries.length ?? 0) >= (mutationLogEntryPreviewPage?.total ?? 0)" + @click="loadMutationLogEntryPreviews(mutationLogEntryPreviewOffset + MUTATION_LOG_ENTRY_PREVIEW_PAGE_SIZE)" + > + Next + </Button> + </div> + </div> + <div v-if="mutationLogEntryPreviewLoading" class="flex items-center gap-2 text-xs text-muted-foreground"> + <Loader2 class="size-3.5 animate-spin" /> + Loading entry previews... + </div> + <div + v-else-if="!hasMutationLogEntryPreviewPage(mutationLogEntryPreviewPage)" + class="rounded border border-dashed px-3 py-4 text-xs text-muted-foreground" + > + {{ MUTATION_LOG_NO_PREVIEW_MESSAGE }} + </div> + <div v-else class="space-y-1.5"> + <div + v-for="entry in mutationLogEntryPreviewPage?.entries ?? 
[]" + :key="`${entry.line_number}-${entry.operation_class}`" + class="rounded border px-2 py-1.5 text-xs" + > + <div class="flex items-center justify-between gap-2"> + <span class="font-mono">{{ entry.operation_class }}</span> + <span class="text-[10px] text-muted-foreground">Line {{ entry.line_number }}</span> + </div> + <p class="mt-1 text-muted-foreground">{{ entry.summary }}</p> + </div> + </div> + </div> </div> <div v-else class="rounded border border-dashed p-6 text-sm text-muted-foreground"> Select a mutation run to view summary and per-entry previews. diff --git a/src/dev-ui/app/tests/kgMutationLogs.test.ts b/src/dev-ui/app/tests/kgMutationLogs.test.ts new file mode 100644 index 000000000..a02446294 --- /dev/null +++ b/src/dev-ui/app/tests/kgMutationLogs.test.ts @@ -0,0 +1,120 @@ +import { describe, expect, it } from 'vitest' +import { + MUTATION_LOG_NO_PREVIEW_MESSAGE, + buildMutationLogEntryPreviewUrl, + collectScopedMutationLogRuns, + hasMutationLogEntryPreviewPage, + isMutationLogRunForKnowledgeGraph, + resolveDefaultSelectedMutationLogRunId, + sortMutationLogRunsNewestFirst, +} from '../utils/kgMutationLogs' + +const kgId = 'kg-target' + +function makeRun(overrides: Partial<ReturnType<typeof baseRun>> = {}) { + return { ...baseRun(), ...overrides } +} + +function baseRun() { + return { + id: 'run-1', + data_source_id: 'ds-1', + status: 'completed', + started_at: '2026-05-20T10:00:00Z', + completed_at: '2026-05-20T10:05:00Z', + mutation_log_id: 'mlog-1', + knowledge_graph_id: kgId, + session_id: 'sess-1', + actor_id: 'actor-1', + operation_counts: { create_node: 2 }, + token_usage_total: 100, + cost_total_usd: 0.5, + error: null, + } +} + +describe('KG-MANAGE-012 - graph-scoped mutation run list', () => { + it('includes only runs with mutation logs scoped to the selected knowledge graph', () => { + const runs = collectScopedMutationLogRuns( + kgId, + [{ id: 'ds-1', name: 'Source A' }], + { + 'ds-1': [ + makeRun({ id: 'run-a', mutation_log_id: 'mlog-a' 
}), + makeRun({ id: 'run-b', mutation_log_id: 'mlog-b', knowledge_graph_id: 'kg-other' }), + makeRun({ id: 'run-c', mutation_log_id: null }), + ], + }, + ) + + expect(runs.map((run) => run.id)).toEqual(['run-a']) + expect(runs[0]?.data_source_name).toBe('Source A') + }) + + it('orders runs newest-first by started_at', () => { + const runs = sortMutationLogRunsNewestFirst([ + makeRun({ id: 'older', started_at: '2026-05-01T10:00:00Z' }), + makeRun({ id: 'newer', started_at: '2026-05-22T10:00:00Z' }), + ]) + + expect(runs.map((run) => run.id)).toEqual(['newer', 'older']) + }) + + it('keeps current selection when still present otherwise selects newest run', () => { + const runs = [ + makeRun({ id: 'newest', started_at: '2026-05-22T10:00:00Z' }), + makeRun({ id: 'selected', started_at: '2026-05-21T10:00:00Z' }), + ] + + expect(resolveDefaultSelectedMutationLogRunId(runs, 'selected')).toBe('selected') + expect(resolveDefaultSelectedMutationLogRunId(runs, 'missing')).toBe('newest') + }) + + it('allows legacy runs without knowledge_graph_id when loaded from graph data sources', () => { + expect( + isMutationLogRunForKnowledgeGraph( + { mutation_log_id: 'mlog-legacy', knowledge_graph_id: null }, + kgId, + ), + ).toBe(true) + }) +}) + +describe('KG-MANAGE-013 - run detail richness helpers', () => { + it('builds paginated mutation-log entry preview URLs', () => { + expect(buildMutationLogEntryPreviewUrl('ds-1', 'run-1')).toBe( + '/management/data-sources/ds-1/sync-runs/run-1/mutation-log-entries?offset=0&limit=20', + ) + expect(buildMutationLogEntryPreviewUrl('ds-1', 'run-1', 20, 10)).toBe( + '/management/data-sources/ds-1/sync-runs/run-1/mutation-log-entries?offset=20&limit=10', + ) + }) +}) + +describe('KG-MANAGE-014 - no-preview fallback helpers', () => { + it('uses explicit no-preview messaging constant', () => { + expect(MUTATION_LOG_NO_PREVIEW_MESSAGE).toContain('not available') + }) + + it('detects when entry preview pages are unavailable', () => { + 
expect(hasMutationLogEntryPreviewPage(null)).toBe(false) + expect( + hasMutationLogEntryPreviewPage({ + entries: [], + total: 0, + offset: 0, + limit: 20, + preview_available: false, + }), + ).toBe(false) + expect( + hasMutationLogEntryPreviewPage({ + entries: [{ line_number: 1, operation_class: 'create_node', summary: 'Create Person' }], + total: 1, + offset: 0, + limit: 20, + preview_available: true, + }), + ).toBe(true) + }) +}) diff --git a/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts b/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts index e020cd2ec..dbcadf16d 100644 --- a/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts +++ b/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts @@ -90,7 +90,7 @@ describe('Knowledge Graph Manage Workspace - mutation log browser', () => { it('loads sync runs per data source and filters to mutation-log runs', () => { expect(manageWorkspaceVue).toContain('/management/data-sources/${ds.id}/sync-runs') - expect(manageWorkspaceVue).toContain('if (!run.mutation_log_id) continue') + expect(manageWorkspaceVue).toContain('collectScopedMutationLogRuns') }) it('renders run detail summary with token and cost metrics', () => { @@ -100,10 +100,53 @@ describe('Knowledge Graph Manage Workspace - mutation log browser', () => { expect(manageWorkspaceVue).toContain('cost_total_usd') }) - it('renders per-entry operation preview rows from operation_counts', () => { + it('separates operation class counts from per-entry previews', () => { + expect(manageWorkspaceVue).toContain('Operation class counts') expect(manageWorkspaceVue).toContain('Per-entry operation previews') - expect(manageWorkspaceVue).toContain('operation_counts') expect(manageWorkspaceVue).toContain('Object.entries(selectedMutationLogRun.operation_counts)') + expect(manageWorkspaceVue).toContain('loadMutationLogEntryPreviews') + }) +}) + +describe('KG-MANAGE-012 - graph-scoped mutation run list', () => { + it('loads runs only from 
graph-scoped data sources with KG metadata filtering', () => { + expect(manageWorkspaceVue).toContain('collectScopedMutationLogRuns') + expect(manageWorkspaceVue).toContain('knowledge_graph_id') + }) + + it('defaults run list ordering to newest-first', () => { + expect(manageWorkspaceVue).toContain('collectScopedMutationLogRuns') + expect(manageWorkspaceVue).toContain('resolveDefaultSelectedMutationLogRunId') + }) + + it('shows status, timestamp, source, and run identifier in run list items', () => { + expect(manageWorkspaceVue).toContain('run.data_source_name') + expect(manageWorkspaceVue).toContain('run.started_at') + expect(manageWorkspaceVue).toContain('run.status') + expect(manageWorkspaceVue).toContain('run.mutation_log_id') + }) +}) + +describe('KG-MANAGE-013 - run detail richness', () => { + it('renders run summary, session reference, token/cost metrics, and operation counts', () => { + expect(manageWorkspaceVue).toContain('Run summary') + expect(manageWorkspaceVue).toContain('Session') + expect(manageWorkspaceVue).toContain('Token usage') + expect(manageWorkspaceVue).toContain('Cost (USD)') + expect(manageWorkspaceVue).toContain('Operation class counts') + }) + + it('loads paginated per-entry previews from mutation-log-entries API', () => { + expect(manageWorkspaceVue).toContain('buildMutationLogEntryPreviewUrl') + expect(manageWorkspaceVue).toContain('loadMutationLogEntryPreviews') + expect(manageWorkspaceVue).toContain('mutationLogEntryPreviewPage') + }) +}) + +describe('KG-MANAGE-014 - no-preview fallback state', () => { + it('shows explicit fallback when entry previews are unavailable', () => { + expect(manageWorkspaceVue).toContain('MUTATION_LOG_NO_PREVIEW_MESSAGE') + expect(manageWorkspaceVue).toContain('hasMutationLogEntryPreviewPage') }) }) diff --git a/src/dev-ui/app/utils/kgMutationLogs.ts b/src/dev-ui/app/utils/kgMutationLogs.ts new file mode 100644 index 000000000..1cf1a6a58 --- /dev/null +++ b/src/dev-ui/app/utils/kgMutationLogs.ts @@ -0,0 
+1,100 @@ +export interface MutationLogRunRecord { + id: string + data_source_id: string + data_source_name?: string + status: string + started_at: string + completed_at: string | null + mutation_log_id: string | null + knowledge_graph_id: string | null + session_id: string | null + actor_id: string | null + operation_counts: Record<string, number> + token_usage_total: number | null + cost_total_usd: number | null + error: string | null +} + +export interface MutationLogEntryPreview { + line_number: number + operation_class: string + summary: string +} + +export interface MutationLogEntryPreviewPage { + entries: MutationLogEntryPreview[] + total: number + offset: number + limit: number + preview_available: boolean +} + +export const MUTATION_LOG_ENTRY_PREVIEW_PAGE_SIZE = 20 + +export const MUTATION_LOG_NO_PREVIEW_MESSAGE = + 'Detailed entry previews are not available for this run yet.' + +export function isMutationLogRunForKnowledgeGraph( + run: Pick<MutationLogRunRecord, 'mutation_log_id' | 'knowledge_graph_id'>, + kgId: string, +): boolean { + if (!run.mutation_log_id) return false + if (run.knowledge_graph_id != null && run.knowledge_graph_id !== kgId) return false + return true +} + +export function sortMutationLogRunsNewestFirst<T extends { started_at: string }>( + runs: T[], +): T[] { + return [...runs].sort( + (a, b) => new Date(b.started_at).getTime() - new Date(a.started_at).getTime(), + ) +} + +export function resolveDefaultSelectedMutationLogRunId( + runs: Array<{ id: string }>, + currentId: string | null, +): string | null { + if (currentId && runs.some((run) => run.id === currentId)) return currentId + return runs[0]?.id ?? 
null +} + +export function collectScopedMutationLogRuns( + kgId: string, + dataSources: Array<{ id: string; name: string }>, + runsByDataSourceId: Record<string, MutationLogRunRecord[]>, +): MutationLogRunRecord[] { + const collected: MutationLogRunRecord[] = [] + + for (const ds of dataSources) { + const runs = runsByDataSourceId[ds.id] ?? [] + for (const run of runs) { + if (!isMutationLogRunForKnowledgeGraph(run, kgId)) continue + collected.push({ + ...run, + data_source_name: ds.name, + }) + } + } + + return sortMutationLogRunsNewestFirst(collected) +} + +export function buildMutationLogEntryPreviewUrl( + dataSourceId: string, + runId: string, + offset = 0, + limit = MUTATION_LOG_ENTRY_PREVIEW_PAGE_SIZE, +): string { + const params = new URLSearchParams({ + offset: String(offset), + limit: String(limit), + }) + return `/management/data-sources/${encodeURIComponent(dataSourceId)}/sync-runs/${encodeURIComponent(runId)}/mutation-log-entries?${params}` +} + +export function hasMutationLogEntryPreviewPage( + page: MutationLogEntryPreviewPage | null, +): boolean { + return page?.preview_available === true && page.entries.length > 0 +} From 55c8e1e16b2993b95a1ca3c11396733b13c8ab7b Mon Sep 17 00:00:00 2001 From: aredenba-rh <aredenba@redhat.com> Date: Fri, 22 May 2026 13:17:24 -0400 Subject: [PATCH 048/153] feat(manage-ui): unify graph management operations in-place (#720) Keep extraction jobs, sync run logs, and one-off mutation actions inside KG Manage so users can complete workflows without cross-page redirects. 
Co-authored-by: Cursor <cursoragent@cursor.com> --- .../pages/knowledge-graphs/[kgId]/manage.vue | 261 +++++++++++++++++- .../knowledge-graph-manage-workspace.test.ts | 26 +- 2 files changed, 272 insertions(+), 15 deletions(-) diff --git a/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue b/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue index 21342aeb1..017847d7e 100644 --- a/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue +++ b/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue @@ -22,8 +22,6 @@ import { type GraphManagementRailItemId, } from '@/utils/kgGraphManagement' import { - buildDataSourcesStepUrl, - buildMaintainStepUrl, buildManageStepUrl, buildSuggestedNextStep, buildWorkspaceStepCards, @@ -51,6 +49,7 @@ import { type MutationLogEntryPreviewPage, type MutationLogRunRecord, } from '@/utils/kgMutationLogs' +import { useGraphApi } from '@/composables/api/useGraphApi' interface WorkspaceReadinessStatus { has_minimum_entity_types: boolean @@ -91,6 +90,13 @@ interface MutationLogRunView extends MutationLogRunRecord { data_source_name: string } +interface InlineSyncRun { + id: string + status: string + started_at: string + completed_at: string | null +} + interface ExtractionSessionResponse { id: string message_history: Array<{ role?: string; content?: string; message?: string }> @@ -123,6 +129,7 @@ const route = useRoute() const { hasTenant, tenantVersion } = useTenant() const { extractErrorMessage } = useErrorHandler() const { apiFetch } = useApiClient() +const graphApi = useGraphApi() const kgId = computed(() => String(route.params.kgId ?? 
'')) const kgIdentity = ref<KnowledgeGraphIdentity | null>(null) const dataSourceCount = ref(0) @@ -153,6 +160,21 @@ const selectedRailItemId = ref<GraphManagementRailItemId | null>(null) const mutationLogEntryPreviewLoading = ref(false) const mutationLogEntryPreviewPage = ref<MutationLogEntryPreviewPage | null>(null) const mutationLogEntryPreviewOffset = ref(0) +const graphManagementDataSources = ref<DataSourceRef[]>([]) +const graphManagementDataSourcesLoading = ref(false) +const graphManagementDataSourcesError = ref<string | null>(null) +const selectedOpsDataSourceId = ref<string | null>(null) +const inlineSyncRuns = ref<InlineSyncRun[]>([]) +const inlineSyncRunsLoading = ref(false) +const inlineSyncRunsError = ref<string | null>(null) +const inlineSyncTriggering = ref(false) +const selectedInlineRunId = ref<string | null>(null) +const inlineRunLogs = ref<string[]>([]) +const inlineRunLogsLoading = ref(false) +const inlineRunLogsError = ref<string | null>(null) +const inlineMutationJsonl = ref('') +const inlineMutationApplying = ref(false) +const inlineMutationApplyError = ref<string | null>(null) const activeStep = computed(() => parseManageStepQuery(route.query.step)) const showOverview = computed(() => activeStep.value === null) @@ -278,6 +300,10 @@ const selectedMutationLogRun = computed(() => mutationLogRuns.value.find((run) => run.id === selectedMutationLogRunId.value) ?? null, ) +const selectedOpsDataSource = computed(() => + graphManagementDataSources.value.find((ds) => ds.id === selectedOpsDataSourceId.value) ?? 
null, +) + const progressChecklist = computed(() => { const readiness = statusProjection.value?.readiness if (!readiness) return [] @@ -362,6 +388,104 @@ async function loadOverviewMetrics() { } } +async function loadGraphManagementDataSources() { + if (!hasTenant.value || !kgId.value || activeStep.value !== 'graph-management') return + graphManagementDataSourcesLoading.value = true + graphManagementDataSourcesError.value = null + try { + const dataSources = await apiFetch<DataSourceRef[]>( + `/management/knowledge-graphs/${kgId.value}/data-sources`, + ) + graphManagementDataSources.value = dataSources + if ( + !selectedOpsDataSourceId.value + || !dataSources.some((ds) => ds.id === selectedOpsDataSourceId.value) + ) { + selectedOpsDataSourceId.value = dataSources[0]?.id ?? null + } + } catch (err) { + graphManagementDataSources.value = [] + selectedOpsDataSourceId.value = null + graphManagementDataSourcesError.value = extractErrorMessage(err) + } finally { + graphManagementDataSourcesLoading.value = false + } +} + +async function loadInlineSyncRuns() { + if (!selectedOpsDataSourceId.value) { + inlineSyncRuns.value = [] + return + } + inlineSyncRunsLoading.value = true + inlineSyncRunsError.value = null + try { + const runs = await apiFetch<InlineSyncRun[]>( + `/management/data-sources/${selectedOpsDataSourceId.value}/sync-runs`, + ) + inlineSyncRuns.value = runs + selectedInlineRunId.value = runs[0]?.id ?? 
null + } catch (err) { + inlineSyncRuns.value = [] + selectedInlineRunId.value = null + inlineSyncRunsError.value = extractErrorMessage(err) + } finally { + inlineSyncRunsLoading.value = false + } +} + +async function triggerInlineSync() { + if (!selectedOpsDataSourceId.value) return + inlineSyncTriggering.value = true + try { + await apiFetch(`/management/data-sources/${selectedOpsDataSourceId.value}/sync`, { method: 'POST' }) + toast.success('Sync triggered') + await loadInlineSyncRuns() + } catch (err) { + toast.error('Failed to trigger sync', { description: extractErrorMessage(err) }) + } finally { + inlineSyncTriggering.value = false + } +} + +async function loadInlineRunLogs(runId: string) { + if (!selectedOpsDataSourceId.value) return + selectedInlineRunId.value = runId + inlineRunLogsLoading.value = true + inlineRunLogsError.value = null + try { + const result = await apiFetch<{ logs: string[] }>( + `/management/data-sources/${selectedOpsDataSourceId.value}/sync-runs/${runId}/logs`, + ) + inlineRunLogs.value = result.logs ?? [] + } catch (err) { + inlineRunLogs.value = [] + inlineRunLogsError.value = extractErrorMessage(err) + } finally { + inlineRunLogsLoading.value = false + } +} + +async function applyInlineMutations() { + if (!kgId.value || inlineMutationJsonl.value.trim().length === 0) { + inlineMutationApplyError.value = 'Add one or more JSONL mutation operations first.' 
+ return + } + inlineMutationApplying.value = true + inlineMutationApplyError.value = null + try { + await graphApi.applyMutations(kgId.value, inlineMutationJsonl.value.trim()) + toast.success('Mutations applied') + inlineMutationJsonl.value = '' + await loadMutationLogRuns() + } catch (err) { + inlineMutationApplyError.value = extractErrorMessage(err) + toast.error('Failed to apply mutations', { description: inlineMutationApplyError.value }) + } finally { + inlineMutationApplying.value = false + } +} + function openWorkspaceStep(stepId: WorkspaceStepId) { navigateTo(resolveStepDestination(kgId.value, stepId)) } @@ -731,6 +855,7 @@ watch( syncGraphManagementState() loadExtractionSession() loadSessionHistory() + loadGraphManagementDataSources() } }, ) @@ -738,6 +863,13 @@ watch( watch(selectedMutationLogRunId, () => { loadMutationLogEntryPreviews(0) }) + +watch(selectedOpsDataSourceId, () => { + inlineRunLogs.value = [] + inlineRunLogsError.value = null + selectedInlineRunId.value = null + loadInlineSyncRuns() +}) </script> <template> @@ -1358,33 +1490,140 @@ watch(selectedMutationLogRunId, () => { <template v-else-if="graphManagementMode === 'extraction-jobs'"> <p class="text-muted-foreground"> - Trigger extraction and maintenance controls from the data sources operations panel. + Trigger extraction jobs, inspect run history, and view run logs without leaving this workspace. </p> + <div class="space-y-3 rounded border p-3"> + <p class="text-xs font-medium text-muted-foreground">Data source</p> + <div + v-if="graphManagementDataSourcesLoading" + class="flex items-center gap-2 text-xs text-muted-foreground" + > + <Loader2 class="size-3.5 animate-spin" /> + Loading data sources... 
+ </div> + <div v-else-if="graphManagementDataSourcesError" class="text-xs text-destructive"> + {{ graphManagementDataSourcesError }} + </div> + <div + v-else-if="graphManagementDataSources.length === 0" + class="text-xs text-muted-foreground" + > + No data sources are connected to this knowledge graph yet. + </div> + <div v-else class="flex flex-wrap gap-2"> + <Button + v-for="ds in graphManagementDataSources" + :key="ds.id" + size="sm" + :variant="selectedOpsDataSourceId === ds.id ? 'default' : 'outline'" + @click="selectedOpsDataSourceId = ds.id" + > + {{ ds.name }} + </Button> + </div> + </div> <div class="flex flex-wrap gap-2"> <Button size="sm" variant="outline" - @click="navigateTo(buildDataSourcesStepUrl(kgId))" + :disabled="!selectedOpsDataSourceId || inlineSyncTriggering" + @click="triggerInlineSync" > - Open Data Source Operations + <Loader2 v-if="inlineSyncTriggering" class="mr-1.5 size-3.5 animate-spin" /> + Trigger Sync </Button> <Button size="sm" variant="outline" - @click="navigateTo(buildMaintainStepUrl(kgId))" + :disabled="!selectedOpsDataSourceId || inlineSyncRunsLoading" + @click="loadInlineSyncRuns" > - Open Maintain Step + Refresh Runs </Button> </div> + <div class="grid gap-3 xl:grid-cols-[300px_1fr]"> + <div class="rounded border"> + <div class="border-b px-3 py-2 text-xs font-medium text-muted-foreground">Sync runs</div> + <div + v-if="inlineSyncRunsLoading" + class="flex items-center gap-2 px-3 py-4 text-xs text-muted-foreground" + > + <Loader2 class="size-3.5 animate-spin" /> + Loading sync runs... + </div> + <div v-else-if="inlineSyncRunsError" class="px-3 py-4 text-xs text-destructive"> + {{ inlineSyncRunsError }} + </div> + <div v-else-if="inlineSyncRuns.length === 0" class="px-3 py-4 text-xs text-muted-foreground"> + No sync runs found for this data source. 
+ </div> + <div v-else class="max-h-72 space-y-1.5 overflow-auto p-2"> + <button + v-for="run in inlineSyncRuns" + :key="run.id" + class="w-full rounded border px-2 py-1.5 text-left text-xs transition-colors" + :class="selectedInlineRunId === run.id ? 'border-primary bg-primary/5' : 'hover:bg-muted/40'" + @click="loadInlineRunLogs(run.id)" + > + <div class="flex items-center justify-between gap-2"> + <span class="font-mono">{{ run.id }}</span> + <Badge variant="outline" class="text-[10px]">{{ run.status }}</Badge> + </div> + <p class="mt-1 text-muted-foreground"> + {{ new Date(run.started_at).toLocaleString() }} + </p> + </button> + </div> + </div> + <div class="rounded border p-3"> + <p class="mb-2 text-xs font-medium text-muted-foreground"> + Run logs + <span v-if="selectedOpsDataSource" class="font-normal text-muted-foreground/80"> + · {{ selectedOpsDataSource.name }} + </span> + </p> + <div v-if="inlineRunLogsLoading" class="flex items-center gap-2 text-xs text-muted-foreground"> + <Loader2 class="size-3.5 animate-spin" /> + Loading logs... + </div> + <div v-else-if="inlineRunLogsError" class="text-xs text-destructive"> + {{ inlineRunLogsError }} + </div> + <div v-else-if="inlineRunLogs.length === 0" class="text-xs text-muted-foreground"> + Select a sync run to view logs. + </div> + <pre + v-else + class="max-h-72 overflow-auto rounded border bg-muted/20 p-2 text-[11px]" + >{{ inlineRunLogs.join('\n') }}</pre> + </div> + </div> </template> <template v-else-if="graphManagementMode === 'one-off-mutations'"> <p class="text-muted-foreground"> - Open the mutation editor scoped to this knowledge graph for minor direct edits. + Author and apply one-off JSONL mutations directly in this workspace. 
</p> - <Button size="sm" @click="navigateTo(`/graph/mutations?kg_id=${kgId}&view=editor`)"> - Open Manual Mutations - </Button> + <div class="space-y-3 rounded border p-3"> + <p class="text-xs font-medium text-muted-foreground">Mutation payload (JSONL)</p> + <textarea + v-model="inlineMutationJsonl" + class="min-h-44 w-full rounded border bg-background px-3 py-2 font-mono text-xs" + placeholder='{"op":"CREATE","type":"node","label":"repo","id":"repo:example","set_properties":{"name":"example"}}' + /> + <div class="flex items-center gap-2"> + <Button size="sm" :disabled="inlineMutationApplying" @click="applyInlineMutations"> + <Loader2 v-if="inlineMutationApplying" class="mr-1.5 size-3.5 animate-spin" /> + Apply Mutations + </Button> + <span class="text-xs text-muted-foreground"> + Applies directly to this knowledge graph without page navigation. + </span> + </div> + <p v-if="inlineMutationApplyError" class="text-xs text-destructive"> + {{ inlineMutationApplyError }} + </p> + </div> </template> <template v-else> diff --git a/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts b/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts index dbcadf16d..5c9c74b24 100644 --- a/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts +++ b/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts @@ -293,8 +293,8 @@ describe('KG-MANAGE-004 - step card status semantics', () => { }) describe('KG-MANAGE-005 - graph-scoped data sources step', () => { - it('routes Data Sources step with kg_id and manage return context', () => { - expect(manageWorkspaceVue).toContain('buildDataSourcesStepUrl') + it('keeps data-sources route utility for workspace cards but not graph-management redirects', () => { + expect(manageWorkspaceVue).not.toContain('navigateTo(buildDataSourcesStepUrl(kgId))') expect(buildDataSourcesStepUrl('kg-abc')).toBe('/data-sources?kg_id=kg-abc&from=manage') }) @@ -306,8 +306,8 @@ describe('KG-MANAGE-005 - graph-scoped data sources step', () 
=> { }) describe('KG-MANAGE-015 - graph-scoped maintain step and round trip', () => { - it('routes Maintain step with graph scope and maintenance focus', () => { - expect(manageWorkspaceVue).toContain('buildMaintainStepUrl') + it('keeps maintain route utility for workspace cards but not graph-management redirects', () => { + expect(manageWorkspaceVue).not.toContain('navigateTo(buildMaintainStepUrl(kgId))') expect(buildMaintainStepUrl('kg-abc')).toBe( '/data-sources?kg_id=kg-abc&from=manage&focus=maintain', ) @@ -585,3 +585,21 @@ describe('KG-MANAGE-020 - forbidden and disabled action restrictions', () => { expect(manageWorkspaceVue).toContain('statusProjection.value = previousStatus') }) }) + +describe('KG-MANAGE-021 - unified in-place graph operations', () => { + it('runs extraction jobs and logs directly in graph-management without data-sources redirect', () => { + expect(manageWorkspaceVue).toContain('triggerInlineSync') + expect(manageWorkspaceVue).toContain('loadInlineSyncRuns') + expect(manageWorkspaceVue).toContain('loadInlineRunLogs') + expect(manageWorkspaceVue).toContain('Run logs') + expect(manageWorkspaceVue).not.toContain('Open Data Source Operations') + expect(manageWorkspaceVue).not.toContain('Open Maintain Step') + }) + + it('applies one-off mutations directly in graph-management without mutations-console redirect', () => { + expect(manageWorkspaceVue).toContain('inlineMutationJsonl') + expect(manageWorkspaceVue).toContain('applyInlineMutations') + expect(manageWorkspaceVue).toContain('graphApi.applyMutations') + expect(manageWorkspaceVue).not.toContain('navigateTo(`/graph/mutations?kg_id=${kgId}&view=editor`)') + }) +}) From 9f81a7c5f9f6d2985850527d7cdb9a5212352e62 Mon Sep 17 00:00:00 2001 From: aredenba-rh <aredenba@redhat.com> Date: Fri, 22 May 2026 13:17:29 -0400 Subject: [PATCH 049/153] feat(management): add per-run mutation entry previews (#721) Expose paginated per-entry mutation previews from sync run metadata so run detail panels can show 
concrete operation samples beyond aggregate counts. Co-authored-by: Cursor <cursoragent@cursor.com> --- .../presentation/data_sources/routes.py | 46 ++++++++++-- .../presentation/test_data_sources_routes.py | 75 ++++++++++++++++++- 2 files changed, 114 insertions(+), 7 deletions(-) diff --git a/src/api/management/presentation/data_sources/routes.py b/src/api/management/presentation/data_sources/routes.py index dc8c64560..c73f1b16f 100644 --- a/src/api/management/presentation/data_sources/routes.py +++ b/src/api/management/presentation/data_sources/routes.py @@ -39,6 +39,29 @@ router = APIRouter(tags=["data-sources"]) +def _build_operation_count_entry_previews( + operation_counts: dict[str, int], +) -> list[tuple[int, str, str]]: + """Expand operation counts into stable, per-entry preview rows.""" + previews: list[tuple[int, str, str]] = [] + line_number = 1 + for operation_class in sorted(operation_counts.keys()): + raw_count = operation_counts.get(operation_class, 0) + count = int(raw_count) if raw_count is not None else 0 + if count <= 0: + continue + for occurrence in range(1, count + 1): + previews.append( + ( + line_number, + operation_class, + f"{operation_class} operation {occurrence} of {count}", + ) + ) + line_number += 1 + return previews + + @router.post( "/data-sources/{ds_id}/commit-refs/refresh", status_code=status.HTTP_200_OK, @@ -724,8 +747,8 @@ async def list_mutation_log_entry_previews( ) -> MutationLogEntryPreviewPageResponse: """List paginated mutation-log entry previews for a sync run. - Returns an empty page with ``preview_available=false`` until mutation-log - storage is wired for per-entry retrieval (#721 follow-on). + Entry previews are derived from recorded per-run operation counts, + giving users line-by-line visibility beyond aggregate totals. 
""" try: ds = await service.get( @@ -756,12 +779,25 @@ async def list_mutation_log_entry_previews( preview_available=False, ) + expanded_previews = _build_operation_count_entry_previews( + sync_run.mutation_log_run.operation_counts + ) + total = len(expanded_previews) + page = expanded_previews[offset : offset + limit] + return MutationLogEntryPreviewPageResponse( - entries=[], - total=0, + entries=[ + { + "line_number": line_number, + "operation_class": operation_class, + "summary": summary, + } + for line_number, operation_class, summary in page + ], + total=total, offset=offset, limit=limit, - preview_available=False, + preview_available=total > 0, ) except HTTPException: diff --git a/src/api/tests/unit/management/presentation/test_data_sources_routes.py b/src/api/tests/unit/management/presentation/test_data_sources_routes.py index b7e613ade..5ad48dae2 100644 --- a/src/api/tests/unit/management/presentation/test_data_sources_routes.py +++ b/src/api/tests/unit/management/presentation/test_data_sources_routes.py @@ -538,7 +538,7 @@ def test_list_sync_runs_returns_404_when_ds_not_found( class TestMutationLogEntryPreviewRoutes: """Tests for GET /management/data-sources/{ds_id}/sync-runs/{run_id}/mutation-log-entries.""" - def test_list_mutation_log_entries_returns_paginated_skeleton_when_storage_unavailable( + def test_list_mutation_log_entries_returns_paginated_previews_from_operation_counts( self, test_client: TestClient, mock_ds_service: AsyncMock, @@ -564,7 +564,78 @@ def test_list_mutation_log_entries_returns_paginated_skeleton_when_storage_unava assert response.status_code == status.HTTP_200_OK payload = response.json() - assert payload == { + assert payload["total"] == 3 + assert payload["offset"] == 0 + assert payload["limit"] == 20 + assert payload["preview_available"] is True + assert payload["entries"][0] == { + "line_number": 1, + "operation_class": "create_node", + "summary": "create_node operation 1 of 3", + } + assert payload["entries"][2] == { + 
"line_number": 3, + "operation_class": "create_node", + "summary": "create_node operation 3 of 3", + } + + def test_list_mutation_log_entries_honors_offset_and_limit( + self, + test_client: TestClient, + mock_ds_service: AsyncMock, + mock_sync_run_repo: AsyncMock, + sample_data_source: DataSource, + sample_sync_run: DataSourceSyncRun, + ) -> None: + sample_sync_run.mutation_log_run = MutationLogRunMetadata( + mutation_log_id="mlog-preview-2", + knowledge_graph_id=sample_data_source.knowledge_graph_id, + session_id="sess-preview-2", + actor_id="actor-preview-2", + started_at=sample_sync_run.started_at, + operation_counts={"create_edge": 1, "create_node": 2}, + ) + mock_ds_service.get.return_value = sample_data_source + mock_sync_run_repo.get_by_id.return_value = sample_sync_run + + response = test_client.get( + f"/management/data-sources/{sample_data_source.id.value}/sync-runs/" + f"{sample_sync_run.id}/mutation-log-entries?offset=1&limit=1" + ) + + assert response.status_code == status.HTTP_200_OK + payload = response.json() + assert payload["total"] == 3 + assert payload["offset"] == 1 + assert payload["limit"] == 1 + assert payload["preview_available"] is True + assert payload["entries"] == [ + { + "line_number": 2, + "operation_class": "create_node", + "summary": "create_node operation 1 of 2", + } + ] + + def test_list_mutation_log_entries_returns_unavailable_when_no_mutation_metadata( + self, + test_client: TestClient, + mock_ds_service: AsyncMock, + mock_sync_run_repo: AsyncMock, + sample_data_source: DataSource, + sample_sync_run: DataSourceSyncRun, + ) -> None: + sample_sync_run.mutation_log_run = None + mock_ds_service.get.return_value = sample_data_source + mock_sync_run_repo.get_by_id.return_value = sample_sync_run + + response = test_client.get( + f"/management/data-sources/{sample_data_source.id.value}/sync-runs/" + f"{sample_sync_run.id}/mutation-log-entries?offset=0&limit=20" + ) + + assert response.status_code == status.HTTP_200_OK + assert 
response.json() == { "entries": [], "total": 0, "offset": 0, From 6867e0a2e4b7c1c7adab9761d6c52b1a254bb350 Mon Sep 17 00:00:00 2001 From: aredenba-rh <aredenba@redhat.com> Date: Fri, 22 May 2026 13:17:36 -0400 Subject: [PATCH 050/153] feat(extraction): wire outbox workers to runtime factory adapters (#716) Route extraction outbox handling through the workload runtime factory so configured container-backed launchers are used instead of hardcoded in-memory launchers. Co-authored-by: Cursor <cursoragent@cursor.com> --- src/api/main.py | 8 ++++---- .../infrastructure/test_workload_runtime_factory.py | 9 +++++++++ 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/src/api/main.py b/src/api/main.py index fba1699f1..257c9f0a6 100644 --- a/src/api/main.py +++ b/src/api/main.py @@ -344,9 +344,9 @@ async def handle(self, event_type: str, payload: dict[str, Any]) -> None: from extraction.infrastructure.runtime_context_builder import ( FilesystemExtractionRuntimeContextBuilder, ) - from extraction.infrastructure.workload_runtime import ( - InMemoryEphemeralExtractionWorkerLauncher, - ScopedWorkloadCredentialIssuer, + from extraction.infrastructure.workload_runtime import ScopedWorkloadCredentialIssuer + from extraction.infrastructure.workload_runtime_factory import ( + create_ephemeral_extraction_worker_launcher, ) from management.domain.value_objects import KnowledgeGraphId from management.infrastructure.repositories.knowledge_graph_repository import ( @@ -367,7 +367,7 @@ async def handle(self, event_type: str, payload: dict[str, Any]) -> None: credential_issuer=ScopedWorkloadCredentialIssuer( default_ttl=timedelta(minutes=15) ), - worker_launcher=InMemoryEphemeralExtractionWorkerLauncher(), + worker_launcher=create_ephemeral_extraction_worker_launcher(), ) tenant_id = str(payload.get("tenant_id", "")) if payload.get("tenant_id") else "" diff --git a/src/api/tests/unit/extraction/infrastructure/test_workload_runtime_factory.py 
b/src/api/tests/unit/extraction/infrastructure/test_workload_runtime_factory.py index ac44244f9..c6f3afa61 100644 --- a/src/api/tests/unit/extraction/infrastructure/test_workload_runtime_factory.py +++ b/src/api/tests/unit/extraction/infrastructure/test_workload_runtime_factory.py @@ -2,6 +2,8 @@ from __future__ import annotations +from pathlib import Path + from extraction.infrastructure.container_workload_runtime import ( ContainerEphemeralExtractionWorkerLauncher, ContainerStickySessionRuntimeManager, @@ -40,3 +42,10 @@ def test_container_backend_returns_container_adapters(self) -> None: assert isinstance(sticky, ContainerStickySessionRuntimeManager) assert isinstance(worker, ContainerEphemeralExtractionWorkerLauncher) + + def test_outbox_extraction_handler_uses_runtime_factory_wiring(self) -> None: + main_source = Path(__file__).resolve().parents[4] / "main.py" + content = main_source.read_text(encoding="utf-8") + + assert "create_ephemeral_extraction_worker_launcher" in content + assert "InMemoryEphemeralExtractionWorkerLauncher" not in content From dcdc1a1383ae1c5b55cff92b5b44bf05f53828c2 Mon Sep 17 00:00:00 2001 From: aredenba-rh <aredenba@redhat.com> Date: Fri, 22 May 2026 13:17:43 -0400 Subject: [PATCH 051/153] feat(management): enforce graph-native canonical schema source (#718) Remove legacy ontology fallback reads/writes so canonical schema flows only through graph-native storage and update service tests to require canonical repository wiring. 
Co-authored-by: Cursor <cursoragent@cursor.com> --- .../services/knowledge_graph_service.py | 17 +++---- .../test_canonical_schema_service.py | 25 ++++++++++ .../test_knowledge_graph_service.py | 46 ++++++++++++++----- 3 files changed, 66 insertions(+), 22 deletions(-) diff --git a/src/api/management/application/services/knowledge_graph_service.py b/src/api/management/application/services/knowledge_graph_service.py index ea9101d26..afd20c6ba 100644 --- a/src/api/management/application/services/knowledge_graph_service.py +++ b/src/api/management/application/services/knowledge_graph_service.py @@ -813,21 +813,18 @@ async def save_ontology( if kg is None or kg.tenant_id != self._scope_to_tenant: raise KnowledgeGraphNotFoundError(f"Knowledge graph {kg_id} not found") - if self._canonical_schema_repo is not None: - await self._canonical_schema_repo.replace_ontology(kg_id, config) - else: - await self._kg_repo.save_ontology(kg_id, config) + if self._canonical_schema_repo is None: + raise ValueError("Canonical schema repository is not configured") + await self._canonical_schema_repo.replace_ontology(kg_id, config) await self._session.commit() return config async def _resolve_canonical_ontology(self, kg_id: str) -> OntologyConfig | None: - """Load canonical schema from graph-native storage with JSONB fallback.""" - if self._canonical_schema_repo is not None: - canonical = await self._canonical_schema_repo.get_ontology(kg_id) - if canonical is not None: - return canonical - return await self._kg_repo.get_ontology(kg_id) + """Load canonical schema from graph-native storage only.""" + if self._canonical_schema_repo is None: + return None + return await self._canonical_schema_repo.get_ontology(kg_id) def _evaluate_workspace_readiness( self, ontology: OntologyConfig | None diff --git a/src/api/tests/unit/management/application/test_canonical_schema_service.py b/src/api/tests/unit/management/application/test_canonical_schema_service.py index 07f41aae7..a453fba2a 100644 --- 
a/src/api/tests/unit/management/application/test_canonical_schema_service.py +++ b/src/api/tests/unit/management/application/test_canonical_schema_service.py @@ -132,3 +132,28 @@ async def test_workspace_readiness_uses_canonical_schema( assert result is not None assert result.transition_eligible is True + + @pytest.mark.asyncio + async def test_save_ontology_requires_canonical_repository_configuration( + self, mock_session, kg_repo, authz, tenant_id, user_id + ): + service_without_canonical = KnowledgeGraphService( + session=mock_session, + knowledge_graph_repository=kg_repo, + data_source_repository=InMemoryDataSourceRepository(), + secret_store=InMemorySecretStoreRepository(), + authz=authz, + scope_to_tenant=tenant_id, + probe=RecordingKnowledgeGraphServiceProbe(), + canonical_schema_repository=None, + ) + kg = _make_kg() + kg_repo.seed(kg) + await _grant_kg_edit(authz, kg.id.value, user_id) + + with pytest.raises(ValueError, match="Canonical schema repository is not configured"): + await service_without_canonical.save_ontology( + user_id=user_id, + kg_id=kg.id.value, + config=OntologyConfig(), + ) diff --git a/src/api/tests/unit/management/application/test_knowledge_graph_service.py b/src/api/tests/unit/management/application/test_knowledge_graph_service.py index 1d7e63e8e..710a6cb0d 100644 --- a/src/api/tests/unit/management/application/test_knowledge_graph_service.py +++ b/src/api/tests/unit/management/application/test_knowledge_graph_service.py @@ -42,6 +42,7 @@ ) from shared_kernel.authorization.types import Permission from tests.fakes.authorization import InMemoryAuthorizationProvider +from tests.fakes.canonical_schema import InMemoryCanonicalSchemaRepository from tests.fakes.management import ( InMemoryDataSourceRepository, InMemoryKnowledgeGraphRepository, @@ -115,7 +116,22 @@ def workspace_id(): @pytest.fixture -def service(mock_session, kg_repo, ds_repo, secret_store, authz, probe, tenant_id): +def canonical_schema_repo(): + """In-memory canonical 
schema repository.""" + return InMemoryCanonicalSchemaRepository() + + +@pytest.fixture +def service( + mock_session, + kg_repo, + ds_repo, + secret_store, + authz, + probe, + tenant_id, + canonical_schema_repo, +): """KnowledgeGraphService wired with in-memory fakes.""" return KnowledgeGraphService( session=mock_session, @@ -125,6 +141,7 @@ def service(mock_session, kg_repo, ds_repo, secret_store, authz, probe, tenant_i authz=authz, scope_to_tenant=tenant_id, probe=probe, + canonical_schema_repository=canonical_schema_repo, ) @@ -156,11 +173,16 @@ def _make_kg( return kg -async def _seed_stored_ontology(kg, kg_repo, config: OntologyConfig) -> None: - """Attach ontology to aggregate and persisted JSONB fallback store.""" +async def _seed_stored_ontology( + kg, + kg_repo, + canonical_schema_repo: InMemoryCanonicalSchemaRepository, + config: OntologyConfig, +) -> None: + """Attach ontology to aggregate and canonical schema store.""" kg.set_ontology(config) kg_repo.seed(kg) - await kg_repo.save_ontology(kg.id.value, config) + canonical_schema_repo.seed(kg.id.value, config) def _make_ds( @@ -448,7 +470,7 @@ async def test_workspace_status_returns_none_when_view_denied( @pytest.mark.asyncio async def test_workspace_status_includes_mode_readiness_and_session_pointers( - self, service, authz, kg_repo, user_id + self, service, authz, kg_repo, canonical_schema_repo, user_id ): """Should project mode/readiness flags and default null session pointers.""" kg = _make_kg() @@ -462,7 +484,7 @@ async def test_workspace_status_includes_mode_readiness_and_session_pointers( ), ), ) - await _seed_stored_ontology(kg, kg_repo, ontology_config) + await _seed_stored_ontology(kg, kg_repo, canonical_schema_repo, ontology_config) await _grant_kg_view(authz, kg.id.value, user_id) result = await service.get_workspace_status(user_id=user_id, kg_id=kg.id.value) @@ -502,7 +524,7 @@ async def test_workspace_status_transition_not_eligible_without_schema_readiness @pytest.mark.asyncio async def 
test_workspace_status_fails_for_prepopulated_type_without_instances( - self, service, authz, kg_repo, user_id + self, service, authz, kg_repo, canonical_schema_repo, user_id ): """Should block transition when prepopulated type has zero instances.""" kg = _make_kg() @@ -522,7 +544,7 @@ async def test_workspace_status_fails_for_prepopulated_type_without_instances( ), ), ) - await _seed_stored_ontology(kg, kg_repo, ontology_config) + await _seed_stored_ontology(kg, kg_repo, canonical_schema_repo, ontology_config) await _grant_kg_view(authz, kg.id.value, user_id) result = await service.get_workspace_status(user_id=user_id, kg_id=kg.id.value) @@ -562,7 +584,7 @@ async def test_validate_workspace_returns_projection_when_authorized( @pytest.mark.asyncio async def test_transition_workspace_requires_edit_permission( - self, service, authz, kg_repo, user_id + self, service, authz, kg_repo, canonical_schema_repo, user_id ): kg = _make_kg() ontology_config = OntologyConfig( @@ -575,7 +597,7 @@ async def test_transition_workspace_requires_edit_permission( ), ), ) - await _seed_stored_ontology(kg, kg_repo, ontology_config) + await _seed_stored_ontology(kg, kg_repo, canonical_schema_repo, ontology_config) await _grant_kg_view(authz, kg.id.value, user_id) with pytest.raises(UnauthorizedError): @@ -586,7 +608,7 @@ async def test_transition_workspace_requires_edit_permission( @pytest.mark.asyncio async def test_transition_workspace_changes_mode_and_creates_session_pointer( - self, service, authz, kg_repo, user_id + self, service, authz, kg_repo, canonical_schema_repo, user_id ): kg = _make_kg() ontology_config = OntologyConfig( @@ -599,7 +621,7 @@ async def test_transition_workspace_changes_mode_and_creates_session_pointer( ), ), ) - await _seed_stored_ontology(kg, kg_repo, ontology_config) + await _seed_stored_ontology(kg, kg_repo, canonical_schema_repo, ontology_config) await _grant_kg_edit(authz, kg.id.value, user_id) result = await service.transition_workspace_to_extraction( 
From 15045ec2d77d357f558cc7abbd986dc46d566a8b Mon Sep 17 00:00:00 2001 From: Austin Redenbaugh <aredenba@redhat.com> Date: Fri, 22 May 2026 18:39:50 -0400 Subject: [PATCH 052/153] feat(data-sources): align onboarding with k-extract add-another flow (#735) Switch data-source onboarding to row-based multi-entry URL capture with Add another/Add to project actions, and add partial-success connection handling so valid sources are created even when some fail. Co-authored-by: Cursor <cursoragent@cursor.com> --- specs/ui/experience.spec.md | 35 +- specs/ui/kg-manage-experience.spec.md | 3 +- src/dev-ui/app/pages/data-sources/index.vue | 1311 +++++------------ .../data-source-connection-wizard.test.ts | 56 +- src/dev-ui/app/tests/data-sources.test.ts | 66 +- .../app/tests/task-121-spec-alignment.test.ts | 43 +- .../app/tests/task-129-spec-alignment.test.ts | 93 +- src/dev-ui/app/utils/dataSourceWizard.ts | 111 +- 8 files changed, 588 insertions(+), 1130 deletions(-) diff --git a/specs/ui/experience.spec.md b/specs/ui/experience.spec.md index f17216337..4f35fb42c 100644 --- a/specs/ui/experience.spec.md +++ b/specs/ui/experience.spec.md @@ -67,17 +67,19 @@ The system SHALL guide users through creating a knowledge graph before adding da ### Requirement: Data Source Connection The system SHALL provide a guided flow for connecting external data sources to a knowledge graph. 
-#### Scenario: Adapter type selection +#### Scenario: URL-first provider detection - GIVEN a user adding a data source to a knowledge graph - WHEN the flow begins -- THEN the user selects an adapter type first (e.g., GitHub) -- AND the form adapts to show adapter-specific fields +- THEN the user can add multiple source URLs using repeated URL input rows (`Add another`) +- AND the system auto-detects the provider type from the URL (GitHub, GitLab, Jira) +- AND unsupported providers are clearly marked as coming soon without allowing completion #### Scenario: Connection configuration -- GIVEN a selected adapter type (e.g., GitHub) +- GIVEN a detected GitHub provider - WHEN the user configures the connection -- THEN they provide the minimum required fields (e.g., repository URL, access token) -- AND the system infers defaults where possible (e.g., data source name from repo name) +- THEN they provide the minimum required fields (knowledge graph, repository URL, tracked branch, and source name) +- AND the system infers defaults where possible (e.g., data source name from repo name and default branch from repository metadata) +- AND credentials are entered in a single one-time token field #### Scenario: Credential handling - GIVEN credentials provided during data source setup @@ -86,26 +88,7 @@ The system SHALL provide a guided flow for connecting external data sources to a - AND the plaintext is never persisted in the browser ### Requirement: Ontology Design -The system SHALL support an agent-assisted ontology design flow when connecting a data source. 
- -#### Scenario: Intent description -- GIVEN a user who has connected a data source -- WHEN the connection is saved -- THEN the user is prompted to describe (in free text) what problems or questions they want to solve with this data - -#### Scenario: Agent-proposed ontology -- GIVEN a free-text intent description and a connected data source -- WHEN the user submits their intent -- THEN the system performs a lightweight scan of the data source -- AND an AI agent explores the scanned data and proposes an ontology (node types, edge types, properties) -- AND the proposed ontology is presented to the user for review - -#### Scenario: Ontology review and approval -- GIVEN a proposed ontology -- WHEN the user reviews it -- THEN they can approve the ontology as-is -- OR iterate by editing individual types and relationships -- AND extraction begins only after the user explicitly approves +The system SHALL support an editable ontology experience for connected data sources. #### Scenario: Individual type editing - GIVEN a proposed or existing ontology diff --git a/specs/ui/kg-manage-experience.spec.md b/specs/ui/kg-manage-experience.spec.md index d91d9eb24..86871b238 100644 --- a/specs/ui/kg-manage-experience.spec.md +++ b/specs/ui/kg-manage-experience.spec.md @@ -88,7 +88,8 @@ The system SHALL preserve the established data-source operations experience whil #### Scenario: Graph-scoped data source step - GIVEN the user opens `Data Sources` from KG manage workspace - THEN the destination is pre-scoped to the selected knowledge graph -- AND existing commit cues, maintenance readiness, and diff summary behaviors remain available +- AND source onboarding and source-level commit/diff cues remain available in that scoped view +- AND graph-wide maintenance orchestration and run telemetry remain in `Manage` - AND returning to manage workspace preserves the current graph context ### Requirement: Graph Management Conversation-First Layout diff --git 
a/src/dev-ui/app/pages/data-sources/index.vue b/src/dev-ui/app/pages/data-sources/index.vue index 1ab0fa470..75ab47727 100644 --- a/src/dev-ui/app/pages/data-sources/index.vue +++ b/src/dev-ui/app/pages/data-sources/index.vue @@ -5,8 +5,6 @@ import { Cable, Building2, Plus, - Github, - GitBranch, ChevronRight, ChevronLeft, CheckCircle2, @@ -23,23 +21,17 @@ import { FileText, Settings, RefreshCw, - Cpu, - Coins, - DollarSign, - Clock3, } from 'lucide-vue-next' import { - ADAPTERS, - isAdapterSelectable, - canAdvanceStep1, inferNameFromRepoUrl, + validateStep1, validateStep2, buildDataSourceCreationUrl, buildDataSourceCreationBody, } from '@/utils/dataSourceWizard' +import type { DetectedAdapterId } from '@/utils/dataSourceWizard' import { validateTypeLabel, - validateIntentText, parsePropertyList, buildOntologySavePayload, } from '@/utils/ontologyWizard' @@ -95,21 +87,6 @@ interface SyncRun { created_at: string } -interface MaintenanceSchedule { - enabled: boolean - cron_expression: string - timezone_name: string - next_run_at: string | null -} - -interface MaintenanceRun { - run_id: string - triggered_at: string - outcome: 'started' | 'no-changes' | 'preflight-failed' | 'launch-failed' - message: string | null - target_data_source_ids: string[] -} - interface DataSourceItem { id: string name: string @@ -141,12 +118,20 @@ interface DataSourceDiffSummary { changed_files: DiffChangedFile[] } -interface AdapterType { +interface PendingSourceDraft { id: string - label: string - description: string - icon: typeof Github - available: boolean + url: string + detectedAdapterId: DetectedAdapterId + name: string + branch: string + nameError: string + urlError: string + branchError: string +} + +interface SourceUrlInputRow { + id: string + url: string } interface ProposedNodeType { @@ -208,59 +193,30 @@ const ACTIVE_STATUSES: SyncRun['status'][] = ['pending', 'ingesting', 'ai_extrac const { hasTenant, tenantVersion } = useTenant() -// ── Available adapters 
───────────────────────────────────────────────────── - -/** Icon map: resolves the Lucide icon component for each adapter ID. */ -const ADAPTER_ICONS: Record<string, typeof Github> = { - github: Github, - gitlab: GitBranch, - jira: Cable, -} - -/** - * Adapter list consumed by the template — extends the framework-free - * `ADAPTERS` definition from `utils/dataSourceWizard.ts` with Vue icon refs. - */ -const adapters: AdapterType[] = ADAPTERS.map((a) => ({ - ...a, - icon: ADAPTER_ICONS[a.id] ?? Cable, -})) - // ── Wizard state ─────────────────────────────────────────────────────────── const wizardOpen = ref(false) const wizardStep = ref(1) -const WIZARD_STEPS = 4 +const WIZARD_STEPS = 2 -// Step 1 – Adapter selection -const selectedAdapterId = ref('') +// Step 1 – URL-first onboarding const selectedKnowledgeGraphId = ref('') +const sourceUrlInputs = ref<SourceUrlInputRow[]>([{ id: 'source-1', url: '' }]) +const sourceUrlError = ref('') +const providerError = ref('') +const pendingSources = ref<PendingSourceDraft[]>([]) +const detectingSourceDetails = ref(false) const knowledgeGraphs = ref<Array<{ id: string; name: string }>>([]) const loadingKgs = ref(false) -// Step 4 – Approval state -const approvingOntology = ref(false) +// Step 2 – Approval state +const connectingDataSource = ref(false) // Step 2 – Connection configuration -const connName = ref('') -const connRepoUrl = ref('') const connToken = ref('') const showToken = ref(false) -const connNameError = ref('') -const connRepoUrlError = ref('') const connTokenError = ref('') -// Step 3 – Intent description -const intentText = ref('') -const intentError = ref('') - -// Step 4 – Proposed ontology -const scanningOntology = ref(false) -const ontologyReady = ref(false) - -const proposedNodes = ref<ProposedNodeType[]>([]) -const proposedEdges = ref<ProposedEdgeType[]>([]) - // ── GitHub ontology proposal ─────────────────────────────────────────────── const GITHUB_PROPOSAL_NODES: Omit<ProposedNodeType, 'editing' | 
'editLabel' | 'editDescription' | 'editRequired' | 'editOptional'>[] = [ @@ -333,8 +289,6 @@ const GITHUB_PROPOSAL_EDGES: Omit<ProposedEdgeType, 'editing' | 'editLabel' | 'e // ── Helpers ──────────────────────────────────────────────────────────────── -const selectedAdapter = computed(() => adapters.find((a) => a.id === selectedAdapterId.value)) - function toProposedNode(n: typeof GITHUB_PROPOSAL_NODES[0]): ProposedNodeType { return { ...n, @@ -357,15 +311,42 @@ function toProposedEdge(e: typeof GITHUB_PROPOSAL_EDGES[0]): ProposedEdgeType { } } -// ── Infer data source name from repo URL ─────────────────────────────────── +// ── URL detection & inference ─────────────────────────────────────────────── -watch(connRepoUrl, (url) => { - // Only infer when the name field is still empty (do not overwrite user edits). - if (!url.trim() || connName.value.trim()) return - const inferred = inferNameFromRepoUrl(url) - if (inferred) { - connName.value = inferred +watch(sourceUrlInputs, () => { + sourceUrlError.value = '' + providerError.value = '' +}, { deep: true }) + +function addSourceInput(initialUrl = '') { + sourceUrlInputs.value.push({ + id: `source-${Date.now()}-${sourceUrlInputs.value.length + 1}`, + url: initialUrl, + }) +} + +function removeSourceInput(id: string) { + if (sourceUrlInputs.value.length === 1) { + sourceUrlInputs.value[0]!.url = '' + return } + sourceUrlInputs.value = sourceUrlInputs.value.filter((entry) => entry.id !== id) +} + +const sourceUrlPreviews = computed(() => { + const seen = new Set<string>() + const previews: Array<{ id: string; url: string; detectedAdapterId: DetectedAdapterId }> = [] + for (const row of sourceUrlInputs.value) { + const url = row.url.trim() + if (!url || seen.has(url)) continue + seen.add(url) + previews.push({ + id: row.id, + url, + detectedAdapterId: detectAdapterFromUrl(url), + }) + } + return previews }) // ── Wizard navigation ────────────────────────────────────────────────────── @@ -381,183 +362,133 @@ 
watch(connRepoUrl, (url) => { */ function openWizard(preselectedKgId?: string) { wizardStep.value = 1 - selectedAdapterId.value = '' // Pre-select the knowledge graph if one was provided (e.g. from ?kg_id= query param). selectedKnowledgeGraphId.value = preselectedKgId ?? '' - approvingOntology.value = false - connName.value = '' - connRepoUrl.value = '' + sourceUrlInputs.value = [{ id: 'source-1', url: '' }] + sourceUrlError.value = '' + providerError.value = '' + pendingSources.value = [] + connectingDataSource.value = false + detectingSourceDetails.value = false connToken.value = '' showToken.value = false - connNameError.value = '' - connRepoUrlError.value = '' connTokenError.value = '' - intentText.value = '' - intentError.value = '' - scanningOntology.value = false - ontologyReady.value = false - proposedNodes.value = [] - proposedEdges.value = [] wizardOpen.value = true loadKnowledgeGraphs() } -function selectAdapter(id: string) { - // Guard: unavailable adapters cannot be selected. 
- if (!isAdapterSelectable(id)) return - selectedAdapterId.value = id +function providerLabel(adapterId: DetectedAdapterId): string { + if (adapterId === 'github') return 'GitHub' + if (adapterId === 'gitlab') return 'GitLab' + if (adapterId === 'jira') return 'Jira' + return 'Unknown' } -function nextStep() { - if (wizardStep.value === 1) { - if (!canAdvanceStep1(selectedAdapterId.value, selectedKnowledgeGraphId.value)) return - wizardStep.value = 2 - return - } - - if (wizardStep.value === 2) { - const validation = validateStep2({ - connName: connName.value, - connRepoUrl: connRepoUrl.value, - }) - connNameError.value = validation.connNameError - connRepoUrlError.value = validation.connRepoUrlError - connTokenError.value = validation.connTokenError - - if (!validation.valid) return - wizardStep.value = 3 - return - } - - if (wizardStep.value === 3) { - const intentValidation = validateIntentText(intentText.value) - intentError.value = intentValidation.error - if (!intentValidation.valid) return - wizardStep.value = 4 - beginOntologyProposal() - return +async function detectGithubSourceDetails(entry: PendingSourceDraft) { + if (entry.detectedAdapterId !== 'github') return + try { + const parsed = new URL(entry.url) + const [owner, repoRaw] = parsed.pathname.split('/').filter(Boolean) + const repo = repoRaw?.replace(/\.git$/, '') + if (!owner || !repo) return + const response = await fetch(`https://api.github.com/repos/${owner}/${repo}`) + if (!response.ok) return + const payload = await response.json() as { default_branch?: string; name?: string } + if (!entry.branch.trim() && payload.default_branch) { + entry.branch = payload.default_branch + } + if (!entry.name.trim() && payload.name) { + entry.name = payload.name + } + } catch { + // Best effort only. 
} } -function prevStep() { - if (wizardStep.value > 1) wizardStep.value-- +async function detectGithubSourceDetailsBatch() { + detectingSourceDetails.value = true + try { + for (const entry of pendingSources.value) { + await detectGithubSourceDetails(entry) + } + } catch { + // Best effort only; leave user-entered values untouched. + } finally { + detectingSourceDetails.value = false + } } -// ── Ontology proposal (simulated scan + AI proposal) ────────────────────── - -async function beginOntologyProposal() { - scanningOntology.value = true - ontologyReady.value = false - proposedNodes.value = [] - proposedEdges.value = [] - - // Simulate a lightweight scan of the data source (1.5s) followed by AI proposal - await new Promise<void>((resolve) => setTimeout(resolve, 1500)) - - proposedNodes.value = GITHUB_PROPOSAL_NODES.map(toProposedNode) - proposedEdges.value = GITHUB_PROPOSAL_EDGES.map(toProposedEdge) - scanningOntology.value = false - ontologyReady.value = true -} +async function nextStep() { + if (wizardStep.value === 1) { + if (!selectedKnowledgeGraphId.value.trim()) { + providerError.value = 'Select a knowledge graph to continue.' + return + } + const parsedEntries = sourceUrlPreviews.value + if (parsedEntries.length === 0) { + sourceUrlError.value = 'Provide at least one source URL.' + return + } -// ── Per-type inline editing ──────────────────────────────────────────────── + const drafts: PendingSourceDraft[] = parsedEntries.map((entry, index) => ({ + id: `src-${index}-${entry.url}`, + url: entry.url, + detectedAdapterId: entry.detectedAdapterId, + name: inferNameFromRepoUrl(entry.url) ?? 
'', + branch: '', + nameError: '', + urlError: '', + branchError: '', + })) + + let hasError = false + const providerIssues: string[] = [] + for (const entry of drafts) { + const validation = validateStep1({ + selectedKnowledgeGraphId: selectedKnowledgeGraphId.value, + sourceUrl: entry.url, + detectedAdapterId: entry.detectedAdapterId, + }) + entry.urlError = validation.sourceUrlError + if (validation.providerError) { + providerIssues.push(`${entry.url}: ${validation.providerError}`) + } + if (!validation.valid) hasError = true + } -function startEditNode(index: number) { - const n = proposedNodes.value[index] - n.editLabel = n.label - n.editDescription = n.description - n.editRequired = n.required_properties.join(', ') - n.editOptional = n.optional_properties.join(', ') - n.editing = true -} + pendingSources.value = drafts + sourceUrlError.value = hasError && drafts.some((d) => !!d.urlError) + ? 'One or more URLs are invalid.' + : '' + providerError.value = providerIssues.join(' | ') + if (hasError) return -function saveEditNode(index: number) { - const n = proposedNodes.value[index] - const validation = validateTypeLabel(proposedNodes.value, n.editLabel, index) - if (!validation.valid) { - n.editError = validation.error + await detectGithubSourceDetailsBatch() + wizardStep.value = 2 return } - n.editError = '' - n.label = n.editLabel.trim() - n.description = n.editDescription - n.required_properties = parsePropertyList(n.editRequired) - n.optional_properties = parsePropertyList(n.editOptional) - n.editing = false -} - -function cancelEditNode(index: number) { - proposedNodes.value[index].editing = false - proposedNodes.value[index].editError = '' -} - -function removeNode(index: number) { - proposedNodes.value.splice(index, 1) -} - -function startEditEdge(index: number) { - const e = proposedEdges.value[index] - e.editLabel = e.label - e.editDescription = e.description - e.editRequired = e.required_properties.join(', ') - e.editOptional = 
e.optional_properties.join(', ') - e.editing = true -} -function saveEditEdge(index: number) { - const e = proposedEdges.value[index] - const validation = validateTypeLabel(proposedEdges.value, e.editLabel, index) - if (!validation.valid) { - e.editError = validation.error + if (wizardStep.value === 2) { + let hasError = false + for (const entry of pendingSources.value) { + const validation = validateStep2({ + connName: entry.name, + connRepoUrl: entry.url, + }) + entry.nameError = validation.connNameError + entry.urlError = validation.connRepoUrlError + entry.branchError = !entry.branch.trim() ? 'Tracked branch is required.' : '' + if (!validation.valid || entry.branchError) hasError = true + } + connTokenError.value = '' + if (hasError) return + await approveOntology() return } - e.editError = '' - e.label = e.editLabel.trim() - e.description = e.editDescription - e.required_properties = parsePropertyList(e.editRequired) - e.optional_properties = parsePropertyList(e.editOptional) - e.editing = false -} - -function cancelEditEdge(index: number) { - proposedEdges.value[index].editing = false - proposedEdges.value[index].editError = '' -} - -function removeEdge(index: number) { - proposedEdges.value.splice(index, 1) } -// ── Add new types (wizard) ───────────────────────────────────────────────── - -function addNode() { - proposedNodes.value.push({ - label: '', - description: '', - required_properties: [], - optional_properties: [], - editing: true, - editLabel: '', - editDescription: '', - editRequired: '', - editOptional: '', - }) -} - -function addEdge() { - proposedEdges.value.push({ - label: '', - description: '', - from: '', - to: '', - required_properties: [], - optional_properties: [], - editing: true, - editLabel: '', - editDescription: '', - editRequired: '', - editOptional: '', - }) +function prevStep() { + if (wizardStep.value > 1) wizardStep.value-- } // ── Knowledge graph loader ───────────────────────────────────────────────── @@ -605,32 +536,63 @@ 
async function approveOntology() { toast.error('Please select a knowledge graph first') return } + if (pendingSources.value.length === 0) { + toast.error('Add at least one source URL first') + return + } - approvingOntology.value = true + connectingDataSource.value = true try { - await createDataSource({ - kg_id: selectedKnowledgeGraphId.value, - name: connName.value, - adapter_type: selectedAdapterId.value, - connection_config: { - repo_url: connRepoUrl.value, - }, - credentials: connToken.value ? { access_token: connToken.value } : undefined, - }) - // Clear the plaintext token immediately after the API call succeeds so - // that it does not linger in Vue's reactive state (readable via DevTools). - connToken.value = '' - toast.success('Data source connected', { - description: `${connName.value} has been connected and extraction will begin shortly.`, - }) - wizardOpen.value = false - await loadDataSources() - } catch (err: unknown) { - const msg = err instanceof Error ? err.message : 'Failed to connect data source' - toast.error('Connection failed', { description: msg }) - // Token is intentionally NOT cleared on failure so the user can retry. + const failedEntries: Array<{ id: string; message: string }> = [] + let successCount = 0 + for (const entry of pendingSources.value) { + try { + await createDataSource({ + kg_id: selectedKnowledgeGraphId.value, + name: entry.name, + adapter_type: 'github', + connection_config: { + repo_url: entry.url, + branch: entry.branch, + }, + credentials: connToken.value ? { access_token: connToken.value } : undefined, + }) + successCount += 1 + } catch (err: unknown) { + const message = err instanceof Error ? 
err.message : 'Failed to connect source' + failedEntries.push({ id: entry.id, message }) + } + } + + if (successCount > 0) { + await loadDataSources() + } + + if (failedEntries.length === 0) { + // Clear the plaintext token immediately after the API call succeeds so + // that it does not linger in Vue's reactive state (readable via DevTools). + connToken.value = '' + toast.success('Data sources connected', { + description: `${successCount} source(s) connected successfully.`, + }) + wizardOpen.value = false + return + } + + pendingSources.value = pendingSources.value.filter((entry) => + failedEntries.some((failed) => failed.id === entry.id), + ) + const firstError = failedEntries[0]?.message ?? 'Some sources failed to connect' + if (successCount > 0) { + toast.warning('Some sources were not connected', { + description: `${successCount} succeeded, ${failedEntries.length} failed. ${firstError}`, + }) + } else { + toast.error('Connection failed', { description: firstError }) + } + // Token is intentionally NOT cleared on partial/full failure so the user can retry. } finally { - approvingOntology.value = false + connectingDataSource.value = false } } @@ -713,16 +675,6 @@ async function loadDataSources() { '/management/knowledge-graphs' ) const kgs = kgResult.knowledge_graphs ?? [] - maintenanceKnowledgeGraphs.value = kgs - if (!selectedMaintenanceKnowledgeGraphId.value && kgs.length > 0) { - selectedMaintenanceKnowledgeGraphId.value = kgs[0].id - } - if ( - selectedMaintenanceKnowledgeGraphId.value - && !kgs.some(kg => kg.id === selectedMaintenanceKnowledgeGraphId.value) - ) { - selectedMaintenanceKnowledgeGraphId.value = kgs[0]?.id ?? 
'' - } const all: DataSourceItem[] = [] for (const kg of kgs) { try { @@ -754,12 +706,8 @@ async function loadDataSources() { } } dataSources.value = all - await loadMaintenanceOrchestration() } catch { dataSources.value = [] - maintenanceKnowledgeGraphs.value = [] - selectedMaintenanceKnowledgeGraphId.value = '' - maintenanceRuns.value = [] } finally { loadingDataSources.value = false } @@ -778,150 +726,6 @@ const hasActiveSyncs = computed(() => }), ) -const telemetryRows = computed(() => - dataSources.value.flatMap((ds) => - (ds.sync_runs ?? []).map(run => ({ ...run, data_source_name: ds.name })), - ), -) - -const telemetryStatusBuckets = computed(() => { - const buckets = { - pending: 0, - ingesting: 0, - ai_extracting: 0, - applying: 0, - completed: 0, - failed: 0, - } - for (const row of telemetryRows.value) { - buckets[row.status] += 1 - } - return buckets -}) - -const telemetryRecentJobs = computed(() => - [...telemetryRows.value] - .sort((a, b) => new Date(b.started_at).getTime() - new Date(a.started_at).getTime()) - .slice(0, 8), -) - -const telemetryActiveWorkers = computed(() => - telemetryRows.value.filter(row => ACTIVE_STATUSES.includes(row.status)).length, -) - -const telemetryTokenTotal = computed(() => - telemetryRows.value.reduce((sum, row) => sum + (row.token_usage_total ?? 0), 0), -) - -const telemetryCostTotal = computed(() => - telemetryRows.value.reduce((sum, row) => sum + (row.cost_total_usd ?? 0), 0), -) - -const telemetryCostTrend = computed(() => { - const now = Date.now() - const oneDayMs = 24 * 60 * 60 * 1000 - let current = 0 - let previous = 0 - for (const row of telemetryRows.value) { - const eventMs = new Date(row.completed_at ?? row.started_at).getTime() - if (eventMs >= now - oneDayMs) current += row.cost_total_usd ?? 0 - else if (eventMs >= now - oneDayMs * 2) previous += row.cost_total_usd ?? 
0 - } - const delta = current - previous - return { current, previous, delta } -}) - -const maintenanceKnowledgeGraphs = ref<Array<{ id: string; name: string }>>([]) -const selectedMaintenanceKnowledgeGraphId = ref('') -const maintenanceSchedule = ref<MaintenanceSchedule>({ - enabled: false, - cron_expression: '0 2 * * *', - timezone_name: 'UTC', - next_run_at: null, -}) -const maintenanceRuns = ref<MaintenanceRun[]>([]) -const maintenanceLoading = ref(false) -const maintenanceSaving = ref(false) -const maintenanceTriggering = ref(false) - -function maintenanceOutcomeTone( - outcome: MaintenanceRun['outcome'], -): 'default' | 'secondary' | 'destructive' { - if (outcome === 'started') return 'default' - if (outcome === 'launch-failed' || outcome === 'preflight-failed') return 'destructive' - return 'secondary' -} - -async function loadMaintenanceOrchestration() { - if (!selectedMaintenanceKnowledgeGraphId.value) { - maintenanceRuns.value = [] - return - } - maintenanceLoading.value = true - try { - const { apiFetch } = useApiClient() - const [schedule, runs] = await Promise.all([ - apiFetch<MaintenanceSchedule>( - `/management/knowledge-graphs/${selectedMaintenanceKnowledgeGraphId.value}/maintenance-schedule`, - ), - apiFetch<{ runs: MaintenanceRun[] }>( - `/management/knowledge-graphs/${selectedMaintenanceKnowledgeGraphId.value}/maintenance-runs`, - ), - ]) - maintenanceSchedule.value = schedule - maintenanceRuns.value = runs.runs ?? 
[] - } catch { - maintenanceRuns.value = [] - } finally { - maintenanceLoading.value = false - } -} - -async function saveMaintenanceSchedule() { - if (!selectedMaintenanceKnowledgeGraphId.value) return - maintenanceSaving.value = true - try { - const { apiFetch } = useApiClient() - const schedule = await apiFetch<MaintenanceSchedule>( - `/management/knowledge-graphs/${selectedMaintenanceKnowledgeGraphId.value}/maintenance-schedule`, - { - method: 'PUT', - body: { - enabled: maintenanceSchedule.value.enabled, - cron_expression: maintenanceSchedule.value.cron_expression, - timezone_name: maintenanceSchedule.value.timezone_name, - }, - }, - ) - maintenanceSchedule.value = schedule - toast.success('Maintenance schedule saved') - } catch (err) { - const msg = err instanceof Error ? err.message : 'Failed to save maintenance schedule' - toast.error('Failed to save maintenance schedule', { description: msg }) - } finally { - maintenanceSaving.value = false - } -} - -async function triggerMaintenanceRun() { - if (!selectedMaintenanceKnowledgeGraphId.value) return - maintenanceTriggering.value = true - try { - const { apiFetch } = useApiClient() - await apiFetch( - `/management/knowledge-graphs/${selectedMaintenanceKnowledgeGraphId.value}/maintenance-runs/trigger`, - { method: 'POST' }, - ) - await loadMaintenanceOrchestration() - toast.success('Maintenance orchestration completed') - } catch (err) { - const msg = err instanceof Error ? err.message : 'Failed to trigger maintenance' - toast.error('Failed to trigger maintenance', { description: msg }) - } finally { - maintenanceTriggering.value = false - } -} - /** Holds the active setInterval handle, or null when not polling. */ const pollInterval = ref<ReturnType<typeof setInterval> | null>(null) @@ -977,20 +781,14 @@ onMounted(async () => { // without auto-opening the creation wizard (see buildDataSourcesStepUrl). 
const preselectedKgId = route.query.kg_id as string | undefined const fromManage = route.query.from === 'manage' - const focusMaintain = route.query.focus === 'maintain' if (fromManage && preselectedKgId) { scopedKnowledgeGraphId.value = preselectedKgId manageReturnKgId.value = preselectedKgId - selectedMaintenanceKnowledgeGraphId.value = preselectedKgId } else if (preselectedKgId) { await nextTick() openWizard(preselectedKgId) } - - if (focusMaintain && preselectedKgId) { - selectedMaintenanceKnowledgeGraphId.value = preselectedKgId - } }) onUnmounted(() => { @@ -1005,10 +803,6 @@ watch(tenantVersion, () => { loadDataSources() }) -watch(selectedMaintenanceKnowledgeGraphId, () => { - loadMaintenanceOrchestration() -}) - async function triggerSync(dsId: string) { try { const { apiFetch } = useApiClient() @@ -1365,180 +1159,14 @@ async function handleDeleteDs() { </div> <template v-else> - <!-- Extraction operations telemetry dashboard --> - <div class="grid gap-3 md:grid-cols-4"> - <Card> - <CardHeader class="pb-2"> - <CardDescription class="flex items-center gap-1.5 text-[11px]"> - <Cpu class="size-3.5" /> - Active workers - </CardDescription> - <CardTitle class="text-xl">{{ telemetryActiveWorkers }}</CardTitle> - </CardHeader> - <CardContent class="text-[11px] text-muted-foreground"> - Pending {{ telemetryStatusBuckets.pending }} / Ingesting {{ telemetryStatusBuckets.ingesting }} / Extracting {{ telemetryStatusBuckets.ai_extracting }} / Applying {{ telemetryStatusBuckets.applying }} - </CardContent> - </Card> - <Card> - <CardHeader class="pb-2"> - <CardDescription class="flex items-center gap-1.5 text-[11px]"> - <Clock3 class="size-3.5" /> - Recent jobs tracked - </CardDescription> - <CardTitle class="text-xl">{{ telemetryRows.length }}</CardTitle> - </CardHeader> - <CardContent class="text-[11px] text-muted-foreground"> - Completed {{ telemetryStatusBuckets.completed }} / Failed {{ telemetryStatusBuckets.failed }} - </CardContent> - </Card> - <Card> - <CardHeader 
class="pb-2"> - <CardDescription class="flex items-center gap-1.5 text-[11px]"> - <Coins class="size-3.5" /> - Total token usage - </CardDescription> - <CardTitle class="text-xl">{{ telemetryTokenTotal.toLocaleString() }}</CardTitle> - </CardHeader> - <CardContent class="text-[11px] text-muted-foreground"> - Aggregated from sync-run mutation metadata. - </CardContent> - </Card> - <Card> - <CardHeader class="pb-2"> - <CardDescription class="flex items-center gap-1.5 text-[11px]"> - <DollarSign class="size-3.5" /> - Estimated cost trend - </CardDescription> - <CardTitle class="text-xl">${{ telemetryCostTrend.current.toFixed(2) }}</CardTitle> - </CardHeader> - <CardContent class="text-[11px]" :class="telemetryCostTrend.delta <= 0 ? 'text-emerald-600 dark:text-emerald-400' : 'text-amber-600 dark:text-amber-400'"> - {{ telemetryCostTrend.delta <= 0 ? 'Down' : 'Up' }} {{ Math.abs(telemetryCostTrend.delta).toFixed(2) }} vs previous 24h - </CardContent> - </Card> - </div> - <Card> <CardHeader class="pb-2"> - <CardTitle class="text-sm">Recent job events</CardTitle> - <CardDescription class="text-xs">Auto-refreshes while active runs are in progress.</CardDescription> - </CardHeader> - <CardContent> - <div v-if="telemetryRecentJobs.length === 0" class="text-xs text-muted-foreground"> - No sync jobs yet. - </div> - <div v-else class="space-y-1.5"> - <div v-for="job in telemetryRecentJobs" :key="job.id" class="flex items-center justify-between rounded border px-2 py-1.5 text-xs"> - <div class="min-w-0"> - <p class="truncate font-medium">{{ job.data_source_name }}</p> - <p class="truncate text-muted-foreground">{{ new Date(job.started_at).toLocaleString() }}</p> - </div> - <div class="flex items-center gap-2"> - <SyncPhaseIndicator :status="job.status" /> - <span class="font-mono text-muted-foreground">{{ job.token_usage_total ?? 0 }} tk</span> - <span class="font-mono text-muted-foreground">${{ (job.cost_total_usd ?? 
0).toFixed(2) }}</span> - </div> - </div> - </div> - </CardContent> - </Card> - - <Card> - <CardHeader class="pb-2"> - <CardTitle class="text-sm">Scheduled maintenance orchestration</CardTitle> + <CardTitle class="text-sm">Data source catalog</CardTitle> <CardDescription class="text-xs"> - Configure one schedule per knowledge graph and review launch outcomes. + This page is optimized for source onboarding and source-level actions. + Graph-wide run telemetry and maintenance controls live in the manage workspace. </CardDescription> </CardHeader> - <CardContent class="space-y-3"> - <div class="grid gap-3 md:grid-cols-4"> - <div class="space-y-1"> - <Label class="text-xs">Knowledge graph</Label> - <Select v-model="selectedMaintenanceKnowledgeGraphId"> - <SelectTrigger class="h-8"> - <SelectValue placeholder="Select a knowledge graph" /> - </SelectTrigger> - <SelectContent> - <SelectItem - v-for="kg in maintenanceKnowledgeGraphs" - :key="kg.id" - :value="kg.id" - > - {{ kg.name }} - </SelectItem> - </SelectContent> - </Select> - </div> - <div class="space-y-1"> - <Label class="text-xs">Cron</Label> - <Input v-model="maintenanceSchedule.cron_expression" class="h-8 font-mono text-xs" /> - </div> - <div class="space-y-1"> - <Label class="text-xs">Timezone</Label> - <Input v-model="maintenanceSchedule.timezone_name" class="h-8 text-xs" /> - </div> - <div class="flex items-end gap-2"> - <Button - size="sm" - variant="secondary" - :disabled="!selectedMaintenanceKnowledgeGraphId" - @click="maintenanceSchedule.enabled = !maintenanceSchedule.enabled" - > - {{ maintenanceSchedule.enabled ? 
'Disable' : 'Enable' }} - </Button> - <Button - size="sm" - variant="outline" - :disabled="maintenanceSaving || !selectedMaintenanceKnowledgeGraphId" - @click="saveMaintenanceSchedule" - > - <Loader2 v-if="maintenanceSaving" class="mr-1.5 size-3.5 animate-spin" /> - Save schedule - </Button> - <Button - size="sm" - :disabled="maintenanceTriggering || !selectedMaintenanceKnowledgeGraphId" - @click="triggerMaintenanceRun" - > - <Loader2 v-if="maintenanceTriggering" class="mr-1.5 size-3.5 animate-spin" /> - Run now - </Button> - </div> - </div> - <div class="flex items-center justify-between rounded border px-3 py-2 text-xs"> - <p class="text-muted-foreground"> - Next run: - <span class="font-medium text-foreground"> - {{ maintenanceSchedule.next_run_at ? new Date(maintenanceSchedule.next_run_at).toLocaleString() : 'Not scheduled' }} - </span> - </p> - <Badge :variant="maintenanceSchedule.enabled ? 'default' : 'secondary'"> - {{ maintenanceSchedule.enabled ? 'Enabled' : 'Disabled' }} - </Badge> - </div> - <div v-if="maintenanceLoading" class="flex items-center gap-2 text-xs text-muted-foreground"> - <Loader2 class="size-3.5 animate-spin" /> - Loading maintenance run history... - </div> - <div v-else-if="maintenanceRuns.length === 0" class="text-xs text-muted-foreground"> - No maintenance orchestration runs recorded yet. - </div> - <div v-else class="space-y-1.5"> - <div - v-for="run in maintenanceRuns" - :key="run.run_id" - class="flex items-center justify-between rounded border px-2 py-1.5 text-xs" - > - <div> - <p class="font-medium">{{ new Date(run.triggered_at).toLocaleString() }}</p> - <p class="text-muted-foreground">{{ run.message ?? 
'No message provided' }}</p> - </div> - <div class="flex items-center gap-2"> - <Badge :variant="maintenanceOutcomeTone(run.outcome)">{{ run.outcome }}</Badge> - <span class="text-muted-foreground">{{ run.target_data_source_ids.length }} sources</span> - </div> - </div> - </div> - </CardContent> </Card> <!-- Empty state (no data sources yet) --> @@ -1772,11 +1400,13 @@ async function handleDeleteDs() { </template> </div> - <!-- ── Step 1: Select Adapter ── --> + <!-- ── Step 1: Bulk URL entry ── --> <div v-if="wizardStep === 1" class="space-y-4"> <div> - <h3 class="text-sm font-semibold">Select an adapter type</h3> - <p class="text-xs text-muted-foreground">Choose the system you want to import data from.</p> + <h3 class="text-sm font-semibold">Paste your source URLs</h3> + <p class="text-xs text-muted-foreground"> + Add one source at a time with "Add another". We auto-detect provider and prepare all supported sources at once. + </p> </div> <!-- Knowledge graph selection --> @@ -1798,44 +1428,59 @@ async function handleDeleteDs() { </p> </div> - <div class="grid gap-3 sm:grid-cols-2"> - <button - v-for="adapter in adapters" - :key="adapter.id" - :disabled="!adapter.available" - class="group relative rounded-lg border p-4 text-left transition-colors focus-visible:outline-none focus-visible:ring-[3px] focus-visible:ring-ring/50" - :class="[ - adapter.available - ? selectedAdapterId === adapter.id - ? 
'border-primary bg-primary/5' - : 'hover:border-primary/50 hover:bg-accent' - : 'cursor-not-allowed opacity-50', - ]" - @click="adapter.available && selectAdapter(adapter.id)" + <div class="space-y-2"> + <Label>Data source URLs <span class="text-destructive">*</span></Label> + <div + v-for="(row, idx) in sourceUrlInputs" + :key="row.id" + class="rounded-md border p-2" > - <div class="flex items-start gap-3"> - <div class="rounded-md bg-muted p-2 shrink-0"> - <component :is="adapter.icon" class="size-5 text-muted-foreground" /> - </div> - <div class="flex-1 min-w-0"> - <div class="flex items-center gap-2"> - <p class="text-sm font-medium">{{ adapter.label }}</p> - <Badge v-if="!adapter.available" variant="outline" class="text-[10px] px-1.5 py-0"> - Soon - </Badge> - <CheckCircle2 - v-if="selectedAdapterId === adapter.id" - class="ml-auto size-4 text-primary shrink-0" - /> - </div> - <p class="text-xs text-muted-foreground mt-0.5">{{ adapter.description }}</p> - </div> + <div class="flex items-start gap-2"> + <Input + v-model="row.url" + :placeholder="`https://github.com/owner/repository-${idx + 1}`" + /> + <Button + v-if="sourceUrlInputs.length > 1" + type="button" + variant="ghost" + size="sm" + class="h-9 shrink-0" + @click="removeSourceInput(row.id)" + > + Remove + </Button> + </div> + <div v-if="row.url.trim()" class="mt-2 flex items-center gap-2 text-xs text-muted-foreground"> + <span>Detected:</span> + <Badge :variant="detectAdapterFromUrl(row.url) === 'github' ? 'default' : 'outline'"> + {{ providerLabel(detectAdapterFromUrl(row.url)) }} + </Badge> </div> - </button> + </div> + <div class="flex items-center gap-2"> + <Button type="button" variant="outline" size="sm" @click="addSourceInput()"> + Add another + </Button> + </div> + <p v-if="sourceUrlError" class="text-xs text-destructive">{{ sourceUrlError }}</p> + <p + v-if="providerError" + class="text-xs" + :class="providerError.includes('Unknown') ? 
'text-destructive' : 'text-amber-600 dark:text-amber-400'" + > + {{ providerError }} + </p> + <p v-else class="text-xs text-muted-foreground"> + GitHub is fully supported now. GitLab and Jira are detected and shown as coming soon. + </p> </div> <DialogFooter class="pt-2"> - <Button :disabled="!selectedAdapterId" @click="nextStep"> + <Button + :disabled="!selectedKnowledgeGraphId || sourceUrlInputs.every((entry) => !entry.url.trim())" + @click="nextStep" + > Continue <ChevronRight class="ml-1 size-4" /> </Button> @@ -1845,76 +1490,75 @@ async function handleDeleteDs() { <!-- ── Step 2: Connection Configuration ── --> <div v-else-if="wizardStep === 2" class="space-y-5"> <div> - <h3 class="text-sm font-semibold">Configure connection</h3> + <h3 class="text-sm font-semibold">Confirm connection details</h3> <p class="text-xs text-muted-foreground"> - Provide the details to connect your - <span class="font-medium">{{ selectedAdapter?.label }}</span> repository. + Review each detected source, adjust inferred name/branch if needed, then connect them all at once. </p> </div> - <!-- GitHub-specific fields --> - <div v-if="selectedAdapterId === 'github'" class="space-y-4"> - <div class="space-y-1.5"> - <Label for="ds-repo-url"> - Repository URL <span class="text-destructive">*</span> - </Label> - <Input - id="ds-repo-url" - v-model="connRepoUrl" - placeholder="https://github.com/owner/repository" - @input="connRepoUrlError = ''" - /> - <p v-if="connRepoUrlError" class="text-xs text-destructive">{{ connRepoUrlError }}</p> - <p v-else class="text-xs text-muted-foreground"> - The full HTTPS URL of the GitHub repository to index. - </p> - </div> - - <div class="space-y-1.5"> - <Label for="ds-token"> - Access Token <span class="text-destructive">*</span> - </Label> - <div class="relative"> - <Input - id="ds-token" - v-model="connToken" - :type="showToken ? 
'text' : 'password'" - placeholder="ghp_••••••••••••••••••••••••••••••••••••" - class="pr-10" - @input="connTokenError = ''" - /> - <Button - variant="ghost" - size="icon" - class="absolute right-1 top-1/2 size-7 -translate-y-1/2 text-muted-foreground" - type="button" - @click="showToken = !showToken" - > - <Eye v-if="!showToken" class="size-3.5" /> - <EyeOff v-else class="size-3.5" /> - </Button> + <div class="space-y-3"> + <div + v-for="entry in pendingSources" + :key="entry.id" + class="space-y-2 rounded-md border p-3" + > + <div class="flex items-center justify-between gap-2"> + <p class="text-xs font-mono break-all">{{ entry.url }}</p> + <Badge variant="secondary">{{ providerLabel(entry.detectedAdapterId) }}</Badge> </div> - <p v-if="connTokenError" class="text-xs text-destructive">{{ connTokenError }}</p> - <p v-else class="text-xs text-muted-foreground"> - A GitHub personal access token with <code class="rounded bg-muted px-0.5">read:repo</code> scope. - </p> + <div class="grid gap-3 md:grid-cols-2"> + <div class="space-y-1.5"> + <Label>Data Source Name <span class="text-destructive">*</span></Label> + <Input + v-model="entry.name" + placeholder="e.g. 
my-repository" + @input="entry.nameError = ''" + /> + <p v-if="entry.nameError" class="text-xs text-destructive">{{ entry.nameError }}</p> + </div> + <div class="space-y-1.5"> + <Label>Tracked Branch <span class="text-destructive">*</span></Label> + <Input + v-model="entry.branch" + placeholder="main" + @input="entry.branchError = ''" + /> + <p v-if="entry.branchError" class="text-xs text-destructive">{{ entry.branchError }}</p> + <p v-else class="text-xs text-muted-foreground">Default branch is auto-detected when available.</p> + </div> + </div> + <p v-if="entry.urlError" class="text-xs text-destructive">{{ entry.urlError }}</p> </div> + </div> - <div class="space-y-1.5"> - <Label for="ds-name"> - Data Source Name <span class="text-destructive">*</span> - </Label> + <div class="space-y-1.5"> + <Label for="ds-token"> + Access Token (optional) + </Label> + <div class="relative"> <Input - id="ds-name" - v-model="connName" - placeholder="e.g. my-repository" - @input="connNameError = ''" + id="ds-token" + v-model="connToken" + :type="showToken ? 'text' : 'password'" + placeholder="ghp_••••••••••••••••••••••••••••••••••••" + class="pr-10" + @input="connTokenError = ''" /> - <p v-if="connNameError" class="text-xs text-destructive">{{ connNameError }}</p> - <p v-else class="text-xs text-muted-foreground"> - Auto-inferred from the repository URL. You can rename it here. - </p> + <Button + variant="ghost" + size="icon" + class="absolute right-1 top-1/2 size-7 -translate-y-1/2 text-muted-foreground" + type="button" + @click="showToken = !showToken" + > + <Eye v-if="!showToken" class="size-3.5" /> + <EyeOff v-else class="size-3.5" /> + </Button> </div> + <p v-if="connTokenError" class="text-xs text-destructive">{{ connTokenError }}</p> + <p v-else class="text-xs text-muted-foreground"> + A GitHub personal access token with <code class="rounded bg-muted px-0.5">read:repo</code> scope. 
+ </p> </div> <!-- Credential security note --> @@ -1931,306 +1575,13 @@ async function handleDeleteDs() { <ChevronLeft class="mr-1 size-4" /> Back </Button> - <Button @click="nextStep"> - Continue - <ChevronRight class="ml-1 size-4" /> + <Button :disabled="connectingDataSource || detectingSourceDetails" @click="nextStep"> + <Loader2 v-if="connectingDataSource || detectingSourceDetails" class="mr-1 size-4 animate-spin" /> + Add to project </Button> </DialogFooter> </div> - <!-- ── Step 3: Intent Description ── --> - <div v-else-if="wizardStep === 3" class="space-y-5"> - <div> - <h3 class="text-sm font-semibold">Describe your intent</h3> - <p class="text-xs text-muted-foreground"> - Tell the AI agent what problems or questions you want to solve with this data. - This shapes the proposed knowledge graph ontology. - </p> - </div> - - <div class="space-y-1.5"> - <Label for="intent-text">What do you want to learn from this data?</Label> - <textarea - id="intent-text" - v-model="intentText" - placeholder="e.g. I want to understand how issues are triaged, who the most active contributors are, and how pull requests relate to releases…" - class="flex min-h-[120px] w-full resize-none rounded-md border border-input bg-transparent px-3 py-2 text-sm shadow-xs placeholder:text-muted-foreground focus-visible:outline-none focus-visible:ring-[3px] focus-visible:ring-ring/50 disabled:cursor-not-allowed disabled:opacity-50" - @input="intentError = ''" - /> - <p v-if="intentError" class="text-xs text-destructive">{{ intentError }}</p> - <p v-else class="text-xs text-muted-foreground"> - The more specific you are, the better the proposed ontology will match your needs. 
- </p> - </div> - - <DialogFooter class="pt-2"> - <Button variant="outline" @click="prevStep"> - <ChevronLeft class="mr-1 size-4" /> - Back - </Button> - <Button @click="nextStep"> - Analyse & Propose Ontology - <ChevronRight class="ml-1 size-4" /> - </Button> - </DialogFooter> - </div> - - <!-- ── Step 4: Review Proposed Ontology ── --> - <div v-else-if="wizardStep === 4" class="space-y-4"> - <!-- Scanning state --> - <div v-if="scanningOntology" class="flex flex-col items-center gap-4 py-10 text-center"> - <Loader2 class="size-10 animate-spin text-primary" /> - <div> - <p class="text-sm font-medium">Analysing your data source…</p> - <p class="text-xs text-muted-foreground"> - Scanning repository structure and applying your intent to propose an ontology. - </p> - </div> - </div> - - <!-- Proposed ontology --> - <template v-else-if="ontologyReady"> - <div> - <h3 class="text-sm font-semibold">Review proposed ontology</h3> - <p class="text-xs text-muted-foreground"> - The AI agent has proposed the following node and edge types based on your data source and intent. - You can edit or remove individual types before approving. - </p> - </div> - - <!-- Re-extraction warning note --> - <div class="flex items-start gap-2 rounded-md border border-amber-200 bg-amber-50 p-3 dark:border-amber-800 dark:bg-amber-950/30"> - <AlertTriangle class="mt-0.5 size-4 shrink-0 text-amber-600 dark:text-amber-400" /> - <p class="text-xs text-amber-700 dark:text-amber-300"> - Modifying the ontology after the initial extraction is complete will trigger a full - re-extraction of this data source. Approve carefully. 
- </p> - </div> - - <!-- Node types --> - <div class="space-y-2"> - <h4 class="text-[11px] font-semibold uppercase tracking-wider text-muted-foreground"> - Node Types ({{ proposedNodes.length }}) - </h4> - <div class="space-y-2"> - <Card - v-for="(node, idx) in proposedNodes" - :key="idx" - class="overflow-hidden" - > - <!-- View mode --> - <CardContent v-if="!node.editing" class="flex items-start gap-3 p-3"> - <Badge variant="default" class="mt-0.5 shrink-0">Node</Badge> - <div class="flex-1 min-w-0"> - <p class="text-sm font-medium">{{ node.label }}</p> - <p class="text-xs text-muted-foreground">{{ node.description }}</p> - <div class="mt-1.5 flex flex-wrap gap-1"> - <Badge - v-for="prop in node.required_properties" - :key="prop" - variant="secondary" - class="text-[10px]" - > - {{ prop }} <span class="ml-0.5 text-destructive">*</span> - </Badge> - <Badge - v-for="prop in node.optional_properties" - :key="prop" - variant="outline" - class="text-[10px]" - > - {{ prop }} - </Badge> - </div> - </div> - <div class="flex shrink-0 items-center gap-1"> - <Tooltip> - <TooltipTrigger as-child> - <Button variant="ghost" size="icon" class="size-7" @click="startEditNode(idx)"> - <Pencil class="size-3.5" /> - </Button> - </TooltipTrigger> - <TooltipContent><p>Edit type</p></TooltipContent> - </Tooltip> - <Tooltip> - <TooltipTrigger as-child> - <Button variant="ghost" size="icon" class="size-7 text-destructive hover:text-destructive" @click="removeNode(idx)"> - <Trash2 class="size-3.5" /> - </Button> - </TooltipTrigger> - <TooltipContent><p>Remove type</p></TooltipContent> - </Tooltip> - </div> - </CardContent> - - <!-- Edit mode --> - <CardContent v-else class="space-y-3 p-3"> - <div class="grid grid-cols-2 gap-3"> - <div class="space-y-1"> - <Label class="text-xs">Label</Label> - <Input v-model="node.editLabel" class="h-8 text-xs" @input="node.editError = ''" /> - <p v-if="node.editError" class="text-xs text-destructive">{{ node.editError }}</p> - </div> - <div 
class="space-y-1"> - <Label class="text-xs">Description</Label> - <Input v-model="node.editDescription" class="h-8 text-xs" /> - </div> - </div> - <div class="grid grid-cols-2 gap-3"> - <div class="space-y-1"> - <Label class="text-xs">Required properties <span class="text-muted-foreground">(comma-separated)</span></Label> - <Input v-model="node.editRequired" placeholder="e.g. name, url" class="h-8 text-xs" /> - </div> - <div class="space-y-1"> - <Label class="text-xs">Optional properties</Label> - <Input v-model="node.editOptional" placeholder="e.g. description, stars" class="h-8 text-xs" /> - </div> - </div> - <div class="flex justify-end gap-2"> - <Button variant="ghost" size="sm" class="h-7 text-xs" @click="cancelEditNode(idx)"> - <X class="mr-1 size-3" /> - Cancel - </Button> - <Button size="sm" class="h-7 text-xs" @click="saveEditNode(idx)"> - <Check class="mr-1 size-3" /> - Save - </Button> - </div> - </CardContent> - </Card> - </div> - <!-- Add Node Type button --> - <Button variant="outline" size="sm" class="mt-2 w-full gap-2" @click="addNode"> - <Plus class="size-4" /> - Add Node Type - </Button> - </div> - - <!-- Edge types --> - <div class="space-y-2"> - <h4 class="text-[11px] font-semibold uppercase tracking-wider text-muted-foreground"> - Edge Types ({{ proposedEdges.length }}) - </h4> - <div class="space-y-2"> - <Card - v-for="(edge, idx) in proposedEdges" - :key="idx" - class="overflow-hidden" - > - <!-- View mode --> - <CardContent v-if="!edge.editing" class="flex items-start gap-3 p-3"> - <Badge variant="outline" class="mt-0.5 shrink-0">Edge</Badge> - <div class="flex-1 min-w-0"> - <p class="text-sm font-medium font-mono">{{ edge.label }}</p> - <p class="text-xs text-muted-foreground">{{ edge.description }}</p> - <p class="text-xs text-muted-foreground/70 mt-0.5"> - {{ edge.from }} → {{ edge.to }} - </p> - <div v-if="edge.required_properties.length || edge.optional_properties.length" class="mt-1.5 flex flex-wrap gap-1"> - <Badge - v-for="prop in 
edge.required_properties" - :key="prop" - variant="secondary" - class="text-[10px]" - > - {{ prop }} <span class="ml-0.5 text-destructive">*</span> - </Badge> - <Badge - v-for="prop in edge.optional_properties" - :key="prop" - variant="outline" - class="text-[10px]" - > - {{ prop }} - </Badge> - </div> - </div> - <div class="flex shrink-0 items-center gap-1"> - <Tooltip> - <TooltipTrigger as-child> - <Button variant="ghost" size="icon" class="size-7" @click="startEditEdge(idx)"> - <Pencil class="size-3.5" /> - </Button> - </TooltipTrigger> - <TooltipContent><p>Edit type</p></TooltipContent> - </Tooltip> - <Tooltip> - <TooltipTrigger as-child> - <Button variant="ghost" size="icon" class="size-7 text-destructive hover:text-destructive" @click="removeEdge(idx)"> - <Trash2 class="size-3.5" /> - </Button> - </TooltipTrigger> - <TooltipContent><p>Remove type</p></TooltipContent> - </Tooltip> - </div> - </CardContent> - - <!-- Edit mode --> - <CardContent v-else class="space-y-3 p-3"> - <div class="grid grid-cols-2 gap-3"> - <div class="space-y-1"> - <Label class="text-xs">Label</Label> - <Input v-model="edge.editLabel" class="h-8 text-xs" @input="edge.editError = ''" /> - <p v-if="edge.editError" class="text-xs text-destructive">{{ edge.editError }}</p> - </div> - <div class="space-y-1"> - <Label class="text-xs">Description</Label> - <Input v-model="edge.editDescription" class="h-8 text-xs" /> - </div> - </div> - <div class="grid grid-cols-2 gap-3"> - <div class="space-y-1"> - <Label class="text-xs">From type</Label> - <Input v-model="edge.from" placeholder="e.g. Repository" class="h-8 text-xs" /> - </div> - <div class="space-y-1"> - <Label class="text-xs">To type</Label> - <Input v-model="edge.to" placeholder="e.g. 
Issue" class="h-8 text-xs" /> - </div> - </div> - <div class="grid grid-cols-2 gap-3"> - <div class="space-y-1"> - <Label class="text-xs">Required properties</Label> - <Input v-model="edge.editRequired" placeholder="comma-separated" class="h-8 text-xs" /> - </div> - <div class="space-y-1"> - <Label class="text-xs">Optional properties</Label> - <Input v-model="edge.editOptional" placeholder="comma-separated" class="h-8 text-xs" /> - </div> - </div> - <div class="flex justify-end gap-2"> - <Button variant="ghost" size="sm" class="h-7 text-xs" @click="cancelEditEdge(idx)"> - <X class="mr-1 size-3" /> - Cancel - </Button> - <Button size="sm" class="h-7 text-xs" @click="saveEditEdge(idx)"> - <Check class="mr-1 size-3" /> - Save - </Button> - </div> - </CardContent> - </Card> - </div> - <!-- Add Edge Type button --> - <Button variant="outline" size="sm" class="mt-2 w-full gap-2" @click="addEdge"> - <Plus class="size-4" /> - Add Edge Type - </Button> - </div> - </template> - - <DialogFooter v-if="!scanningOntology" class="pt-2"> - <Button variant="outline" @click="prevStep"> - <ChevronLeft class="mr-1 size-4" /> - Back - </Button> - <Button :disabled="!ontologyReady || approvingOntology" @click="approveOntology"> - <Loader2 v-if="approvingOntology" class="mr-2 size-4 animate-spin" /> - <CheckCircle2 v-else class="mr-2 size-4" /> - Approve & Start Extraction - </Button> - </DialogFooter> - </div> </DialogContent> </Dialog> diff --git a/src/dev-ui/app/tests/data-source-connection-wizard.test.ts b/src/dev-ui/app/tests/data-source-connection-wizard.test.ts index 6ab430ca1..4b78fe829 100644 --- a/src/dev-ui/app/tests/data-source-connection-wizard.test.ts +++ b/src/dev-ui/app/tests/data-source-connection-wizard.test.ts @@ -1,9 +1,12 @@ import { describe, it, expect, vi } from 'vitest' import { ADAPTERS, + detectAdapterFromUrl, + parseSourceUrls, inferNameFromRepoUrl, canAdvanceStep1, isAdapterSelectable, + validateStep1, validateStep2, buildDataSourceCreationUrl, 
buildDataSourceCreationBody, @@ -26,6 +29,32 @@ import { // ── Group 1: Adapter selection (Step 1) ─────────────────────────────────────── describe('Data Source Connection Wizard — Group 1: Adapter selection', () => { + it('test_detects_github_gitlab_and_jira_from_source_urls', () => { + expect(detectAdapterFromUrl('https://github.com/acme/repo')).toBe('github') + expect(detectAdapterFromUrl('https://gitlab.com/acme/repo')).toBe('gitlab') + expect(detectAdapterFromUrl('https://acme.atlassian.net/browse/PROJ-1')).toBe('jira') + }) + + it('test_returns_unknown_for_unrecognized_or_invalid_url', () => { + expect(detectAdapterFromUrl('https://example.com/repo')).toBe('unknown') + expect(detectAdapterFromUrl('not-a-url')).toBe('unknown') + }) + + it('test_bulk_url_parser_normalizes_multiline_entries', () => { + const parsed = parseSourceUrls(` + https://github.com/acme/repo-1 + https://github.com/acme/repo-2 + + https://github.com/acme/repo-1 + `) + expect(parsed).toHaveLength(2) + expect(parsed.map((entry) => entry.url)).toEqual([ + 'https://github.com/acme/repo-1', + 'https://github.com/acme/repo-2', + ]) + expect(parsed.every((entry) => entry.detectedAdapterId === 'github')).toBe(true) + }) + it('test_github_is_the_only_available_adapter', () => { // The adapters list has exactly one available adapter and it is GitHub. 
// This is a regression guard: adding a new adapter without updating this @@ -78,6 +107,27 @@ describe('Data Source Connection Wizard — Group 1: Adapter selection', () => { expect(canAdvanceStep1('github', 'kg-123')).toBe(true) }) + it('test_step1_validation_rejects_unavailable_detected_provider', () => { + const result = validateStep1({ + selectedKnowledgeGraphId: 'kg-1', + sourceUrl: 'https://gitlab.com/acme/repo', + detectedAdapterId: 'gitlab', + }) + expect(result.valid).toBe(false) + expect(result.providerError).toContain('coming soon') + }) + + it('test_step1_validation_accepts_github_url_with_selected_kg', () => { + const result = validateStep1({ + selectedKnowledgeGraphId: 'kg-1', + sourceUrl: 'https://github.com/acme/repo', + detectedAdapterId: 'github', + }) + expect(result.valid).toBe(true) + expect(result.sourceUrlError).toBe('') + expect(result.providerError).toBe('') + }) + it('test_unavailable_adapter_blocks_step1_advancement', () => { // Even if selectedAdapterId is set to an unavailable adapter it cannot // advance — selecting such an adapter should be blocked at selection time @@ -107,9 +157,9 @@ describe('Data Source Connection Wizard — Group 2: Connection configuration', expect(name).toBe('repo') }) - it('test_name_inference_returns_null_for_non_github_url', () => { - // Non-GitHub URLs return null so the caller can leave the name unchanged. - expect(inferNameFromRepoUrl('https://gitlab.com/org/repo')).toBeNull() + it('test_name_inference_supports_git_host_urls_and_returns_null_for_invalid', () => { + // Git host URLs can infer repository names; invalid strings return null. 
+ expect(inferNameFromRepoUrl('https://gitlab.com/org/repo')).toBe('repo') expect(inferNameFromRepoUrl('not-a-url')).toBeNull() expect(inferNameFromRepoUrl('')).toBeNull() }) diff --git a/src/dev-ui/app/tests/data-sources.test.ts b/src/dev-ui/app/tests/data-sources.test.ts index 5b0c38b6c..340fb2828 100644 --- a/src/dev-ui/app/tests/data-sources.test.ts +++ b/src/dev-ui/app/tests/data-sources.test.ts @@ -3139,7 +3139,7 @@ describe('Data Sources — kg_id query param pre-selects KG and opens wizard (Ta }) }) -describe('Extraction telemetry dashboard - structural verification', () => { +describe('Data-sources-focused layout - structural verification', () => { const { readFileSync } = require('fs') const { resolve } = require('path') const source = readFileSync( @@ -3147,42 +3147,52 @@ describe('Extraction telemetry dashboard - structural verification', () => { 'utf-8', ) - it('declares telemetry status buckets and recent jobs computeds', () => { - expect(source).toContain('telemetryStatusBuckets') - expect(source).toContain('telemetryRecentJobs') + it('keeps data-source catalog guidance and removes telemetry dashboard copy', () => { + expect(source).toContain('Data source catalog') + expect(source).not.toContain('Active workers') + expect(source).not.toContain('Estimated cost trend') }) - it('renders active worker and token usage cards', () => { - expect(source).toContain('Active workers') - expect(source).toContain('Total token usage') + it('removes scheduled maintenance orchestration from this page', () => { + expect(source).not.toContain('Scheduled maintenance orchestration') + expect(source).not.toContain('maintenance-runs/trigger') }) - it('renders estimated cost trend with 24h comparison', () => { - expect(source).toContain('Estimated cost trend') - expect(source).toContain('previous 24h') + it('renders URL-first onboarding with provider detection and coming soon messaging', () => { + expect(source).toContain('Paste your source URLs') + 
expect(source).toContain('Add another') + expect(source).toContain('Detected:') + expect(source).toContain('onboarding is coming soon, sorry.') + expect(source).toContain('Add to project') }) }) -describe('Scheduled maintenance orchestration - structural verification', () => { - const source = readFileSync( - resolve(__dirname, '../pages/data-sources/index.vue'), - 'utf-8', - ) - - it('declares maintenance schedule state and loader function', () => { - expect(source).toContain('maintenanceSchedule') - expect(source).toContain('loadMaintenanceOrchestration') - }) +describe('Bulk onboarding partial-success behavior', () => { + it('retains only failed entries when batch create is partially successful', async () => { + const pendingSources = [ + { id: '1', name: 'repo-one', url: 'https://github.com/acme/repo-one', branch: 'main' }, + { id: '2', name: 'repo-two', url: 'https://github.com/acme/repo-two', branch: 'main' }, + { id: '3', name: 'repo-three', url: 'https://github.com/acme/repo-three', branch: 'main' }, + ] + const createDataSource = vi.fn() + .mockResolvedValueOnce({ id: 'ds-1' }) + .mockRejectedValueOnce(new Error('token invalid')) + .mockResolvedValueOnce({ id: 'ds-3' }) + const failedIds: string[] = [] + let successCount = 0 - it('renders the scheduled maintenance panel and trigger action', () => { - expect(source).toContain('Scheduled maintenance orchestration') - expect(source).toContain('maintenance-runs/trigger') - expect(source).toContain('Run now') - }) + for (const entry of pendingSources) { + try { + await createDataSource(entry) + successCount += 1 + } catch { + failedIds.push(entry.id) + } + } - it('renders maintenance outcome history list', () => { - expect(source).toContain('No maintenance orchestration runs recorded yet.') - expect(source).toContain('maintenanceOutcomeTone') + const remaining = pendingSources.filter((entry) => failedIds.includes(entry.id)) + expect(successCount).toBe(2) + expect(remaining.map((entry) => 
entry.id)).toEqual(['2']) }) }) diff --git a/src/dev-ui/app/tests/task-121-spec-alignment.test.ts b/src/dev-ui/app/tests/task-121-spec-alignment.test.ts index 4e25cd661..63eda2df6 100644 --- a/src/dev-ui/app/tests/task-121-spec-alignment.test.ts +++ b/src/dev-ui/app/tests/task-121-spec-alignment.test.ts @@ -3,9 +3,11 @@ import { readFileSync } from 'fs' import { resolve } from 'path' import { ADAPTERS, + detectAdapterFromUrl, isAdapterSelectable, canAdvanceStep1, inferNameFromRepoUrl, + validateStep1, validateStep2, buildDataSourceCreationUrl, buildDataSourceCreationBody, @@ -111,12 +113,12 @@ describe('Task-121 — Requirement: Knowledge Graph Creation', () => { describe('Task-121 — Requirement: Data Source Connection — Adapter & Configuration', () => { describe('data-sources page imports and uses dataSourceWizard utilities', () => { - it('imports ADAPTERS from dataSourceWizard', () => { - expect(DS_INDEX_VUE).toContain('ADAPTERS') + it('imports detectAdapterFromUrl from dataSourceWizard', () => { + expect(DS_INDEX_VUE).toContain('detectAdapterFromUrl') }) - it('imports canAdvanceStep1 from dataSourceWizard', () => { - expect(DS_INDEX_VUE).toContain('canAdvanceStep1') + it('imports validateStep1 from dataSourceWizard', () => { + expect(DS_INDEX_VUE).toContain('validateStep1') }) it('imports validateStep2 from dataSourceWizard', () => { @@ -149,17 +151,32 @@ describe('Task-121 — Requirement: Data Source Connection — Adapter & Configu }) }) - describe('Step 1 advancement requires both adapter AND knowledge graph', () => { - it('blocked when adapter is missing even with KG selected', () => { - expect(canAdvanceStep1('', 'kg-123')).toBe(false) + describe('Step 1 validation enforces URL detection and provider availability', () => { + it('detects supported and unsupported providers from URL', () => { + expect(detectAdapterFromUrl('https://github.com/acme/repo')).toBe('github') + expect(detectAdapterFromUrl('https://gitlab.com/acme/repo')).toBe('gitlab') + 
expect(detectAdapterFromUrl('https://acme.atlassian.net/browse/ABC-1')).toBe('jira') }) - it('blocked when KG is missing even with adapter selected', () => { - expect(canAdvanceStep1('github', '')).toBe(false) + it('blocks advancement when provider is unsupported', () => { + const result = validateStep1({ + selectedKnowledgeGraphId: 'kg-123', + sourceUrl: 'https://gitlab.com/acme/repo', + detectedAdapterId: 'gitlab', + }) + expect(result.valid).toBe(false) + expect(result.providerError).toContain('coming soon') }) - it('allowed when both adapter and KG are selected', () => { - expect(canAdvanceStep1('github', 'kg-123')).toBe(true) + it('allows advancement for valid GitHub URL and selected KG', () => { + const result = validateStep1({ + selectedKnowledgeGraphId: 'kg-123', + sourceUrl: 'https://github.com/acme/repo', + detectedAdapterId: 'github', + }) + expect(result.valid).toBe(true) + expect(result.sourceUrlError).toBe('') + expect(result.providerError).toBe('') }) }) @@ -172,8 +189,8 @@ describe('Task-121 — Requirement: Data Source Connection — Adapter & Configu expect(inferNameFromRepoUrl('https://github.com/org/repo.git')).toBe('repo') }) - it('returns null for non-GitHub URLs (no overwrite)', () => { - expect(inferNameFromRepoUrl('https://gitlab.com/org/repo')).toBeNull() + it('supports name inference for GitHub and GitLab repository URLs', () => { + expect(inferNameFromRepoUrl('https://gitlab.com/org/repo')).toBe('repo') expect(inferNameFromRepoUrl('')).toBeNull() }) }) diff --git a/src/dev-ui/app/tests/task-129-spec-alignment.test.ts b/src/dev-ui/app/tests/task-129-spec-alignment.test.ts index 16845f910..6ff882e1d 100644 --- a/src/dev-ui/app/tests/task-129-spec-alignment.test.ts +++ b/src/dev-ui/app/tests/task-129-spec-alignment.test.ts @@ -873,90 +873,31 @@ describe('Task-129 — Scenario: Default landing', () => { }) }) -// ── Requirement: Ontology Design — Scenario: Intent description ─────────────── +// ── Requirement: Data Source Connection — URL-first 
onboarding ──────────────── // -// Spec: "GIVEN a user who has connected a data source -// WHEN the connection is saved -// THEN the user is prompted to describe (in free text) what problems or -// questions they want to solve with this data" +// Spec: URL-first flow with provider auto-detection and coming-soon handling. -describe('Task-129 — Scenario: Intent description', () => { - it('data-sources page has an intentText ref for the free-text description prompt', () => { - // Step 3 of the wizard: the user describes their intent before ontology proposal - expect(dataSourcesVue).toContain('intentText') +describe('Task-129 — Scenario: URL-first data source onboarding', () => { + it('data-sources page prompts for source URL first', () => { + expect(dataSourcesVue).toContain('Paste your source URLs') + expect(dataSourcesVue).toContain('sourceUrlInputs') + expect(dataSourcesVue).toContain('Add another') }) - it('intentText is validated before submitting — intentError is shown if invalid', () => { - expect(dataSourcesVue).toContain('intentError') - expect(dataSourcesVue).toContain('validateIntentText') + it('provider detection is shown with explicit coming-soon messaging', () => { + expect(dataSourcesVue).toContain('Detected provider:') + expect(dataSourcesVue).toContain('onboarding is coming soon, sorry.') }) - it('submitting intent calls beginOntologyProposal()', () => { - // After valid intent, the system starts the proposal flow - expect(dataSourcesVue).toContain('beginOntologyProposal') - }) -}) - -// ── Requirement: Ontology Design — Scenario: Agent-proposed ontology ────────── -// -// Spec: "GIVEN a free-text intent description and a connected data source -// WHEN the user submits their intent -// THEN the system performs a lightweight scan of the data source -// AND an AI agent explores the scanned data and proposes an ontology -// AND the proposed ontology is presented to the user for review" - -describe('Task-129 — Scenario: Agent-proposed ontology', () => { - 
it('data-sources page has proposedNodes ref for the agent-proposed node types', () => { - expect(dataSourcesVue).toContain('proposedNodes') - }) - - it('data-sources page has proposedEdges ref for the agent-proposed edge types', () => { - expect(dataSourcesVue).toContain('proposedEdges') - }) - - it('data-sources page has ontologyReady ref — true when proposal is ready for review', () => { - // ontologyReady = true signals the wizard to show the review step - expect(dataSourcesVue).toContain('ontologyReady') + it('wizard validates provider/URL through validateStep1 before advancing', () => { + expect(dataSourcesVue).toContain('validateStep1') + expect(dataSourcesVue).toContain('detectAdapterFromUrl') }) - it('beginOntologyProposal() resets proposal state before re-fetching', () => { - // Clearing previous state prevents stale proposal data from being shown - expect(dataSourcesVue).toContain('proposedNodes.value = []') - expect(dataSourcesVue).toContain('proposedEdges.value = []') - }) - - it('beginOntologyProposal() sets ontologyReady to true after proposal is complete', () => { - expect(dataSourcesVue).toContain('ontologyReady.value = true') - }) -}) - -// ── Requirement: Ontology Design — Scenario: Ontology review and approval ───── -// -// Spec: "GIVEN a proposed ontology -// WHEN the user reviews it -// THEN they can approve the ontology as-is -// OR iterate by editing individual types and relationships -// AND extraction begins only after the user explicitly approves" - -describe('Task-129 — Scenario: Ontology review and approval', () => { - it('data-sources page has an approveOntology() function that triggers extraction', () => { - // Spec: "extraction begins only after the user explicitly approves" - expect(dataSourcesVue).toContain('approveOntology') - }) - - it('approve button is disabled until ontologyReady is true', () => { - // Prevents the user from approving before the proposal has loaded - expect(dataSourcesVue).toContain(':disabled="!ontologyReady') 
- }) - - it('approvingOntology flag prevents double submission on approval', () => { - expect(dataSourcesVue).toContain('approvingOntology') - }) - - it('approval step is the final step in the wizard — extraction follows approval', () => { - // The wizard step after review/approval creates the data source and starts sync - expect(dataSourcesVue).toContain('approveOntology') - expect(dataSourcesVue).toContain('triggerSync') + it('connection confirmation includes tracked branch and one-time token entry', () => { + expect(dataSourcesVue).toContain('Tracked Branch') + expect(dataSourcesVue).toContain('Add to project') + expect(dataSourcesVue).toContain('Access Token (optional)') }) }) diff --git a/src/dev-ui/app/utils/dataSourceWizard.ts b/src/dev-ui/app/utils/dataSourceWizard.ts index 4fc149883..419b3f73d 100644 --- a/src/dev-ui/app/utils/dataSourceWizard.ts +++ b/src/dev-ui/app/utils/dataSourceWizard.ts @@ -24,6 +24,8 @@ export interface AdapterDefinition { available: boolean } +export type DetectedAdapterId = 'github' | 'gitlab' | 'jira' | 'unknown' + /** * The canonical list of supported (and unavailable/future) adapters. * @@ -51,6 +53,50 @@ export const ADAPTERS: AdapterDefinition[] = [ }, ] +/** + * Best-effort adapter detection from a source URL hostname/path. + */ +export function detectAdapterFromUrl(url: string): DetectedAdapterId { + if (!url.trim()) return 'unknown' + try { + const parsed = new URL(url.trim()) + const host = parsed.hostname.toLowerCase() + const path = parsed.pathname.toLowerCase() + if (host.includes('github.com')) return 'github' + if (host.includes('gitlab.com')) return 'gitlab' + if (host.includes('atlassian.net') || path.includes('/jira') || path.includes('/browse/')) { + return 'jira' + } + return 'unknown' + } catch { + return 'unknown' + } +} + +export interface ParsedSourceUrl { + url: string + detectedAdapterId: DetectedAdapterId +} + +/** + * Parses a multiline bulk-input field into normalized URL entries. 
+ * Empty lines are ignored and exact duplicates are removed. + */ +export function parseSourceUrls(input: string): ParsedSourceUrl[] { + const seen = new Set<string>() + const entries: ParsedSourceUrl[] = [] + for (const line of input.split(/\r?\n/)) { + const url = line.trim() + if (!url || seen.has(url)) continue + seen.add(url) + entries.push({ + url, + detectedAdapterId: detectAdapterFromUrl(url), + }) + } + return entries +} + // ── Adapter selection guard ──────────────────────────────────────────────────── /** @@ -81,6 +127,54 @@ export function canAdvanceStep1( return !!selectedAdapterId && !!selectedKnowledgeGraphId } +export interface Step1ValidationResult { + valid: boolean + sourceUrlError: string + providerError: string +} + +export function validateStep1(opts: { + selectedKnowledgeGraphId: string + sourceUrl: string + detectedAdapterId: DetectedAdapterId +}): Step1ValidationResult { + const result: Step1ValidationResult = { + valid: true, + sourceUrlError: '', + providerError: '', + } + + if (!opts.selectedKnowledgeGraphId.trim()) { + result.providerError = 'Select a knowledge graph to continue.' + result.valid = false + } + + if (!opts.sourceUrl.trim()) { + result.sourceUrlError = 'Source URL is required.' + result.valid = false + return result + } + + try { + // URL constructor validates format. + new URL(opts.sourceUrl.trim()) + } catch { + result.sourceUrlError = 'Enter a valid source URL.' + result.valid = false + return result + } + + if (opts.detectedAdapterId === 'unknown') { + result.providerError = 'Could not detect provider from this URL.' 
+ result.valid = false + } else if (opts.detectedAdapterId !== 'github') { + result.providerError = `${opts.detectedAdapterId[0]!.toUpperCase()}${opts.detectedAdapterId.slice(1)} support is coming soon.` + result.valid = false + } + + return result +} + // ── Name inference ───────────────────────────────────────────────────────────── /** @@ -97,9 +191,20 @@ export function canAdvanceStep1( * `'not-a-url'` → `null` */ export function inferNameFromRepoUrl(url: string): string | null { - const match = url.trim().match(/github\.com\/[^/]+\/([^/]+?)(?:\.git)?\/?$/) - if (!match || !match[1]) return null - return match[1] + if (!url.trim()) return null + try { + const parsed = new URL(url.trim()) + const parts = parsed.pathname.split('/').filter(Boolean) + // github/gitlab repositories: /owner/repo + if (parts.length >= 2) { + return parts[1]!.replace(/\.git$/, '') + } + // fallback to the last segment when available + const fallback = parts[parts.length - 1] + return fallback ? fallback.replace(/\.git$/, '') : null + } catch { + return null + } } // ── Step 2 validation ────────────────────────────────────────────────────────── From 5cae3a967c72a2b23542af838acc74234df109fd Mon Sep 17 00:00:00 2001 From: aredenba-rh <aredenba@redhat.com> Date: Fri, 22 May 2026 18:44:17 -0400 Subject: [PATCH 053/153] fix(data-sources): modernize KG selector styling in add-source dialog Replace the native Knowledge Graph dropdown in the Add Data Source popup with the shared Select UI component so contrast, hover states, and overall visual style match the rest of the app. 
Co-authored-by: Cursor <cursoragent@cursor.com> --- src/dev-ui/app/pages/data-sources/index.vue | 22 ++++++++++++++++----- src/dev-ui/app/tests/data-sources.test.ts | 7 +++++++ 2 files changed, 24 insertions(+), 5 deletions(-) diff --git a/src/dev-ui/app/pages/data-sources/index.vue b/src/dev-ui/app/pages/data-sources/index.vue index 75ab47727..537e702ea 100644 --- a/src/dev-ui/app/pages/data-sources/index.vue +++ b/src/dev-ui/app/pages/data-sources/index.vue @@ -40,6 +40,13 @@ import { Input } from '@/components/ui/input' import { Label } from '@/components/ui/label' import { Badge } from '@/components/ui/badge' import { Separator } from '@/components/ui/separator' +import { + Select, + SelectContent, + SelectItem, + SelectTrigger, + SelectValue, +} from '@/components/ui/select' import SyncPhaseIndicator from '@/components/graph/SyncPhaseIndicator.vue' import { Card, CardContent, CardHeader, CardTitle, CardDescription } from '@/components/ui/card' import { CopyableText } from '@/components/ui/copyable-text' @@ -1412,14 +1419,19 @@ async function handleDeleteDs() { <!-- Knowledge graph selection --> <div class="space-y-1.5"> <Label>Knowledge Graph <span class="text-destructive">*</span></Label> - <select + <Select v-if="!loadingKgs && knowledgeGraphs.length > 0" v-model="selectedKnowledgeGraphId" - class="flex h-9 w-full rounded-md border border-input bg-transparent px-3 py-1 text-sm shadow-xs focus-visible:outline-none focus-visible:ring-[3px] focus-visible:ring-ring/50" > - <option value="">Select a knowledge graph...</option> - <option v-for="kg in knowledgeGraphs" :key="kg.id" :value="kg.id">{{ kg.name }}</option> - </select> + <SelectTrigger> + <SelectValue placeholder="Select a knowledge graph..." 
/> + </SelectTrigger> + <SelectContent> + <SelectItem v-for="kg in knowledgeGraphs" :key="kg.id" :value="kg.id"> + {{ kg.name }} + </SelectItem> + </SelectContent> + </Select> <div v-else-if="loadingKgs" class="flex items-center gap-2 text-sm text-muted-foreground"> <Loader2 class="size-4 animate-spin" /> Loading knowledge graphs... </div> diff --git a/src/dev-ui/app/tests/data-sources.test.ts b/src/dev-ui/app/tests/data-sources.test.ts index 340fb2828..be978914a 100644 --- a/src/dev-ui/app/tests/data-sources.test.ts +++ b/src/dev-ui/app/tests/data-sources.test.ts @@ -3165,6 +3165,13 @@ describe('Data-sources-focused layout - structural verification', () => { expect(source).toContain('onboarding is coming soon, sorry.') expect(source).toContain('Add to project') }) + + it('uses shadcn Select for knowledge graph dropdown styling consistency', () => { + expect(source).toContain('<Select v-model="selectedKnowledgeGraphId">') + expect(source).toContain('SelectTrigger') + expect(source).toContain('SelectContent') + expect(source).not.toContain('<select') + }) }) describe('Bulk onboarding partial-success behavior', () => { From bdeabf7c290827899f0155fbfb45d792bc269d48 Mon Sep 17 00:00:00 2001 From: aredenba-rh <aredenba@redhat.com> Date: Tue, 26 May 2026 14:26:06 -0400 Subject: [PATCH 054/153] feat(ui): add KG-scoped data source onboarding flow Introduce k-extract-style full-page routes for connecting repositories from the knowledge graph manage workspace: wizard at /data-sources/new with post-create sequential sync, and an operations page when sources already exist. Closes #736. 
Co-authored-by: Cursor <cursoragent@cursor.com> --- .../[kgId]/data-sources/index.vue | 621 ++++++++++++++++ .../[kgId]/data-sources/new.vue | 703 ++++++++++++++++++ .../pages/knowledge-graphs/[kgId]/manage.vue | 4 +- .../app/pages/knowledge-graphs/index.vue | 4 +- .../tests/kg-data-sources-navigation.test.ts | 39 + .../knowledge-graph-manage-workspace.test.ts | 20 +- src/dev-ui/app/tests/knowledge-graphs.test.ts | 19 +- .../app/tests/task-121-spec-alignment.test.ts | 38 +- .../app/utils/kgDataSourcesNavigation.ts | 39 + src/dev-ui/app/utils/kgDataSourcesSync.ts | 41 + src/dev-ui/app/utils/kgManageWorkspace.ts | 24 +- 11 files changed, 1500 insertions(+), 52 deletions(-) create mode 100644 src/dev-ui/app/pages/knowledge-graphs/[kgId]/data-sources/index.vue create mode 100644 src/dev-ui/app/pages/knowledge-graphs/[kgId]/data-sources/new.vue create mode 100644 src/dev-ui/app/tests/kg-data-sources-navigation.test.ts create mode 100644 src/dev-ui/app/utils/kgDataSourcesNavigation.ts create mode 100644 src/dev-ui/app/utils/kgDataSourcesSync.ts diff --git a/src/dev-ui/app/pages/knowledge-graphs/[kgId]/data-sources/index.vue b/src/dev-ui/app/pages/knowledge-graphs/[kgId]/data-sources/index.vue new file mode 100644 index 000000000..614924aac --- /dev/null +++ b/src/dev-ui/app/pages/knowledge-graphs/[kgId]/data-sources/index.vue @@ -0,0 +1,621 @@ +<script setup lang="ts"> +import { ref, computed, watch, onMounted, onUnmounted, nextTick } from 'vue' +import { toast } from 'vue-sonner' +import { + Cable, + ChevronLeft, + Plus, + Loader2, + Trash2, + Settings, + RefreshCw, + ScrollText, + Building2, +} from 'lucide-vue-next' +import { + buildKgDataSourcesNewUrl, + buildKgManageUrl, + parseKgDataSourcesFocusQuery, +} from '@/utils/kgDataSourcesNavigation' +import { isMaintenanceReady } from '@/utils/kgManageWorkspace' +import { hasAnyActiveSync, type SyncRunStatus } from '@/utils/kgDataSourcesSync' +import SyncPhaseIndicator from '@/components/graph/SyncPhaseIndicator.vue' 
+import { Button } from '@/components/ui/button' +import { Input } from '@/components/ui/input' +import { Label } from '@/components/ui/label' +import { Badge } from '@/components/ui/badge' +import { Separator } from '@/components/ui/separator' +import { Card, CardContent, CardHeader, CardTitle, CardDescription } from '@/components/ui/card' +import { CopyableText } from '@/components/ui/copyable-text' +import { + Sheet, + SheetContent, + SheetHeader, + SheetTitle, + SheetDescription, +} from '@/components/ui/sheet' +import { + AlertDialog, + AlertDialogAction, + AlertDialogCancel, + AlertDialogContent, + AlertDialogDescription, + AlertDialogFooter, + AlertDialogHeader, + AlertDialogTitle, +} from '@/components/ui/alert-dialog' + +interface SyncRun { + id: string + status: SyncRunStatus + started_at: string + completed_at: string | null + error: string | null +} + +interface DiffChangedFile { + path: string + status: string +} + +interface DataSourceDiffSummary { + total_changed_files: number + added_count: number + modified_count: number + removed_count: number + renamed_count: number + files_truncated: boolean + changed_files: DiffChangedFile[] +} + +interface DataSourceItem { + id: string + name: string + adapter_type: string + knowledge_graph_id: string + clone_head_commit?: string | null + last_extraction_baseline_commit?: string | null + tracked_branch_head_commit?: string | null + sync_runs?: SyncRun[] + diff_summary?: DataSourceDiffSummary | null +} + +const route = useRoute() +const kgId = computed(() => route.params.kgId as string) +const maintainFocus = computed(() => parseKgDataSourcesFocusQuery(route.query.focus) === 'maintain') + +const { hasTenant, tenantVersion } = useTenant() +const { apiFetch } = useApiClient() + +const kgName = ref('') +const dataSources = ref<DataSourceItem[]>([]) +const loading = ref(false) +const expandedDiffLists = ref<Record<string, boolean>>({}) +const refreshingCommitRefs = ref<Record<string, boolean>>({}) +const 
adoptingBaselines = ref<Record<string, boolean>>({}) + +const manageUrl = computed(() => buildKgManageUrl(kgId.value)) +const newSourceUrl = computed(() => buildKgDataSourcesNewUrl(kgId.value)) + +const visibleDataSources = computed(() => { + if (!maintainFocus.value) return dataSources.value + return dataSources.value.filter((ds) => isMaintenanceReady(ds)) +}) + +const pollInterval = ref<ReturnType<typeof setInterval> | null>(null) + +function stopPolling() { + if (pollInterval.value !== null) { + clearInterval(pollInterval.value) + pollInterval.value = null + } +} + +function startPolling() { + if (pollInterval.value !== null) return + pollInterval.value = setInterval(async () => { + await loadDataSources() + if (!hasAnyActiveSync(dataSources.value)) { + stopPolling() + } + }, 3000) +} + +async function loadKnowledgeGraph() { + try { + const result = await apiFetch<{ name: string }>( + `/management/knowledge-graphs/${kgId.value}`, + ) + kgName.value = result.name ?? kgId.value + } catch { + kgName.value = kgId.value + } +} + +async function loadDataSources() { + if (!hasTenant.value) return + loading.value = true + try { + const sources = await apiFetch<DataSourceItem[]>( + `/management/knowledge-graphs/${kgId.value}/data-sources`, + ) + for (const ds of sources) { + try { + ds.sync_runs = await apiFetch<SyncRun[]>( + `/management/data-sources/${ds.id}/sync-runs`, + ) + } catch { + ds.sync_runs = [] + } + try { + ds.diff_summary = await apiFetch<DataSourceDiffSummary>( + `/management/data-sources/${ds.id}/diff-summary`, + ) + } catch { + ds.diff_summary = null + } + } + dataSources.value = sources + } catch { + dataSources.value = [] + } finally { + loading.value = false + } +} + +async function ensureEntryRoute() { + await loadDataSources() + if (dataSources.value.length === 0) { + await navigateTo(newSourceUrl.value, { replace: true }) + return + } + if (hasAnyActiveSync(dataSources.value)) { + startPolling() + } +} + +function isDiffExpanded(dsId: string): 
boolean { + return expandedDiffLists.value[dsId] === true +} + +function toggleDiffExpanded(dsId: string) { + expandedDiffLists.value[dsId] = !isDiffExpanded(dsId) +} + +async function triggerSync(dsId: string) { + try { + await apiFetch(`/management/data-sources/${dsId}/sync`, { method: 'POST' }) + toast.success('Sync triggered') + await loadDataSources() + if (hasAnyActiveSync(dataSources.value)) startPolling() + } catch { + toast.error('Failed to trigger sync') + } +} + +async function refreshCommitRefs(dsId: string) { + refreshingCommitRefs.value[dsId] = true + try { + await apiFetch(`/management/data-sources/${dsId}/commit-refs/refresh`, { method: 'POST' }) + toast.success('Commit references refreshed') + await loadDataSources() + } catch { + toast.error('Failed to refresh commit references') + } finally { + refreshingCommitRefs.value[dsId] = false + } +} + +async function adoptTrackedHeadBaseline(dsId: string) { + adoptingBaselines.value[dsId] = true + try { + await apiFetch(`/management/data-sources/${dsId}/commit-refs/adopt-tracked-head`, { + method: 'POST', + }) + toast.success('Baseline updated to tracked head') + await loadDataSources() + } catch (err) { + const msg = err instanceof Error ? 
err.message : 'Failed to update baseline' + toast.error('Failed to update baseline', { description: msg }) + } finally { + adoptingBaselines.value[dsId] = false + } +} + +// Edit config sheet +const editConfigOpen = ref(false) +const editConfigDs = ref<DataSourceItem | null>(null) +const editConfigName = ref('') +const editConfigToken = ref('') +const editConfigNameError = ref('') +const savingConfig = ref(false) + +function openEditConfig(ds: DataSourceItem) { + editConfigDs.value = ds + editConfigName.value = ds.name + editConfigToken.value = '' + editConfigNameError.value = '' + editConfigOpen.value = true +} + +async function handleEditConfig() { + if (!editConfigName.value.trim()) { + editConfigNameError.value = 'Data source name is required' + return + } + savingConfig.value = true + try { + const body: Record<string, unknown> = { name: editConfigName.value.trim() } + if (editConfigToken.value.trim()) { + body.credentials = { access_token: editConfigToken.value.trim() } + } + await apiFetch(`/management/data-sources/${editConfigDs.value!.id}`, { + method: 'PATCH', + body, + }) + toast.success('Data source updated') + editConfigOpen.value = false + await loadDataSources() + } catch (err) { + const msg = err instanceof Error ? 
err.message : 'Failed to update' + toast.error('Failed to update data source', { description: msg }) + } finally { + savingConfig.value = false + } +} + +// Delete +const deleteDsOpen = ref(false) +const deletingDs = ref<DataSourceItem | null>(null) +const deletingDsFlag = ref(false) + +function openDeleteDs(ds: DataSourceItem) { + deletingDs.value = ds + deleteDsOpen.value = true +} + +async function handleDeleteDs() { + if (!deletingDs.value) return + deletingDsFlag.value = true + try { + await apiFetch(`/management/data-sources/${deletingDs.value.id}`, { method: 'DELETE' }) + toast.success(`Data source "${deletingDs.value.name}" deleted`) + deleteDsOpen.value = false + await loadDataSources() + if (dataSources.value.length === 0) { + await navigateTo(newSourceUrl.value, { replace: true }) + } + } catch (err) { + const msg = err instanceof Error ? err.message : 'Failed to delete' + toast.error('Failed to delete data source', { description: msg }) + } finally { + deletingDsFlag.value = false + deletingDs.value = null + } +} + +// Sync logs sheet +const logSheetOpen = ref(false) +const selectedLogRunId = ref<string | null>(null) +const runLogs = ref<string[]>([]) +const logsLoading = ref(false) +const logsError = ref<string | null>(null) + +async function viewLogs(ds: DataSourceItem, run: SyncRun) { + selectedLogRunId.value = run.id + runLogs.value = [] + logsError.value = null + logSheetOpen.value = true + logsLoading.value = true + try { + const result = await apiFetch<{ logs: string[] }>( + `/management/data-sources/${ds.id}/sync-runs/${run.id}/logs`, + ) + runLogs.value = result.logs ?? [] + } catch (err) { + logsError.value = err instanceof Error ? 
err.message : 'Failed to load logs' + } finally { + logsLoading.value = false + } +} + +onMounted(async () => { + if (!hasTenant.value) return + await loadKnowledgeGraph() + await ensureEntryRoute() + if (maintainFocus.value) { + await nextTick() + document.getElementById('maintain-section')?.scrollIntoView({ behavior: 'smooth' }) + } +}) + +onUnmounted(() => stopPolling()) + +watch(tenantVersion, async () => { + dataSources.value = [] + await loadKnowledgeGraph() + await ensureEntryRoute() +}) +</script> + +<template> + <div class="mx-auto max-w-5xl space-y-6"> + <div class="flex flex-wrap items-center justify-between gap-3"> + <NuxtLink + :to="manageUrl" + class="inline-flex items-center text-sm text-muted-foreground hover:text-foreground" + > + <ChevronLeft class="mr-1 size-4" /> + Back to workspace overview + </NuxtLink> + <Button :disabled="!hasTenant" @click="navigateTo(newSourceUrl)"> + <Plus class="mr-2 size-4" /> + Add data source + </Button> + </div> + + <div class="flex items-center gap-3"> + <div class="rounded-lg bg-primary/10 p-2"> + <Cable class="size-5 text-primary" /> + </div> + <div> + <h1 class="text-2xl font-semibold tracking-tight">Data Sources</h1> + <p class="text-sm text-muted-foreground"> + <template v-if="kgName">{{ kgName }} — </template> + Manage connected repositories, sync runs, and commit tracking. + </p> + </div> + </div> + + <Separator /> + + <div v-if="!hasTenant" class="flex flex-col items-center gap-3 py-16 text-center text-muted-foreground"> + <Building2 class="size-10" /> + <p class="font-medium">No tenant selected</p> + </div> + + <div v-else-if="loading" class="flex justify-center py-16"> + <Loader2 class="size-8 animate-spin text-muted-foreground" /> + </div> + + <template v-else> + <Card v-if="maintainFocus"> + <CardHeader class="pb-2"> + <CardTitle class="text-sm">Maintenance focus</CardTitle> + <CardDescription class="text-xs"> + Showing sources with new commits since the last extraction baseline. 
+ </CardDescription> + </CardHeader> + </Card> + + <div + v-if="visibleDataSources.length === 0" + class="flex flex-col items-center gap-4 py-16 text-center" + > + <p class="text-sm text-muted-foreground"> + <template v-if="maintainFocus"> + No sources need maintenance right now. + </template> + <template v-else> + No data sources connected. + </template> + </p> + <Button v-if="!maintainFocus" @click="navigateTo(newSourceUrl)"> + <Plus class="mr-2 size-4" /> + Add your first data source + </Button> + </div> + + <div v-else id="maintain-section" class="space-y-3"> + <div + v-for="ds in visibleDataSources" + :key="ds.id" + class="rounded-lg border bg-card" + :class="isMaintenanceReady(ds) ? 'border-amber-300/60' : ''" + > + <div class="flex flex-wrap items-center justify-between gap-3 p-4"> + <div class="flex items-center gap-3"> + <div class="rounded-md bg-muted p-2"> + <Cable class="size-4 text-muted-foreground" /> + </div> + <div> + <p class="text-sm font-medium">{{ ds.name }}</p> + <p class="text-xs text-muted-foreground">{{ ds.adapter_type }}</p> + <CopyableText :text="ds.id" label="Data source ID copied" class="mt-0.5" /> + </div> + </div> + <div class="flex flex-wrap items-center gap-2"> + <SyncPhaseIndicator + v-if="ds.sync_runs?.[0]" + :status="ds.sync_runs[0].status" + /> + <Badge v-else variant="secondary" class="text-[10px]">Idle</Badge> + <Button size="sm" variant="outline" @click="openEditConfig(ds)"> + <Settings class="mr-1.5 size-3.5" /> + Edit Config + </Button> + <Button + size="sm" + variant="outline" + class="text-destructive hover:bg-destructive/10" + @click="openDeleteDs(ds)" + > + <Trash2 class="mr-1.5 size-3.5" /> + Delete + </Button> + <Button size="sm" variant="outline" @click="triggerSync(ds.id)"> + Sync Now + </Button> + </div> + </div> + + <div class="border-t px-4 py-3"> + <p class="mb-2 text-[11px] font-semibold uppercase tracking-wider text-muted-foreground"> + Commit Status + </p> + <div class="grid gap-2 sm:grid-cols-3"> + <div 
class="rounded-md border bg-muted/20 p-2"> + <p class="text-[10px] uppercase tracking-wider text-muted-foreground">Local clone commit</p> + <p class="mt-1 break-all font-mono text-xs">{{ ds.clone_head_commit ?? '—' }}</p> + </div> + <div class="rounded-md border bg-muted/20 p-2"> + <p class="text-[10px] uppercase tracking-wider text-muted-foreground">Last extraction baseline</p> + <p class="mt-1 break-all font-mono text-xs">{{ ds.last_extraction_baseline_commit ?? '—' }}</p> + </div> + <div class="rounded-md border bg-muted/20 p-2"> + <p class="text-[10px] uppercase tracking-wider text-muted-foreground">Tracked branch head</p> + <p class="mt-1 break-all font-mono text-xs">{{ ds.tracked_branch_head_commit ?? '—' }}</p> + </div> + </div> + <div class="mt-2 flex flex-wrap gap-2"> + <Button + size="sm" + variant="outline" + class="h-7 text-[10px]" + :disabled="refreshingCommitRefs[ds.id] === true" + @click="refreshCommitRefs(ds.id)" + > + <RefreshCw + class="mr-1 size-3" + :class="refreshingCommitRefs[ds.id] ? 'animate-spin' : ''" + /> + Refresh commits + </Button> + <Button + size="sm" + variant="outline" + class="h-7 text-[10px]" + :disabled="adoptingBaselines[ds.id] === true || !isMaintenanceReady(ds)" + @click="adoptTrackedHeadBaseline(ds.id)" + > + Adopt tracked head as baseline + </Button> + </div> + + <div + v-if="ds.diff_summary" + class="mt-3 rounded-md border p-2" + :class="isMaintenanceReady(ds) ? 'border-amber-300 bg-amber-50/50 dark:border-amber-800 dark:bg-amber-950/20' : 'bg-muted/10'" + > + <div class="flex items-center justify-between gap-2 text-xs"> + <span> + <span class="font-medium">{{ ds.diff_summary.total_changed_files }}</span> + changed files + </span> + <Badge + :variant="isMaintenanceReady(ds) ? 'default' : 'secondary'" + class="text-[10px]" + > + {{ isMaintenanceReady(ds) ? 
'New commits available' : 'Up to date' }} + </Badge> + </div> + <Button + v-if="ds.diff_summary.changed_files.length > 0" + size="sm" + variant="ghost" + class="mt-2 h-6 px-2 text-[10px]" + @click="toggleDiffExpanded(ds.id)" + > + {{ isDiffExpanded(ds.id) ? 'Hide changed files' : 'Show changed files' }} + </Button> + <div + v-if="isDiffExpanded(ds.id)" + class="mt-2 max-h-48 space-y-1 overflow-y-auto rounded-md border bg-background/80 p-2" + > + <div + v-for="file in ds.diff_summary.changed_files" + :key="`${file.status}:${file.path}`" + class="flex justify-between gap-2 text-[11px]" + > + <span class="break-all font-mono">{{ file.path }}</span> + <Badge variant="outline" class="h-5 text-[10px] uppercase">{{ file.status }}</Badge> + </div> + </div> + </div> + </div> + + <div v-if="ds.sync_runs?.length" class="border-t px-4 py-3"> + <p class="mb-2 text-[11px] font-semibold uppercase tracking-wider text-muted-foreground"> + Sync History + </p> + <div class="space-y-1"> + <div + v-for="run in ds.sync_runs" + :key="run.id" + class="flex items-center gap-2 text-xs text-muted-foreground" + > + <SyncPhaseIndicator :status="run.status" /> + <span>{{ new Date(run.started_at).toLocaleString() }}</span> + <span v-if="run.error" class="text-destructive">{{ run.error }}</span> + <Button + size="sm" + variant="ghost" + class="ml-auto h-6 px-2 text-[10px]" + @click="viewLogs(ds, run)" + > + <ScrollText class="mr-1 size-3" /> + View Logs + </Button> + </div> + </div> + </div> + </div> + </div> + </template> + + <Sheet v-model:open="editConfigOpen"> + <SheetContent> + <SheetHeader> + <SheetTitle>Edit configuration</SheetTitle> + <SheetDescription>Update name or rotate credentials.</SheetDescription> + </SheetHeader> + <div class="mt-4 space-y-4"> + <div class="space-y-1.5"> + <Label>Name</Label> + <Input v-model="editConfigName" /> + <p v-if="editConfigNameError" class="text-xs text-destructive">{{ editConfigNameError }}</p> + </div> + <div class="space-y-1.5"> + <Label>New access 
token (optional)</Label> + <Input v-model="editConfigToken" type="password" autocomplete="off" /> + </div> + <Button :disabled="savingConfig" @click="handleEditConfig"> + <Loader2 v-if="savingConfig" class="mr-2 size-4 animate-spin" /> + Save + </Button> + </div> + </SheetContent> + </Sheet> + + <Sheet v-model:open="logSheetOpen"> + <SheetContent class="sm:max-w-xl"> + <SheetHeader> + <SheetTitle>Sync logs</SheetTitle> + <SheetDescription>Run {{ selectedLogRunId }}</SheetDescription> + </SheetHeader> + <div class="mt-4 max-h-[70vh] overflow-y-auto font-mono text-xs"> + <Loader2 v-if="logsLoading" class="mx-auto size-6 animate-spin" /> + <p v-else-if="logsError" class="text-destructive">{{ logsError }}</p> + <pre v-else class="whitespace-pre-wrap">{{ runLogs.join('\n') || 'No log lines.' }}</pre> + </div> + </SheetContent> + </Sheet> + + <AlertDialog v-model:open="deleteDsOpen"> + <AlertDialogContent> + <AlertDialogHeader> + <AlertDialogTitle>Delete data source?</AlertDialogTitle> + <AlertDialogDescription> + This permanently deletes "{{ deletingDs?.name }}" and its sync history. 
+ </AlertDialogDescription> + </AlertDialogHeader> + <AlertDialogFooter> + <AlertDialogCancel>Cancel</AlertDialogCancel> + <AlertDialogAction :disabled="deletingDsFlag" @click="handleDeleteDs"> + Delete + </AlertDialogAction> + </AlertDialogFooter> + </AlertDialogContent> + </AlertDialog> + </div> +</template> diff --git a/src/dev-ui/app/pages/knowledge-graphs/[kgId]/data-sources/new.vue b/src/dev-ui/app/pages/knowledge-graphs/[kgId]/data-sources/new.vue new file mode 100644 index 000000000..98ec4254d --- /dev/null +++ b/src/dev-ui/app/pages/knowledge-graphs/[kgId]/data-sources/new.vue @@ -0,0 +1,703 @@ +<script setup lang="ts"> +import { ref, computed, watch, onMounted, onUnmounted, nextTick } from 'vue' +import { toast } from 'vue-sonner' +import { + ChevronLeft, + GitBranch, + Plus, + Trash2, + Loader2, + Check, + X, + Cable, + ArrowRight, + Settings2, + LayoutDashboard, +} from 'lucide-vue-next' +import { + inferNameFromRepoUrl, + validateStep1, + validateStep2, + buildDataSourceCreationUrl, + buildDataSourceCreationBody, + detectAdapterFromUrl, +} from '@/utils/dataSourceWizard' +import type { DetectedAdapterId } from '@/utils/dataSourceWizard' +import { + buildKgDataSourcesUrl, + buildKgManageUrl, +} from '@/utils/kgDataSourcesNavigation' +import { + isActiveSyncStatus, + isSyncTerminal, + latestSyncRun, + type SyncRunStatus, +} from '@/utils/kgDataSourcesSync' +import { Card, CardHeader, CardTitle, CardDescription, CardContent, CardFooter } from '@/components/ui/card' +import { Button } from '@/components/ui/button' +import { Badge } from '@/components/ui/badge' +import { Input } from '@/components/ui/input' +import { Label } from '@/components/ui/label' +import SyncPhaseIndicator from '@/components/graph/SyncPhaseIndicator.vue' + +type FlowPhase = 'urls' | 'configure' | 'sync' | 'stats' + +interface PendingSourceDraft { + id: string + url: string + detectedAdapterId: DetectedAdapterId + name: string + branch: string + nameError: string + urlError: string + 
branchError: string +} + +interface SourceUrlInputRow { + id: string + url: string +} + +interface CreatedSourceRow { + id: string + name: string + url: string + branch: string + syncStatus: SyncRunStatus | 'idle' | 'queued' + syncError: string | null + token_usage_total: number | null + cost_total_usd: number | null +} + +const route = useRoute() +const kgId = computed(() => route.params.kgId as string) + +const { hasTenant, tenantVersion } = useTenant() +const { apiFetch } = useApiClient() + +const kgName = ref('') +const loadingKg = ref(false) + +const flowPhase = ref<FlowPhase>('urls') +const sourceUrlInputs = ref<SourceUrlInputRow[]>([{ id: 'source-1', url: '' }]) +const sourceUrlError = ref('') +const providerError = ref('') +const pendingSources = ref<PendingSourceDraft[]>([]) +const detectingSourceDetails = ref(false) +const connToken = ref('') +const creating = ref(false) + +const createdSources = ref<CreatedSourceRow[]>([]) +const syncRunActive = ref(false) +const syncCompletedInRun = ref(0) +const syncRunTotal = ref(0) +const syncActiveName = ref<string | null>(null) +const syncStepLabel = ref('') +const readyForStats = ref(false) + +const wizardSectionRef = ref<HTMLElement | null>(null) + +const manageUrl = computed(() => buildKgManageUrl(kgId.value)) +const operationsUrl = computed(() => buildKgDataSourcesUrl(kgId.value)) + +const validUrlRows = computed(() => + sourceUrlInputs.value + .map((row) => row.url.trim()) + .filter((url) => url.length > 0), +) + +const syncProgressPercent = computed(() => { + if (syncRunTotal.value === 0) return 0 + return Math.round((syncCompletedInRun.value / syncRunTotal.value) * 100) +}) + +const completedSyncCount = computed(() => + createdSources.value.filter((s) => s.syncStatus === 'completed').length, +) + +const totalTokenUsage = computed(() => + createdSources.value.reduce((sum, s) => sum + (s.token_usage_total ?? 
0), 0), +) + +const totalSyncCost = computed(() => + createdSources.value.reduce((sum, s) => sum + (s.cost_total_usd ?? 0), 0), +) + +function addUrlField() { + sourceUrlInputs.value.push({ + id: `source-${Date.now()}-${sourceUrlInputs.value.length + 1}`, + url: '', + }) +} + +function removeUrlField(id: string) { + if (sourceUrlInputs.value.length === 1) { + sourceUrlInputs.value[0]!.url = '' + return + } + sourceUrlInputs.value = sourceUrlInputs.value.filter((row) => row.id !== id) +} + +async function loadKnowledgeGraph() { + loadingKg.value = true + try { + const result = await apiFetch<{ name: string }>( + `/management/knowledge-graphs/${kgId.value}`, + ) + kgName.value = result.name ?? kgId.value + } catch { + kgName.value = kgId.value + } finally { + loadingKg.value = false + } +} + +async function detectGithubSourceDetails(entry: PendingSourceDraft) { + if (entry.detectedAdapterId !== 'github') return + try { + const parsed = new URL(entry.url) + const [owner, repoRaw] = parsed.pathname.split('/').filter(Boolean) + const repo = repoRaw?.replace(/\.git$/, '') + if (!owner || !repo) return + const response = await fetch(`https://api.github.com/repos/${owner}/${repo}`) + if (!response.ok) return + const payload = await response.json() as { default_branch?: string; name?: string } + if (!entry.branch.trim() && payload.default_branch) { + entry.branch = payload.default_branch + } + if (!entry.name.trim() && payload.name) { + entry.name = payload.name + } + } catch { + // Best effort only. + } +} + +function proceedToConfigure() { + const seen = new Set<string>() + const parsedEntries: Array<{ url: string; detectedAdapterId: DetectedAdapterId }> = [] + for (const row of sourceUrlInputs.value) { + const url = row.url.trim() + if (!url || seen.has(url)) continue + seen.add(url) + parsedEntries.push({ url, detectedAdapterId: detectAdapterFromUrl(url) }) + } + + if (parsedEntries.length === 0) { + sourceUrlError.value = 'Provide at least one source URL.' 
+ return + } + + const drafts: PendingSourceDraft[] = parsedEntries.map((entry, index) => ({ + id: `src-${index}-${entry.url}`, + url: entry.url, + detectedAdapterId: entry.detectedAdapterId, + name: inferNameFromRepoUrl(entry.url) ?? '', + branch: '', + nameError: '', + urlError: '', + branchError: '', + })) + + let hasError = false + const providerIssues: string[] = [] + for (const entry of drafts) { + const validation = validateStep1({ + selectedKnowledgeGraphId: kgId.value, + sourceUrl: entry.url, + detectedAdapterId: entry.detectedAdapterId, + }) + entry.urlError = validation.sourceUrlError + if (validation.providerError) { + providerIssues.push(`${entry.url}: ${validation.providerError}`) + } + if (!validation.valid) hasError = true + } + + pendingSources.value = drafts + sourceUrlError.value = hasError && drafts.some((d) => !!d.urlError) + ? 'One or more URLs are invalid.' + : '' + providerError.value = providerIssues.join(' | ') + if (hasError) return + + detectingSourceDetails.value = true + Promise.all(drafts.map((d) => detectGithubSourceDetails(d))) + .finally(() => { + detectingSourceDetails.value = false + flowPhase.value = 'configure' + }) +} + +async function createDataSources() { + let hasError = false + for (const entry of pendingSources.value) { + const validation = validateStep2({ + connName: entry.name, + connRepoUrl: entry.url, + }) + entry.nameError = validation.connNameError + entry.urlError = validation.connRepoUrlError + entry.branchError = !entry.branch.trim() ? 'Tracked branch is required.' 
: '' + if (!validation.valid || entry.branchError) hasError = true + } + if (hasError) return + + creating.value = true + const rows: CreatedSourceRow[] = [] + const failed: string[] = [] + + try { + for (const entry of pendingSources.value) { + try { + const created = await apiFetch<{ id: string; name: string }>( + buildDataSourceCreationUrl(kgId.value), + { + method: 'POST', + body: buildDataSourceCreationBody({ + name: entry.name, + adapter_type: 'github', + connection_config: { + repo_url: entry.url, + branch: entry.branch, + }, + credentials: connToken.value ? { access_token: connToken.value } : undefined, + }), + }, + ) + rows.push({ + id: created.id, + name: created.name, + url: entry.url, + branch: entry.branch, + syncStatus: 'idle', + syncError: null, + token_usage_total: null, + cost_total_usd: null, + }) + } catch (err: unknown) { + const msg = err instanceof Error ? err.message : 'Failed to connect source' + failed.push(`${entry.url}: ${msg}`) + } + } + + if (rows.length === 0) { + toast.error('Connection failed', { description: failed[0] ?? 'No sources were created.' 
}) + return + } + + connToken.value = '' + createdSources.value = rows + flowPhase.value = 'sync' + readyForStats.value = false + + toast.success('Data sources connected', { + description: `${rows.length} source(s) ready for initial sync.`, + }) + + await nextTick() + wizardSectionRef.value?.scrollIntoView({ behavior: 'smooth', block: 'start' }) + + if (failed.length > 0) { + toast.warning('Some sources were not connected', { description: failed.join(' | ') }) + } + } finally { + creating.value = false + } +} + +async function refreshSourceSyncStatus(row: CreatedSourceRow) { + const runs = await apiFetch<Array<{ + status: SyncRunStatus + error: string | null + token_usage_total?: number | null + cost_total_usd?: number | null + }>>(`/management/data-sources/${row.id}/sync-runs`) + const latest = latestSyncRun(runs) + if (latest) { + row.syncStatus = latest.status + row.syncError = latest.error + row.token_usage_total = latest.token_usage_total ?? null + row.cost_total_usd = latest.cost_total_usd ?? null + } +} + +async function pollUntilTerminal(row: CreatedSourceRow, timeoutMs = 600_000) { + const started = Date.now() + while (Date.now() - started < timeoutMs) { + await refreshSourceSyncStatus(row) + if (isSyncTerminal(row.syncStatus as SyncRunStatus)) return + await new Promise((resolve) => setTimeout(resolve, 3000)) + } + row.syncStatus = 'failed' + row.syncError = 'Sync timed out' +} + +async function runSequentialSync() { + const queue = createdSources.value.filter( + (s) => s.syncStatus === 'idle' || s.syncStatus === 'failed' || s.syncStatus === 'queued', + ) + if (queue.length === 0) { + toast.error('No sources need syncing') + return + } + + syncRunActive.value = true + syncRunTotal.value = queue.length + syncCompletedInRun.value = 0 + readyForStats.value = false + + try { + for (let i = 0; i < queue.length; i++) { + const target = queue[i]! 
+ syncStepLabel.value = `${i + 1} / ${queue.length}` + syncActiveName.value = target.name + target.syncStatus = 'pending' + target.syncError = null + + try { + await apiFetch(`/management/data-sources/${target.id}/sync`, { method: 'POST' }) + await pollUntilTerminal(target) + if (target.syncStatus === 'failed') { + toast.error(`Sync failed: ${target.name}`, { + description: target.syncError ?? undefined, + }) + } + } catch (err: unknown) { + target.syncStatus = 'failed' + target.syncError = err instanceof Error ? err.message : 'Sync failed' + toast.error(`Sync failed: ${target.name}`, { description: target.syncError }) + } + + syncCompletedInRun.value = i + 1 + } + + const allCompleted = createdSources.value.every((s) => s.syncStatus === 'completed') + readyForStats.value = allCompleted + + if (allCompleted) { + flowPhase.value = 'stats' + await nextTick() + wizardSectionRef.value?.scrollIntoView({ behavior: 'smooth', block: 'start' }) + toast.success('Initial sync complete', { + description: 'Review results below, then open data sources to continue.', + }) + } else { + toast('Sync finished with issues', { + description: 'Fix failed sources from the data sources page or retry sync.', + }) + } + } finally { + syncRunActive.value = false + syncActiveName.value = null + syncStepLabel.value = '' + } +} + +function getSyncBadge(status: CreatedSourceRow['syncStatus']) { + switch (status) { + case 'completed': + return { variant: 'default' as const, label: 'Completed', icon: Check } + case 'failed': + return { variant: 'destructive' as const, label: 'Failed', icon: X } + case 'pending': + case 'ingesting': + case 'ai_extracting': + case 'applying': + return { variant: 'secondary' as const, label: 'Syncing…', icon: Loader2 } + default: + return { variant: 'outline' as const, label: 'Ready', icon: null } + } +} + +onMounted(async () => { + if (!hasTenant.value) return + await loadKnowledgeGraph() +}) + +watch(tenantVersion, () => { + loadKnowledgeGraph() +}) + 
+onUnmounted(() => { + syncRunActive.value = false +}) +</script> + +<template> + <div class="mx-auto max-w-4xl space-y-6"> + <NuxtLink + :to="manageUrl" + class="inline-flex items-center text-sm text-muted-foreground hover:text-foreground" + > + <ChevronLeft class="mr-1 size-4" /> + Back to workspace overview + </NuxtLink> + + <div v-if="!hasTenant" class="py-16 text-center text-muted-foreground"> + Select a tenant from the sidebar to connect data sources. + </div> + + <template v-else> + <!-- URLs --> + <Card v-if="flowPhase === 'urls'"> + <CardHeader> + <div class="flex items-center gap-2"> + <GitBranch class="size-5 text-primary" /> + <CardTitle>Add data sources</CardTitle> + </div> + <CardDescription> + Connect Git repositories to + <Badge v-if="kgName" variant="outline" class="mx-1">{{ kgName }}</Badge> + <span v-else-if="loadingKg" class="text-muted-foreground">loading…</span>. + You will confirm branch and credentials next, then run an initial sync. + </CardDescription> + </CardHeader> + <CardContent class="space-y-4"> + <div class="space-y-3"> + <div + v-for="row in sourceUrlInputs" + :key="row.id" + class="flex items-center gap-2" + > + <Input + v-model="row.url" + type="text" + placeholder="https://github.com/org/repo" + class="flex-1 font-mono text-sm" + /> + <Button + variant="ghost" + size="icon" + :disabled="sourceUrlInputs.length === 1 && !sourceUrlInputs[0]?.url" + @click="removeUrlField(row.id)" + > + <Trash2 class="size-4" /> + </Button> + </div> + </div> + <Button variant="outline" size="sm" type="button" @click="addUrlField"> + <Plus class="mr-2 size-4" /> + Add another URL + </Button> + <p v-if="sourceUrlError" class="text-sm text-destructive">{{ sourceUrlError }}</p> + <p v-if="providerError" class="text-sm text-destructive">{{ providerError }}</p> + <p class="text-xs text-muted-foreground"> + GitHub repositories are supported today. You can add more sources later from the + data sources page. 
+ </p> + </CardContent> + <CardFooter> + <Button + type="button" + :disabled="validUrlRows.length === 0 || detectingSourceDetails" + @click="proceedToConfigure" + > + <Loader2 v-if="detectingSourceDetails" class="mr-2 size-4 animate-spin" /> + Continue + <ArrowRight v-else class="ml-2 size-4" /> + </Button> + </CardFooter> + </Card> + + <!-- Configure before create --> + <Card v-if="flowPhase === 'configure'"> + <CardHeader> + <div class="flex items-center gap-2"> + <Cable class="size-5 text-primary" /> + <CardTitle>Configure each repository</CardTitle> + </div> + <CardDescription> + Review names and tracked branches. Use one access token for all private repos if needed. + </CardDescription> + </CardHeader> + <CardContent class="space-y-4"> + <div + v-for="entry in pendingSources" + :key="entry.id" + class="rounded-lg border p-4 space-y-3" + > + <p class="truncate font-mono text-xs text-muted-foreground">{{ entry.url }}</p> + <div class="grid gap-3 sm:grid-cols-2"> + <div class="space-y-1.5"> + <Label>Name</Label> + <Input v-model="entry.name" /> + <p v-if="entry.nameError" class="text-xs text-destructive">{{ entry.nameError }}</p> + </div> + <div class="space-y-1.5"> + <Label>Tracked branch</Label> + <Input v-model="entry.branch" placeholder="main" /> + <p v-if="entry.branchError" class="text-xs text-destructive">{{ entry.branchError }}</p> + </div> + </div> + <p v-if="entry.urlError" class="text-xs text-destructive">{{ entry.urlError }}</p> + </div> + <div class="space-y-1.5"> + <Label>GitHub access token (optional)</Label> + <Input v-model="connToken" type="password" placeholder="ghp_…" autocomplete="off" /> + <p class="text-xs text-muted-foreground"> + Required for private repositories. Applied to all sources in this batch. 
+ </p> + </div> + </CardContent> + <CardFooter class="flex justify-between"> + <Button variant="outline" type="button" @click="flowPhase = 'urls'">Back</Button> + <Button type="button" :disabled="creating" @click="createDataSources"> + <Loader2 v-if="creating" class="mr-2 size-4 animate-spin" /> + <Check v-else class="mr-2 size-4" /> + Connect data sources and sync + </Button> + </CardFooter> + </Card> + + <!-- Sync + stats --> + <div + v-if="flowPhase === 'sync' || flowPhase === 'stats'" + ref="wizardSectionRef" + class="space-y-6" + > + <Card> + <CardHeader> + <div class="flex flex-wrap items-center gap-2"> + <Badge variant="default">{{ kgName }}</Badge> + <span class="text-sm text-muted-foreground">sources connected</span> + </div> + <CardTitle class="text-base">Initial sync</CardTitle> + <CardDescription> + Run ingestion and extraction for each source. Sources sync one at a time so you can + follow progress. + </CardDescription> + </CardHeader> + <CardContent class="space-y-4"> + <div class="flex flex-wrap gap-2"> + <Button + type="button" + size="sm" + :disabled="syncRunActive || createdSources.length === 0" + @click="runSequentialSync" + > + <Loader2 v-if="syncRunActive" class="mr-2 size-4 animate-spin" /> + <GitBranch v-else class="mr-2 size-4" /> + Start initial sync + </Button> + </div> + + <div + v-if="syncRunActive" + class="space-y-2 rounded-lg border border-primary/30 bg-primary/5 p-4" + > + <div class="flex items-center justify-between text-sm"> + <span class="font-medium">Syncing {{ syncActiveName || '…' }}</span> + <span class="tabular-nums text-muted-foreground">{{ syncStepLabel }}</span> + </div> + <div class="h-2 overflow-hidden rounded-full bg-muted"> + <div + class="h-full rounded-full bg-primary transition-[width] duration-500 ease-out" + :style="{ width: `${syncProgressPercent}%` }" + /> + </div> + </div> + + <div class="space-y-4"> + <div + v-for="source in createdSources" + :key="source.id" + class="rounded-lg border p-4 transition-shadow" 
+ :class="[ + source.syncStatus === 'failed' ? 'border-destructive/50 bg-destructive/5' : '', + syncActiveName === source.name ? 'ring-2 ring-primary/40' : '', + ]" + > + <div class="flex flex-wrap items-start justify-between gap-3"> + <div class="min-w-0 flex-1 space-y-1"> + <p class="font-medium">{{ source.name }}</p> + <p class="truncate font-mono text-xs text-muted-foreground">{{ source.url }}</p> + <p v-if="source.syncError" class="text-xs text-destructive">{{ source.syncError }}</p> + </div> + <div class="flex shrink-0 flex-col items-end gap-2"> + <SyncPhaseIndicator + v-if="isActiveSyncStatus(source.syncStatus as SyncRunStatus) || source.syncStatus === 'completed' || source.syncStatus === 'failed'" + :status="(source.syncStatus === 'idle' || source.syncStatus === 'queued') ? 'pending' : (source.syncStatus as SyncRunStatus)" + /> + <Badge v-else :variant="getSyncBadge(source.syncStatus).variant"> + {{ getSyncBadge(source.syncStatus).label }} + </Badge> + </div> + </div> + </div> + </div> + </CardContent> + </Card> + + <Card v-if="flowPhase === 'stats' && readyForStats" class="border-primary/30"> + <CardHeader> + <div class="flex items-center gap-2"> + <Settings2 class="size-5 text-primary" /> + <CardTitle class="text-base">Sync summary</CardTitle> + </div> + <CardDescription> + Initial sync finished for all sources. Open data sources to manage commits, ontology, + and maintenance. 
+ </CardDescription> + </CardHeader> + <CardContent class="space-y-4"> + <div class="overflow-x-auto rounded-md border"> + <table class="w-full min-w-[320px] text-sm"> + <thead> + <tr class="border-b bg-muted/50 text-left"> + <th class="px-3 py-2 font-medium">Data source</th> + <th class="px-3 py-2 text-right font-medium">Status</th> + <th class="px-3 py-2 text-right font-medium">Tokens</th> + <th class="px-3 py-2 text-right font-medium">Cost (USD)</th> + </tr> + </thead> + <tbody> + <tr + v-for="s in createdSources" + :key="s.id" + class="border-b border-border/60 last:border-0" + > + <td class="px-3 py-2 font-medium">{{ s.name }}</td> + <td class="px-3 py-2 text-right"> + <Badge variant="default" class="text-[10px]">Completed</Badge> + </td> + <td class="px-3 py-2 text-right tabular-nums"> + {{ s.token_usage_total?.toLocaleString() ?? '—' }} + </td> + <td class="px-3 py-2 text-right tabular-nums text-muted-foreground"> + {{ s.cost_total_usd != null ? s.cost_total_usd.toFixed(4) : '—' }} + </td> + </tr> + </tbody> + </table> + </div> + <p class="text-sm text-muted-foreground"> + <span class="font-medium text-foreground">{{ completedSyncCount }}</span> + source{{ completedSyncCount === 1 ? '' : 's' }} synced. + <template v-if="totalTokenUsage > 0"> + Total tokens: {{ totalTokenUsage.toLocaleString() }}. + </template> + <template v-if="totalSyncCost > 0"> + Estimated cost: ${{ totalSyncCost.toFixed(4) }}. 
+ </template> + </p> + <div class="flex flex-col gap-3 sm:flex-row sm:flex-wrap"> + <Button as-child> + <NuxtLink :to="operationsUrl" class="inline-flex items-center gap-2"> + <Check class="size-4" /> + Open data sources + </NuxtLink> + </Button> + <Button as-child variant="outline"> + <NuxtLink :to="manageUrl" class="inline-flex items-center gap-2"> + <LayoutDashboard class="size-4" /> + Back to workspace overview + </NuxtLink> + </Button> + </div> + </CardContent> + </Card> + </div> + </template> + </div> +</template> diff --git a/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue b/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue index 017847d7e..8f175b9c2 100644 --- a/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue +++ b/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue @@ -487,7 +487,9 @@ async function applyInlineMutations() { } function openWorkspaceStep(stepId: WorkspaceStepId) { - navigateTo(resolveStepDestination(kgId.value, stepId)) + navigateTo(resolveStepDestination(kgId.value, stepId, { + dataSourceCount: dataSourceCount.value, + })) } function returnToWorkspaceOverview() { diff --git a/src/dev-ui/app/pages/knowledge-graphs/index.vue b/src/dev-ui/app/pages/knowledge-graphs/index.vue index 28c56c82b..d994f5021 100644 --- a/src/dev-ui/app/pages/knowledge-graphs/index.vue +++ b/src/dev-ui/app/pages/knowledge-graphs/index.vue @@ -155,14 +155,14 @@ async function handleCreate() { }, }, ) - // Pass the new KG id so the data-sources wizard pre-selects it automatically. + // Direct the user to kg-scoped data source onboarding for the new graph. // This satisfies: "AND the user is prompted to add their first data source" // with the wizard scoped to the newly created knowledge graph. 
toast.success(`Knowledge graph "${createName.value.trim()}" created`, { description: 'Next: connect a data source to start populating your graph.', action: { label: 'Add Data Source', - onClick: () => navigateTo(`/data-sources?kg_id=${result.id}`), + onClick: () => navigateTo(`/knowledge-graphs/${result.id}/data-sources/new`), }, duration: 8000, }) diff --git a/src/dev-ui/app/tests/kg-data-sources-navigation.test.ts b/src/dev-ui/app/tests/kg-data-sources-navigation.test.ts new file mode 100644 index 000000000..53134eea5 --- /dev/null +++ b/src/dev-ui/app/tests/kg-data-sources-navigation.test.ts @@ -0,0 +1,39 @@ +import { describe, it, expect } from 'vitest' +import { + buildKgDataSourcesNewUrl, + buildKgDataSourcesUrl, + buildKgManageUrl, + parseKgDataSourcesFocusQuery, + resolveKgDataSourcesEntryUrl, +} from '../utils/kgDataSourcesNavigation' + +describe('kgDataSourcesNavigation', () => { + it('builds new onboarding URL', () => { + expect(buildKgDataSourcesNewUrl('kg-1')).toBe('/knowledge-graphs/kg-1/data-sources/new') + }) + + it('builds operations URL with optional maintain focus', () => { + expect(buildKgDataSourcesUrl('kg-1')).toBe('/knowledge-graphs/kg-1/data-sources') + expect(buildKgDataSourcesUrl('kg-1', { focus: 'maintain' })).toBe( + '/knowledge-graphs/kg-1/data-sources?focus=maintain', + ) + }) + + it('resolves entry URL from data source count', () => { + expect(resolveKgDataSourcesEntryUrl('kg-1', 0)).toBe( + '/knowledge-graphs/kg-1/data-sources/new', + ) + expect(resolveKgDataSourcesEntryUrl('kg-1', 2)).toBe( + '/knowledge-graphs/kg-1/data-sources', + ) + }) + + it('builds manage workspace return URL', () => { + expect(buildKgManageUrl('kg-abc')).toBe('/knowledge-graphs/kg-abc/manage') + }) + + it('parses maintain focus query', () => { + expect(parseKgDataSourcesFocusQuery('maintain')).toBe('maintain') + expect(parseKgDataSourcesFocusQuery('other')).toBeNull() + }) +}) diff --git a/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts 
b/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts index 5c9c74b24..4a770be9b 100644 --- a/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts +++ b/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts @@ -295,13 +295,21 @@ describe('KG-MANAGE-004 - step card status semantics', () => { describe('KG-MANAGE-005 - graph-scoped data sources step', () => { it('keeps data-sources route utility for workspace cards but not graph-management redirects', () => { expect(manageWorkspaceVue).not.toContain('navigateTo(buildDataSourcesStepUrl(kgId))') - expect(buildDataSourcesStepUrl('kg-abc')).toBe('/data-sources?kg_id=kg-abc&from=manage') + expect(buildDataSourcesStepUrl('kg-abc', 0)).toBe('/knowledge-graphs/kg-abc/data-sources/new') + expect(buildDataSourcesStepUrl('kg-abc', 2)).toBe('/knowledge-graphs/kg-abc/data-sources') }) - it('data-sources page preserves manage return path without auto-opening wizard', () => { - expect(dataSourcesVue).toContain('from=manage') - expect(dataSourcesVue).toContain('scopedKnowledgeGraphId') - expect(dataSourcesVue).toContain('Back to workspace overview') + it('manage workspace passes data source count when opening data-sources step', () => { + expect(manageWorkspaceVue).toContain('dataSourceCount: dataSourceCount.value') + }) + + it('kg-scoped data sources pages preserve manage return path', () => { + const kgDataSourcesIndex = readFileSync( + resolve(__dirname, '../pages/knowledge-graphs/[kgId]/data-sources/index.vue'), + 'utf-8', + ) + expect(kgDataSourcesIndex).toContain('Back to workspace overview') + expect(kgDataSourcesIndex).toContain('buildKgManageUrl') }) }) @@ -309,7 +317,7 @@ describe('KG-MANAGE-015 - graph-scoped maintain step and round trip', () => { it('keeps maintain route utility for workspace cards but not graph-management redirects', () => { expect(manageWorkspaceVue).not.toContain('navigateTo(buildMaintainStepUrl(kgId))') expect(buildMaintainStepUrl('kg-abc')).toBe( - 
'/data-sources?kg_id=kg-abc&from=manage&focus=maintain', + '/knowledge-graphs/kg-abc/data-sources?focus=maintain', ) }) diff --git a/src/dev-ui/app/tests/knowledge-graphs.test.ts b/src/dev-ui/app/tests/knowledge-graphs.test.ts index 2c6e34ba8..a79d898df 100644 --- a/src/dev-ui/app/tests/knowledge-graphs.test.ts +++ b/src/dev-ui/app/tests/knowledge-graphs.test.ts @@ -1025,8 +1025,7 @@ describe('Knowledge Graph Creation — prompt to add first data source', () => { method: 'POST', body: { name: createName.value.trim() }, }) - // Include kg_id so data-sources wizard pre-selects the new knowledge graph. - actionOnClick = () => navigateTo(`/data-sources?kg_id=${result.id}`) + actionOnClick = () => navigateTo(`/knowledge-graphs/${result.id}/data-sources/new`) } finally { creating.value = false } @@ -1035,7 +1034,7 @@ describe('Knowledge Graph Creation — prompt to add first data source', () => { await handleCreate() expect(actionOnClick).toBeDefined() actionOnClick!() - expect(navigateTo).toHaveBeenCalledWith('/data-sources?kg_id=kg-new') + expect(navigateTo).toHaveBeenCalledWith('/knowledge-graphs/kg-new/data-sources/new') }) it('toast is not fired when KG creation fails (API error)', async () => { @@ -1097,7 +1096,7 @@ describe('Knowledge Graph Creation — KG-ID-scoped navigation (Task-101)', () = body: { name: createName.value.trim() }, }) // Capture the URL used in the action onClick - capturedUrl = `/data-sources?kg_id=${result.id}` + capturedUrl = `/knowledge-graphs/${result.id}/data-sources/new` navigateTo(capturedUrl) } finally { creating.value = false @@ -1105,17 +1104,17 @@ describe('Knowledge Graph Creation — KG-ID-scoped navigation (Task-101)', () = } await handleCreate() - expect(capturedUrl).toBe('/data-sources?kg_id=kg-abc-123') - expect(navigateTo).toHaveBeenCalledWith('/data-sources?kg_id=kg-abc-123') + expect(capturedUrl).toBe('/knowledge-graphs/kg-abc-123/data-sources/new') + 
expect(navigateTo).toHaveBeenCalledWith('/knowledge-graphs/kg-abc-123/data-sources/new') }) it('uses id from API response — not a hardcoded value', async () => { // Different KG IDs to verify the implementation reads from the response, // not a hardcoded string. const testCases = [ - { apiId: 'kg-aaa-111', expectedUrl: '/data-sources?kg_id=kg-aaa-111' }, - { apiId: 'kg-bbb-222', expectedUrl: '/data-sources?kg_id=kg-bbb-222' }, - { apiId: 'kg-ccc-333', expectedUrl: '/data-sources?kg_id=kg-ccc-333' }, + { apiId: 'kg-aaa-111', expectedUrl: '/knowledge-graphs/kg-aaa-111/data-sources/new' }, + { apiId: 'kg-bbb-222', expectedUrl: '/knowledge-graphs/kg-bbb-222/data-sources/new' }, + { apiId: 'kg-ccc-333', expectedUrl: '/knowledge-graphs/kg-ccc-333/data-sources/new' }, ] for (const { apiId, expectedUrl } of testCases) { @@ -1133,7 +1132,7 @@ describe('Knowledge Graph Creation — KG-ID-scoped navigation (Task-101)', () = method: 'POST', body: { name: createName.value.trim() }, }) - navigateTo(`/data-sources?kg_id=${result.id}`) + navigateTo(`/knowledge-graphs/${result.id}/data-sources/new`) } finally { creating.value = false } diff --git a/src/dev-ui/app/tests/task-121-spec-alignment.test.ts b/src/dev-ui/app/tests/task-121-spec-alignment.test.ts index 63eda2df6..10f545c9a 100644 --- a/src/dev-ui/app/tests/task-121-spec-alignment.test.ts +++ b/src/dev-ui/app/tests/task-121-spec-alignment.test.ts @@ -49,10 +49,8 @@ const KG_INDEX_VUE = readFileSync( describe('Task-121 — Requirement: Knowledge Graph Creation', () => { describe('Post-creation prompt: navigates to data-sources wizard with new KG scoped', () => { - it('knowledge-graphs page emits navigateTo to /data-sources?kg_id=', () => { - // The toast action must direct the user to /data-sources scoped to the new - // knowledge graph so the wizard pre-opens with the correct KG selected. 
- expect(KG_INDEX_VUE).toContain('/data-sources?kg_id=') + it('knowledge-graphs page navigates to kg-scoped onboarding after create', () => { + expect(KG_INDEX_VUE).toContain('/knowledge-graphs/${result.id}/data-sources/new') }) it('knowledge-graphs page constructs the navigation URL from the API result id', () => { @@ -404,36 +402,20 @@ describe('Task-121 — Requirement: Backend API Alignment — Parent context', ( method: 'POST', body: { name: createName.value }, }) - // 2. Post-creation: navigate to data-sources with new KG ID - postCreationUrl = `/data-sources?kg_id=${result.id}` + postCreationUrl = `/knowledge-graphs/${result.id}/data-sources/new` navigateTo(postCreationUrl) } await handleCreate() - // 3. The URL includes the exact KG ID returned by the API - expect(postCreationUrl).toBe('/data-sources?kg_id=kg-new-789') - expect(navigateTo).toHaveBeenCalledWith('/data-sources?kg_id=kg-new-789') + expect(postCreationUrl).toBe('/knowledge-graphs/kg-new-789/data-sources/new') + expect(navigateTo).toHaveBeenCalledWith('/knowledge-graphs/kg-new-789/data-sources/new') - // 4. The data-sources page would extract this param and call openWizard - const routeQuery = { kg_id: 'kg-new-789' } - const preselectedKgId = routeQuery.kg_id as string | undefined - expect(preselectedKgId).toBe('kg-new-789') - - // 5. openWizard initialises selectedKnowledgeGraphId with the param - const wizardState = { selectedKnowledgeGraphId: '' } - function openWizard(preselectedId?: string) { - wizardState.selectedKnowledgeGraphId = preselectedId ?? '' - } - openWizard(preselectedKgId) - expect(wizardState.selectedKnowledgeGraphId).toBe('kg-new-789') - - // 6. Step-1 can advance immediately (adapter still needs selection, but KG is set) - expect(canAdvanceStep1('github', wizardState.selectedKnowledgeGraphId)).toBe(true) - - // 7. 
Creation URL uses the pre-selected KG ID - const creationUrl = buildDataSourceCreationUrl(wizardState.selectedKnowledgeGraphId) - expect(creationUrl).toBe('/management/knowledge-graphs/kg-new-789/data-sources') + const kgIdFromRoute = 'kg-new-789' + expect(canAdvanceStep1('github', kgIdFromRoute)).toBe(true) + expect(buildDataSourceCreationUrl(kgIdFromRoute)).toBe( + '/management/knowledge-graphs/kg-new-789/data-sources', + ) }) }) }) diff --git a/src/dev-ui/app/utils/kgDataSourcesNavigation.ts b/src/dev-ui/app/utils/kgDataSourcesNavigation.ts new file mode 100644 index 000000000..c17f7f154 --- /dev/null +++ b/src/dev-ui/app/utils/kgDataSourcesNavigation.ts @@ -0,0 +1,39 @@ +/** + * Knowledge-graph–scoped data source routes (manage workspace entry points). + * + * Mirrors k-extract's split between `/designer/new` (first-time onboarding) and + * `/projects/:name/phase1` (ongoing data source operations). + */ + +export type KgDataSourcesFocus = 'maintain' + +export function buildKgDataSourcesNewUrl(kgId: string): string { + return `/knowledge-graphs/${encodeURIComponent(kgId)}/data-sources/new` +} + +export function buildKgDataSourcesUrl(kgId: string, opts?: { focus?: KgDataSourcesFocus }): string { + const base = `/knowledge-graphs/${encodeURIComponent(kgId)}/data-sources` + if (opts?.focus === 'maintain') { + return `${base}?focus=maintain` + } + return base +} + +export function buildKgManageUrl(kgId: string): string { + return `/knowledge-graphs/${encodeURIComponent(kgId)}/manage` +} + +/** + * Where "Data Sources" from KG manage should land. + * Zero sources → onboarding wizard; otherwise → operations page (phase1 equivalent). + */ +export function resolveKgDataSourcesEntryUrl(kgId: string, dataSourceCount: number): string { + if (dataSourceCount <= 0) { + return buildKgDataSourcesNewUrl(kgId) + } + return buildKgDataSourcesUrl(kgId) +} + +export function parseKgDataSourcesFocusQuery(focus: unknown): KgDataSourcesFocus | null { + return focus === 'maintain' ? 
'maintain' : null +} diff --git a/src/dev-ui/app/utils/kgDataSourcesSync.ts b/src/dev-ui/app/utils/kgDataSourcesSync.ts new file mode 100644 index 000000000..ebaf754eb --- /dev/null +++ b/src/dev-ui/app/utils/kgDataSourcesSync.ts @@ -0,0 +1,41 @@ +export type SyncRunStatus = + | 'pending' + | 'ingesting' + | 'ai_extracting' + | 'applying' + | 'completed' + | 'failed' + +export const ACTIVE_SYNC_STATUSES: SyncRunStatus[] = [ + 'pending', + 'ingesting', + 'ai_extracting', + 'applying', +] + +export function isActiveSyncStatus(status: SyncRunStatus | undefined): boolean { + if (!status) return false + return ACTIVE_SYNC_STATUSES.includes(status) +} + +export function isSyncTerminal(status: SyncRunStatus | undefined): boolean { + return status === 'completed' || status === 'failed' +} + +export interface SyncRunSummary { + id: string + status: SyncRunStatus + error: string | null + token_usage_total?: number | null + cost_total_usd?: number | null +} + +export function latestSyncRun<T extends SyncRunSummary>(runs: T[] | undefined): T | undefined { + return runs?.[0] +} + +export function hasAnyActiveSync<T extends { sync_runs?: SyncRunSummary[] }>( + sources: T[], +): boolean { + return sources.some((ds) => isActiveSyncStatus(latestSyncRun(ds.sync_runs)?.status)) +} diff --git a/src/dev-ui/app/utils/kgManageWorkspace.ts b/src/dev-ui/app/utils/kgManageWorkspace.ts index 7bec05d4c..6e182d51a 100644 --- a/src/dev-ui/app/utils/kgManageWorkspace.ts +++ b/src/dev-ui/app/utils/kgManageWorkspace.ts @@ -1,5 +1,14 @@ +import { + buildKgDataSourcesUrl, + resolveKgDataSourcesEntryUrl, +} from '@/utils/kgDataSourcesNavigation' + export type WorkspaceStepId = 'data-sources' | 'graph-management' | 'mutation-logs' | 'maintain' +export interface StepDestinationContext { + dataSourceCount: number +} + export type StepStatusLabel = 'ready' | 'in_progress' | 'needs_attention' | 'blocked' export type StepActionLabel = 'Open' | 'Revisit' | 'Run' @@ -62,12 +71,12 @@ export function 
isMaintenanceReady(ds: { return ds.last_extraction_baseline_commit !== ds.tracked_branch_head_commit } -export function buildDataSourcesStepUrl(kgId: string): string { - return `/data-sources?kg_id=${encodeURIComponent(kgId)}&from=manage` +export function buildDataSourcesStepUrl(kgId: string, dataSourceCount = 0): string { + return resolveKgDataSourcesEntryUrl(kgId, dataSourceCount) } export function buildMaintainStepUrl(kgId: string): string { - return `/data-sources?kg_id=${encodeURIComponent(kgId)}&from=manage&focus=maintain` + return buildKgDataSourcesUrl(kgId, { focus: 'maintain' }) } export function buildManageStepUrl(kgId: string, step?: WorkspaceStepId): string { @@ -306,10 +315,15 @@ export function buildSuggestedNextStep(input: WorkspaceOverviewInputs): Suggeste } } -export function resolveStepDestination(kgId: string, stepId: WorkspaceStepId): string { +export function resolveStepDestination( + kgId: string, + stepId: WorkspaceStepId, + context?: StepDestinationContext, +): string { + const dataSourceCount = context?.dataSourceCount ?? 
0 switch (stepId) { case 'data-sources': - return buildDataSourcesStepUrl(kgId) + return buildDataSourcesStepUrl(kgId, dataSourceCount) case 'maintain': return buildMaintainStepUrl(kgId) case 'graph-management': From cbc709e105a1e25015545b50e0ea68eb03cd8869 Mon Sep 17 00:00:00 2001 From: aredenba-rh <aredenba@redhat.com> Date: Wed, 27 May 2026 16:17:37 -0400 Subject: [PATCH 055/153] feat(sync): add ingest-only pipeline and ingested status for KG onboarding prep --- env/api.env | 2 +- ...2d3e4f5a6b_add_ingested_sync_run_status.py | 38 ++++++++ .../ingestion/infrastructure/event_handler.py | 86 +++++++++++------ src/api/main.py | 1 + .../services/data_source_service.py | 10 +- .../domain/aggregates/data_source.py | 3 + .../domain/entities/data_source_sync_run.py | 17 +++- .../management/domain/events/data_source.py | 3 + .../value_objects/sync_pipeline_mode.py | 7 ++ .../models/data_source_sync_run.py | 3 +- .../infrastructure/sync_lifecycle_handler.py | 23 ++++- .../presentation/data_sources/models.py | 17 +++- .../presentation/data_sources/routes.py | 5 + .../test_ingestion_event_handler.py | 32 +++++++ .../test_sync_lifecycle_handler.py | 30 +++++- .../presentation/test_data_sources_routes.py | 22 +++++ src/api/uv.lock | 2 +- .../components/graph/SyncPhaseIndicator.vue | 12 ++- .../[kgId]/data-sources/new.vue | 94 ++++++++----------- src/dev-ui/app/utils/kgDataSourcesSync.ts | 3 +- 20 files changed, 314 insertions(+), 96 deletions(-) create mode 100644 src/api/infrastructure/migrations/versions/fc2d3e4f5a6b_add_ingested_sync_run_status.py create mode 100644 src/api/management/domain/value_objects/sync_pipeline_mode.py diff --git a/env/api.env b/env/api.env index 781143af9..868ea6bf0 100644 --- a/env/api.env +++ b/env/api.env @@ -10,7 +10,7 @@ SPICEDB_ENDPOINT="spicedb:50051" SPICEDB_PRESHARED_KEY="changeme" KARTOGRAPH_CORS_ORIGINS=["http://localhost:3000"] KARTOGRAPH_IAM_BOOTSTRAP_ADMIN_USERNAMES='["alice"]' -KARTOGRAPH_IAM_SINGLE_TENANT_MODE=false 
+KARTOGRAPH_IAM_SINGLE_TENANT_MODE=true # Generate with uv run python -c "from cryptography.fernet import Fernet; print(Fernet.generate_key().decode())" KARTOGRAPH_MGMT_ENCRYPTION_KEY="vwN4rUcH-KL-UyJsL8hc6apftRUTovwec6L2M5uF5OE=" # Extraction runtime defaults to in-memory adapters. Set backend=container and diff --git a/src/api/infrastructure/migrations/versions/fc2d3e4f5a6b_add_ingested_sync_run_status.py b/src/api/infrastructure/migrations/versions/fc2d3e4f5a6b_add_ingested_sync_run_status.py new file mode 100644 index 000000000..b7ab2358d --- /dev/null +++ b/src/api/infrastructure/migrations/versions/fc2d3e4f5a6b_add_ingested_sync_run_status.py @@ -0,0 +1,38 @@ +"""add ingested sync run status + +Adds ``ingested`` as a terminal sync-run status for ingest-only pipeline runs +that prepare ingestion context without AI extraction. + +Revision ID: fc2d3e4f5a6b +Revises: fb1c2d3e4f5a +Create Date: 2026-05-26 +""" + +from typing import Sequence, Union + +from alembic import op + +revision: str = "fc2d3e4f5a6b" +down_revision: Union[str, Sequence[str], None] = "fb1c2d3e4f5a" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + op.drop_constraint("ck_sync_runs_status", "data_source_sync_runs") + op.create_check_constraint( + "ck_sync_runs_status", + "data_source_sync_runs", + "status IN ('pending', 'ingesting', 'ai_extracting', 'applying', " + "'ingested', 'completed', 'failed')", + ) + + +def downgrade() -> None: + op.drop_constraint("ck_sync_runs_status", "data_source_sync_runs") + op.create_check_constraint( + "ck_sync_runs_status", + "data_source_sync_runs", + "status IN ('pending', 'ingesting', 'ai_extracting', 'applying', " + "'completed', 'failed')", + ) diff --git a/src/api/ingestion/infrastructure/event_handler.py b/src/api/ingestion/infrastructure/event_handler.py index 6eb08ffb2..788920217 100644 --- a/src/api/ingestion/infrastructure/event_handler.py +++ 
b/src/api/ingestion/infrastructure/event_handler.py @@ -94,20 +94,38 @@ async def handle( knowledge_graph_id = payload["knowledge_graph_id"] now = datetime.now(UTC) + pipeline_mode = payload.get("pipeline_mode", "full") + ingest_only = pipeline_mode == "ingest_only" + if payload.get("no_changes_detected") is True: - await self._outbox.append( - event_type="MutationsApplied", - payload={ - "sync_run_id": sync_run_id, - "data_source_id": data_source_id, - "knowledge_graph_id": knowledge_graph_id, - "no_changes_detected": True, - "occurred_at": now.isoformat(), - }, - occurred_at=now, - aggregate_type="sync_run", - aggregate_id=sync_run_id, - ) + if ingest_only: + await self._outbox.append( + event_type="IngestionPrepared", + payload={ + "sync_run_id": sync_run_id, + "data_source_id": data_source_id, + "knowledge_graph_id": knowledge_graph_id, + "no_changes_detected": True, + "occurred_at": now.isoformat(), + }, + occurred_at=now, + aggregate_type="sync_run", + aggregate_id=sync_run_id, + ) + else: + await self._outbox.append( + event_type="MutationsApplied", + payload={ + "sync_run_id": sync_run_id, + "data_source_id": data_source_id, + "knowledge_graph_id": knowledge_graph_id, + "no_changes_detected": True, + "occurred_at": now.isoformat(), + }, + occurred_at=now, + aggregate_type="sync_run", + aggregate_id=sync_run_id, + ) return try: @@ -119,7 +137,6 @@ async def handle( connection_config=payload.get("connection_config", {}), credentials_path=payload.get("credentials_path"), tenant_id=payload.get("tenant_id"), - credentials=payload.get("credentials"), credentials=runtime_credentials or payload.get("credentials"), baseline_commit=payload.get("baseline_commit"), ) @@ -144,16 +161,31 @@ async def handle( # Ingestion succeeded — append success event outside the try block so # that an outbox write failure is not misclassified as IngestionFailed. 
- await self._outbox.append( - event_type="JobPackageProduced", - payload={ - "sync_run_id": sync_run_id, - "data_source_id": data_source_id, - "knowledge_graph_id": knowledge_graph_id, - "job_package_id": str(job_package_id), - "occurred_at": now.isoformat(), - }, - occurred_at=now, - aggregate_type="sync_run", - aggregate_id=sync_run_id, - ) + if ingest_only: + await self._outbox.append( + event_type="IngestionPrepared", + payload={ + "sync_run_id": sync_run_id, + "data_source_id": data_source_id, + "knowledge_graph_id": knowledge_graph_id, + "job_package_id": str(job_package_id), + "occurred_at": now.isoformat(), + }, + occurred_at=now, + aggregate_type="sync_run", + aggregate_id=sync_run_id, + ) + else: + await self._outbox.append( + event_type="JobPackageProduced", + payload={ + "sync_run_id": sync_run_id, + "data_source_id": data_source_id, + "knowledge_graph_id": knowledge_graph_id, + "job_package_id": str(job_package_id), + "occurred_at": now.isoformat(), + }, + occurred_at=now, + aggregate_type="sync_run", + aggregate_id=sync_run_id, + ) diff --git a/src/api/main.py b/src/api/main.py index 257c9f0a6..63fd2a7a6 100644 --- a/src/api/main.py +++ b/src/api/main.py @@ -87,6 +87,7 @@ class _SessionedSyncLifecycleHandler: { "SyncStarted", "JobPackageProduced", + "IngestionPrepared", "IngestionFailed", "MutationLogProduced", "ExtractionFailed", diff --git a/src/api/management/application/services/data_source_service.py b/src/api/management/application/services/data_source_service.py index b128052fc..fea3551cc 100644 --- a/src/api/management/application/services/data_source_service.py +++ b/src/api/management/application/services/data_source_service.py @@ -604,12 +604,16 @@ async def trigger_sync( self, user_id: str, ds_id: str, + *, + pipeline_mode: str = "full", ) -> DataSourceSyncRun: """Trigger a sync for a data source. 
Args: user_id: The user triggering the sync ds_id: The data source ID + pipeline_mode: ``full`` (default) or ``ingest_only`` to prepare ingestion + context without running AI extraction or graph application Returns: The created DataSourceSyncRun entity @@ -658,7 +662,11 @@ async def trigger_sync( # Record SyncStarted event on the data source aggregate. # This event carries the sync_run_id so lifecycle handlers # can update the correct sync run record. - ds.request_sync(sync_run_id=sync_run.id, requested_by=user_id) + ds.request_sync( + sync_run_id=sync_run.id, + requested_by=user_id, + pipeline_mode=pipeline_mode, + ) await self._ds_repo.save(ds) await self._session.commit() diff --git a/src/api/management/domain/aggregates/data_source.py b/src/api/management/domain/aggregates/data_source.py index af30f166e..b0dac1b28 100644 --- a/src/api/management/domain/aggregates/data_source.py +++ b/src/api/management/domain/aggregates/data_source.py @@ -311,6 +311,7 @@ def request_sync( sync_run_id: str, *, requested_by: str | None = None, + pipeline_mode: str = "full", ) -> None: """Request a sync for this data source. 
@@ -321,6 +322,7 @@ def request_sync( Args: sync_run_id: The ID of the sync run record created for this sync requested_by: The user who requested the sync (optional) + pipeline_mode: ``full`` or ``ingest_only`` — see SyncStarted.pipeline_mode Raises: AggregateDeletedError: If the data source has been marked for deletion @@ -338,6 +340,7 @@ def request_sync( credentials_path=self.credentials_path, occurred_at=datetime.now(UTC), requested_by=requested_by, + pipeline_mode=pipeline_mode, ) ) diff --git a/src/api/management/domain/entities/data_source_sync_run.py b/src/api/management/domain/entities/data_source_sync_run.py index 9bf466518..3c802a6dc 100644 --- a/src/api/management/domain/entities/data_source_sync_run.py +++ b/src/api/management/domain/entities/data_source_sync_run.py @@ -7,9 +7,17 @@ from typing import Any # Valid sync run status values representing the lifecycle state machine. -TERMINAL_STATUSES = frozenset({"completed", "failed"}) +TERMINAL_STATUSES = frozenset({"ingested", "completed", "failed"}) VALID_STATUSES = frozenset( - {"pending", "ingesting", "ai_extracting", "applying", "completed", "failed"} + { + "pending", + "ingesting", + "ai_extracting", + "applying", + "ingested", + "completed", + "failed", + } ) @@ -83,13 +91,14 @@ class DataSourceSyncRun: → ingesting (SyncStarted event processed, ingestion pipeline running) → ai_extracting (JobPackageProduced, AI entity extraction triggered) → applying (MutationLogProduced, graph mutations being applied) + → ingested (IngestionPrepared, context ready — no extraction) → completed (MutationsApplied, sync finished successfully) → failed (IngestionFailed / ExtractionFailed / MutationApplicationFailed) - Terminal states: completed, failed — no further transitions allowed. + Terminal states: ingested, completed, failed — no further transitions allowed. 
Valid status values: "pending", "ingesting", "ai_extracting", - "applying", "completed", "failed" + "applying", "ingested", "completed", "failed" """ id: str diff --git a/src/api/management/domain/events/data_source.py b/src/api/management/domain/events/data_source.py index 3ebdeafba..6a3ae0922 100644 --- a/src/api/management/domain/events/data_source.py +++ b/src/api/management/domain/events/data_source.py @@ -87,6 +87,8 @@ class SyncStarted: credentials_path: Optional path to credentials in vault occurred_at: When the sync was initiated requested_by: The user who requested the sync (if known) + pipeline_mode: ``full`` runs ingestion through graph apply; ``ingest_only`` + stops after ingestion context is prepared (no AI extraction). """ sync_run_id: str @@ -98,3 +100,4 @@ class SyncStarted: occurred_at: datetime credentials_path: str | None = None requested_by: str | None = None + pipeline_mode: str = "full" diff --git a/src/api/management/domain/value_objects/sync_pipeline_mode.py b/src/api/management/domain/value_objects/sync_pipeline_mode.py new file mode 100644 index 000000000..b8e1f9999 --- /dev/null +++ b/src/api/management/domain/value_objects/sync_pipeline_mode.py @@ -0,0 +1,7 @@ +"""Sync pipeline mode — controls how far a sync run progresses.""" + +from typing import Literal + +SyncPipelineMode = Literal["full", "ingest_only"] + +DEFAULT_SYNC_PIPELINE_MODE: SyncPipelineMode = "full" diff --git a/src/api/management/infrastructure/models/data_source_sync_run.py b/src/api/management/infrastructure/models/data_source_sync_run.py index d1401fe96..2af41a7bd 100644 --- a/src/api/management/infrastructure/models/data_source_sync_run.py +++ b/src/api/management/infrastructure/models/data_source_sync_run.py @@ -39,6 +39,7 @@ class DataSourceSyncRunModel(Base): - ingesting: Data extraction pipeline is running - ai_extracting: AI entity extraction is in progress - applying: Graph mutations are being applied + - ingested: Ingestion context prepared without extraction 
(terminal) - completed: Sync finished successfully (terminal) - failed: Sync failed at any stage (terminal) """ @@ -79,7 +80,7 @@ class DataSourceSyncRunModel(Base): Index("idx_sync_runs_data_source_status", "data_source_id", "status"), CheckConstraint( "status IN ('pending', 'ingesting', 'ai_extracting', 'applying', " - "'completed', 'failed')", + "'ingested', 'completed', 'failed')", name="ck_sync_runs_status", ), ) diff --git a/src/api/management/infrastructure/sync_lifecycle_handler.py b/src/api/management/infrastructure/sync_lifecycle_handler.py index c33ee1d65..bcfc22141 100644 --- a/src/api/management/infrastructure/sync_lifecycle_handler.py +++ b/src/api/management/infrastructure/sync_lifecycle_handler.py @@ -10,10 +10,11 @@ IngestionFailed → failed (with error) MutationLogProduced → applying ExtractionFailed → failed (with error) + IngestionPrepared → ingested (ingestion context ready; no extraction) MutationsApplied → completed (DataSource.last_sync_at updated) MutationApplicationFailed → failed (with error) -Terminal states (completed, failed) cannot be transitioned further. +Terminal states (ingested, completed, failed) cannot be transitioned further. 
""" from __future__ import annotations @@ -43,6 +44,7 @@ "JobPackageProduced": "ai_extracting", "MutationLogProduced": "applying", "MutationsApplied": "completed", + "IngestionPrepared": "ingested", } _SUPPORTED_EVENTS = frozenset(_STATUS_MAP.keys()) | _FAILURE_EVENTS @@ -119,6 +121,25 @@ async def handle( sync_run.completed_at = now sync_run.logs.append(f"[{now.isoformat()}] {event_type}: {error_msg}") + elif event_type == "IngestionPrepared": + sync_run.status = "ingested" + sync_run.completed_at = now + job_package_id = payload.get("job_package_id") + if job_package_id: + sync_run.logs.append( + f"[{now.isoformat()}] Ingestion context prepared " + f"(job_package_id={job_package_id})" + ) + elif payload.get("no_changes_detected") is True: + sync_run.logs.append( + f"[{now.isoformat()}] No source changes detected; " + "ingestion context preparation skipped." + ) + else: + sync_run.logs.append( + f"[{now.isoformat()}] Ingestion context prepared for later extraction." + ) + elif event_type == "MutationsApplied": sync_run.status = "completed" sync_run.completed_at = now diff --git a/src/api/management/presentation/data_sources/models.py b/src/api/management/presentation/data_sources/models.py index 903d494aa..57e826746 100644 --- a/src/api/management/presentation/data_sources/models.py +++ b/src/api/management/presentation/data_sources/models.py @@ -309,6 +309,18 @@ class DataSourceDiffSummaryResponse(BaseModel): ) +class TriggerSyncRequest(BaseModel): + """Request body for triggering a data source sync.""" + + mode: Literal["full", "ingest_only"] = Field( + default="full", + description=( + "Pipeline mode: full runs ingestion through graph apply; " + "ingest_only prepares ingestion context without extraction" + ), + ) + + class SyncRunResponse(BaseModel): """Response model for a data source sync run.""" @@ -316,7 +328,10 @@ class SyncRunResponse(BaseModel): data_source_id: str = Field(..., description="Data Source ID this run belongs to") status: str = Field( ..., - 
description="Sync run status (pending, ingesting, ai_extracting, applying, completed, failed)", + description=( + "Sync run status (pending, ingesting, ai_extracting, applying, " + "ingested, completed, failed)" + ), ) started_at: datetime = Field(..., description="When the sync run started") completed_at: datetime | None = Field( diff --git a/src/api/management/presentation/data_sources/routes.py b/src/api/management/presentation/data_sources/routes.py index c73f1b16f..4610ec3ca 100644 --- a/src/api/management/presentation/data_sources/routes.py +++ b/src/api/management/presentation/data_sources/routes.py @@ -32,6 +32,7 @@ MutationLogEntryPreviewPageResponse, SyncRunLogsResponse, SyncRunResponse, + TriggerSyncRequest, UpdateDataSourceRequest, ) from shared_kernel.datasource_types import DataSourceAdapterType @@ -370,6 +371,7 @@ async def trigger_sync( ds_id: str, current_user: Annotated[CurrentUser, Depends(get_current_user)], service: Annotated[DataSourceService, Depends(get_data_source_service)], + body: TriggerSyncRequest | None = None, ) -> SyncRunResponse: """Trigger a synchronization for a data source. 
@@ -380,6 +382,7 @@ async def trigger_sync( ds_id: Data Source ID to trigger sync for current_user: Current authenticated user with tenant context service: Data source service for orchestration + body: Optional pipeline mode (default full sync) Returns: SyncRunResponse with the created sync run details @@ -389,10 +392,12 @@ async def trigger_sync( HTTPException: 404 if DS not found HTTPException: 500 for unexpected errors """ + request = body or TriggerSyncRequest() try: sync_run = await service.trigger_sync( user_id=current_user.user_id.value, ds_id=ds_id, + pipeline_mode=request.mode, ) return SyncRunResponse.from_domain(sync_run) diff --git a/src/api/tests/unit/ingestion/infrastructure/test_ingestion_event_handler.py b/src/api/tests/unit/ingestion/infrastructure/test_ingestion_event_handler.py index 0b1e6069b..ec77a0b35 100644 --- a/src/api/tests/unit/ingestion/infrastructure/test_ingestion_event_handler.py +++ b/src/api/tests/unit/ingestion/infrastructure/test_ingestion_event_handler.py @@ -238,6 +238,37 @@ async def test_short_circuits_when_no_changes_detected( assert event["payload"]["sync_run_id"] == "run-004" assert event["payload"]["no_changes_detected"] is True + async def test_emits_ingestion_prepared_when_ingest_only( + self, + handler: IngestionEventHandler, + outbox: _FakeOutboxRepository, + ): + """ingest_only mode should stop after ingestion without JobPackageProduced.""" + payload = _sync_started_payload(sync_run_id="run-ingest") + payload["pipeline_mode"] = "ingest_only" + await handler.handle("SyncStarted", payload) + + assert len(outbox.appended) == 1 + event = outbox.appended[0] + assert event["event_type"] == "IngestionPrepared" + assert event["payload"]["job_package_id"] is not None + + async def test_no_changes_ingest_only_emits_ingestion_prepared( + self, + handler: IngestionEventHandler, + ingestion_service: _FakeIngestionService, + outbox: _FakeOutboxRepository, + ): + """ingest_only with no_changes_detected should not emit 
MutationsApplied.""" + payload = _sync_started_payload(sync_run_id="run-nc-ingest") + payload["pipeline_mode"] = "ingest_only" + payload["no_changes_detected"] = True + + await handler.handle("SyncStarted", payload) + + assert ingestion_service.calls == [] + assert outbox.appended[0]["event_type"] == "IngestionPrepared" + @pytest.mark.asyncio class TestIngestionEventHandlerFailure: @@ -278,6 +309,7 @@ async def run( # type: ignore[override] adapter_type: str, connection_config: dict[str, str], credentials_path: str | None, + tenant_id: str | None = None, credentials: dict[str, str] | None = None, baseline_commit: str | None = None, ) -> JobPackageId: diff --git a/src/api/tests/unit/management/infrastructure/test_sync_lifecycle_handler.py b/src/api/tests/unit/management/infrastructure/test_sync_lifecycle_handler.py index 035afd82e..bd9b9a9a5 100644 --- a/src/api/tests/unit/management/infrastructure/test_sync_lifecycle_handler.py +++ b/src/api/tests/unit/management/infrastructure/test_sync_lifecycle_handler.py @@ -83,10 +83,11 @@ class TestSyncLifecycleHandlerSupportedEvents: """Tests for supported_event_types().""" def test_supports_all_lifecycle_events(self, handler: SyncLifecycleHandler): - """Handler should support all 7 lifecycle events.""" + """Handler should support all lifecycle events.""" expected = { "SyncStarted", "JobPackageProduced", + "IngestionPrepared", "IngestionFailed", "MutationLogProduced", "ExtractionFailed", @@ -116,6 +117,33 @@ async def test_sync_started_sets_ingesting( assert saved_run.status == "ingesting" +@pytest.mark.asyncio +class TestIngestionPreparedTransition: + """IngestionPrepared → status = ingested (terminal, no last_sync_at).""" + + async def test_ingestion_prepared_sets_ingested( + self, + handler: SyncLifecycleHandler, + mock_sync_run_repo: AsyncMock, + mock_ds_repo: AsyncMock, + ): + run = _make_sync_run(status="ingesting") + mock_sync_run_repo.get_by_id.return_value = run + + await handler.handle( + "IngestionPrepared", + 
_payload( + sync_run_id=run.id, + job_package_id="pkg-001", + ), + ) + + saved_run: DataSourceSyncRun = mock_sync_run_repo.save.call_args[0][0] + assert saved_run.status == "ingested" + assert saved_run.completed_at is not None + mock_ds_repo.get_by_id.assert_not_called() + + @pytest.mark.asyncio class TestJobPackageProducedTransition: """JobPackageProduced → status = ai_extracting.""" diff --git a/src/api/tests/unit/management/presentation/test_data_sources_routes.py b/src/api/tests/unit/management/presentation/test_data_sources_routes.py index 5ad48dae2..ec92b4784 100644 --- a/src/api/tests/unit/management/presentation/test_data_sources_routes.py +++ b/src/api/tests/unit/management/presentation/test_data_sources_routes.py @@ -386,6 +386,28 @@ def test_trigger_sync_calls_service_with_correct_params( mock_ds_service.trigger_sync.assert_called_once_with( user_id=mock_current_user.user_id.value, ds_id=sample_data_source.id.value, + pipeline_mode="full", + ) + + def test_trigger_sync_passes_ingest_only_mode( + self, + test_client: TestClient, + mock_ds_service: AsyncMock, + sample_data_source: DataSource, + sample_sync_run: DataSourceSyncRun, + mock_current_user: CurrentUser, + ) -> None: + mock_ds_service.trigger_sync.return_value = sample_sync_run + + test_client.post( + f"/management/data-sources/{sample_data_source.id.value}/sync", + json={"mode": "ingest_only"}, + ) + + mock_ds_service.trigger_sync.assert_called_once_with( + user_id=mock_current_user.user_id.value, + ds_id=sample_data_source.id.value, + pipeline_mode="ingest_only", ) def test_trigger_sync_returns_403_when_unauthorized( diff --git a/src/api/uv.lock b/src/api/uv.lock index 6dc4cb007..9e964cae4 100644 --- a/src/api/uv.lock +++ b/src/api/uv.lock @@ -1289,7 +1289,7 @@ wheels = [ [[package]] name = "kartograph-api" -version = "3.36.1" +version = "3.37.1" source = { virtual = "." 
} dependencies = [ { name = "alembic" }, diff --git a/src/dev-ui/app/components/graph/SyncPhaseIndicator.vue b/src/dev-ui/app/components/graph/SyncPhaseIndicator.vue index 2b2cd02c2..0246930f0 100644 --- a/src/dev-ui/app/components/graph/SyncPhaseIndicator.vue +++ b/src/dev-ui/app/components/graph/SyncPhaseIndicator.vue @@ -3,7 +3,14 @@ import { computed } from 'vue' import { Loader2, Download, Sparkles, Database, Clock } from 'lucide-vue-next' import { Badge } from '@/components/ui/badge' -type SyncStatus = 'pending' | 'ingesting' | 'ai_extracting' | 'applying' | 'completed' | 'failed' +type SyncStatus = + | 'pending' + | 'ingesting' + | 'ai_extracting' + | 'applying' + | 'ingested' + | 'completed' + | 'failed' const props = defineProps<{ status: SyncStatus; label?: string }>() @@ -13,6 +20,7 @@ const phaseLabel = computed(() => { ingesting: 'Ingesting', ai_extracting: 'Extracting', applying: 'Applying', + ingested: 'Prepared', completed: 'Completed', failed: 'Failed', } @@ -24,7 +32,7 @@ const isActive = computed(() => ) const badgeVariant = computed<'default' | 'secondary' | 'destructive'>(() => { - if (props.status === 'completed') return 'default' + if (props.status === 'completed' || props.status === 'ingested') return 'default' if (props.status === 'failed') return 'destructive' return 'secondary' }) diff --git a/src/dev-ui/app/pages/knowledge-graphs/[kgId]/data-sources/new.vue b/src/dev-ui/app/pages/knowledge-graphs/[kgId]/data-sources/new.vue index 98ec4254d..8a5344106 100644 --- a/src/dev-ui/app/pages/knowledge-graphs/[kgId]/data-sources/new.vue +++ b/src/dev-ui/app/pages/knowledge-graphs/[kgId]/data-sources/new.vue @@ -111,16 +111,8 @@ const syncProgressPercent = computed(() => { return Math.round((syncCompletedInRun.value / syncRunTotal.value) * 100) }) -const completedSyncCount = computed(() => - createdSources.value.filter((s) => s.syncStatus === 'completed').length, -) - -const totalTokenUsage = computed(() => - createdSources.value.reduce((sum, s) 
=> sum + (s.token_usage_total ?? 0), 0), -) - -const totalSyncCost = computed(() => - createdSources.value.reduce((sum, s) => sum + (s.cost_total_usd ?? 0), 0), +const preparedSourceCount = computed(() => + createdSources.value.filter((s) => s.syncStatus === 'ingested').length, ) function addUrlField() { @@ -333,12 +325,12 @@ async function pollUntilTerminal(row: CreatedSourceRow, timeoutMs = 600_000) { row.syncError = 'Sync timed out' } -async function runSequentialSync() { +async function runSequentialIngestionPrep() { const queue = createdSources.value.filter( (s) => s.syncStatus === 'idle' || s.syncStatus === 'failed' || s.syncStatus === 'queued', ) if (queue.length === 0) { - toast.error('No sources need syncing') + toast.error('No sources need preparation') return } @@ -356,35 +348,38 @@ async function runSequentialSync() { target.syncError = null try { - await apiFetch(`/management/data-sources/${target.id}/sync`, { method: 'POST' }) + await apiFetch(`/management/data-sources/${target.id}/sync`, { + method: 'POST', + body: { mode: 'ingest_only' }, + }) await pollUntilTerminal(target) if (target.syncStatus === 'failed') { - toast.error(`Sync failed: ${target.name}`, { + toast.error(`Preparation failed: ${target.name}`, { description: target.syncError ?? undefined, }) } } catch (err: unknown) { target.syncStatus = 'failed' - target.syncError = err instanceof Error ? err.message : 'Sync failed' - toast.error(`Sync failed: ${target.name}`, { description: target.syncError }) + target.syncError = err instanceof Error ? 
err.message : 'Preparation failed' + toast.error(`Preparation failed: ${target.name}`, { description: target.syncError }) } syncCompletedInRun.value = i + 1 } - const allCompleted = createdSources.value.every((s) => s.syncStatus === 'completed') - readyForStats.value = allCompleted + const allPrepared = createdSources.value.every((s) => s.syncStatus === 'ingested') + readyForStats.value = allPrepared - if (allCompleted) { + if (allPrepared) { flowPhase.value = 'stats' await nextTick() wizardSectionRef.value?.scrollIntoView({ behavior: 'smooth', block: 'start' }) - toast.success('Initial sync complete', { - description: 'Review results below, then open data sources to continue.', + toast.success('Ingestion context prepared', { + description: 'Sources are ready for design and extraction when you open those steps.', }) } else { - toast('Sync finished with issues', { - description: 'Fix failed sources from the data sources page or retry sync.', + toast('Preparation finished with issues', { + description: 'Fix failed sources from the data sources page or retry.', }) } } finally { @@ -396,6 +391,8 @@ async function runSequentialSync() { function getSyncBadge(status: CreatedSourceRow['syncStatus']) { switch (status) { + case 'ingested': + return { variant: 'default' as const, label: 'Prepared', icon: Check } case 'completed': return { variant: 'default' as const, label: 'Completed', icon: Check } case 'failed': @@ -404,7 +401,7 @@ function getSyncBadge(status: CreatedSourceRow['syncStatus']) { case 'ingesting': case 'ai_extracting': case 'applying': - return { variant: 'secondary' as const, label: 'Syncing…', icon: Loader2 } + return { variant: 'secondary' as const, label: 'Preparing…', icon: Loader2 } default: return { variant: 'outline' as const, label: 'Ready', icon: null } } @@ -495,7 +492,7 @@ onUnmounted(() => { > <Loader2 v-if="detectingSourceDetails" class="mr-2 size-4 animate-spin" /> Continue - <ArrowRight v-else class="ml-2 size-4" /> + <ArrowRight 
v-if="!detectingSourceDetails" class="ml-2 size-4" /> </Button> </CardFooter> </Card> @@ -544,8 +541,8 @@ onUnmounted(() => { <Button variant="outline" type="button" @click="flowPhase = 'urls'">Back</Button> <Button type="button" :disabled="creating" @click="createDataSources"> <Loader2 v-if="creating" class="mr-2 size-4 animate-spin" /> - <Check v-else class="mr-2 size-4" /> - Connect data sources and sync + <Check v-if="!creating" class="mr-2 size-4" /> + Connect data sources </Button> </CardFooter> </Card> @@ -562,10 +559,11 @@ onUnmounted(() => { <Badge variant="default">{{ kgName }}</Badge> <span class="text-sm text-muted-foreground">sources connected</span> </div> - <CardTitle class="text-base">Initial sync</CardTitle> + <CardTitle class="text-base">Prepare ingestion context</CardTitle> <CardDescription> - Run ingestion and extraction for each source. Sources sync one at a time so you can - follow progress. + Fetch repository content and build job packages for each source. No AI extraction + runs here — that happens later in graph management. Sources are prepared one at a + time so you can follow progress. 
</CardDescription> </CardHeader> <CardContent class="space-y-4"> @@ -574,11 +572,11 @@ onUnmounted(() => { type="button" size="sm" :disabled="syncRunActive || createdSources.length === 0" - @click="runSequentialSync" + @click="runSequentialIngestionPrep" > <Loader2 v-if="syncRunActive" class="mr-2 size-4 animate-spin" /> - <GitBranch v-else class="mr-2 size-4" /> - Start initial sync + <GitBranch v-if="!syncRunActive" class="mr-2 size-4" /> + Prepare ingestion context </Button> </div> @@ -587,7 +585,7 @@ onUnmounted(() => { class="space-y-2 rounded-lg border border-primary/30 bg-primary/5 p-4" > <div class="flex items-center justify-between text-sm"> - <span class="font-medium">Syncing {{ syncActiveName || '…' }}</span> + <span class="font-medium">Preparing {{ syncActiveName || '…' }}</span> <span class="tabular-nums text-muted-foreground">{{ syncStepLabel }}</span> </div> <div class="h-2 overflow-hidden rounded-full bg-muted"> @@ -616,7 +614,7 @@ onUnmounted(() => { </div> <div class="flex shrink-0 flex-col items-end gap-2"> <SyncPhaseIndicator - v-if="isActiveSyncStatus(source.syncStatus as SyncRunStatus) || source.syncStatus === 'completed' || source.syncStatus === 'failed'" + v-if="isActiveSyncStatus(source.syncStatus as SyncRunStatus) || source.syncStatus === 'ingested' || source.syncStatus === 'completed' || source.syncStatus === 'failed'" :status="(source.syncStatus === 'idle' || source.syncStatus === 'queued') ? 'pending' : (source.syncStatus as SyncRunStatus)" /> <Badge v-else :variant="getSyncBadge(source.syncStatus).variant"> @@ -633,11 +631,11 @@ onUnmounted(() => { <CardHeader> <div class="flex items-center gap-2"> <Settings2 class="size-5 text-primary" /> - <CardTitle class="text-base">Sync summary</CardTitle> + <CardTitle class="text-base">Preparation summary</CardTitle> </div> <CardDescription> - Initial sync finished for all sources. Open data sources to manage commits, ontology, - and maintenance. + Ingestion context is ready for all sources. 
Open data sources to manage commits, or + continue in graph management when you are ready to extract. </CardDescription> </CardHeader> <CardContent class="space-y-4"> @@ -647,8 +645,6 @@ onUnmounted(() => { <tr class="border-b bg-muted/50 text-left"> <th class="px-3 py-2 font-medium">Data source</th> <th class="px-3 py-2 text-right font-medium">Status</th> - <th class="px-3 py-2 text-right font-medium">Tokens</th> - <th class="px-3 py-2 text-right font-medium">Cost (USD)</th> </tr> </thead> <tbody> @@ -659,27 +655,15 @@ onUnmounted(() => { > <td class="px-3 py-2 font-medium">{{ s.name }}</td> <td class="px-3 py-2 text-right"> - <Badge variant="default" class="text-[10px]">Completed</Badge> - </td> - <td class="px-3 py-2 text-right tabular-nums"> - {{ s.token_usage_total?.toLocaleString() ?? '—' }} - </td> - <td class="px-3 py-2 text-right tabular-nums text-muted-foreground"> - {{ s.cost_total_usd != null ? s.cost_total_usd.toFixed(4) : '—' }} + <Badge variant="default" class="text-[10px]">Prepared</Badge> </td> </tr> </tbody> </table> </div> <p class="text-sm text-muted-foreground"> - <span class="font-medium text-foreground">{{ completedSyncCount }}</span> - source{{ completedSyncCount === 1 ? '' : 's' }} synced. - <template v-if="totalTokenUsage > 0"> - Total tokens: {{ totalTokenUsage.toLocaleString() }}. - </template> - <template v-if="totalSyncCost > 0"> - Estimated cost: ${{ totalSyncCost.toFixed(4) }}. - </template> + <span class="font-medium text-foreground">{{ preparedSourceCount }}</span> + source{{ preparedSourceCount === 1 ? '' : 's' }} ready for later extraction. 
</p> <div class="flex flex-col gap-3 sm:flex-row sm:flex-wrap"> <Button as-child> diff --git a/src/dev-ui/app/utils/kgDataSourcesSync.ts b/src/dev-ui/app/utils/kgDataSourcesSync.ts index ebaf754eb..b56757788 100644 --- a/src/dev-ui/app/utils/kgDataSourcesSync.ts +++ b/src/dev-ui/app/utils/kgDataSourcesSync.ts @@ -3,6 +3,7 @@ export type SyncRunStatus = | 'ingesting' | 'ai_extracting' | 'applying' + | 'ingested' | 'completed' | 'failed' @@ -19,7 +20,7 @@ export function isActiveSyncStatus(status: SyncRunStatus | undefined): boolean { } export function isSyncTerminal(status: SyncRunStatus | undefined): boolean { - return status === 'completed' || status === 'failed' + return status === 'ingested' || status === 'completed' || status === 'failed' } export interface SyncRunSummary { From 781fa38b25b8312b13bacb0e2781344ba0262350 Mon Sep 17 00:00:00 2001 From: aredenba-rh <aredenba@redhat.com> Date: Thu, 28 May 2026 17:49:50 -0400 Subject: [PATCH 056/153] fix(ingestion): unblock SyncStarted handler shadowed import crash Remove the local get_management_settings import that caused UnboundLocalError and left sync runs stuck in ingesting after lifecycle updated. 
Co-authored-by: Cursor <cursoragent@cursor.com> --- src/api/main.py | 20 ++++---------------- 1 file changed, 4 insertions(+), 16 deletions(-) diff --git a/src/api/main.py b/src/api/main.py index 63fd2a7a6..b45a75cb9 100644 --- a/src/api/main.py +++ b/src/api/main.py @@ -196,9 +196,10 @@ async def _resolve_github_tracked_head_commit( return str(sha) if sha else None async def handle(self, event_type: str, payload: dict[str, Any]) -> None: - from infrastructure.outbox.repository import OutboxRepository + from ingestion.infrastructure.adapters.github import GitHubAdapter from ingestion.application.services.ingestion_service import IngestionService from ingestion.infrastructure.event_handler import IngestionEventHandler + from infrastructure.outbox.repository import OutboxRepository from management.domain.value_objects import DataSourceId from management.infrastructure.repositories.data_source_repository import ( DataSourceRepository, @@ -210,19 +211,6 @@ async def handle(self, event_type: str, payload: dict[str, Any]) -> None: async with self._session_factory() as session: outbox = OutboxRepository(session=session) ds_repo = DataSourceRepository(session=session, outbox=outbox) - management_settings = get_management_settings() - encryption_keys = management_settings.encryption_key.get_secret_value().split( - "," - ) - credential_reader = FernetSecretStore( - session=session, - encryption_keys=encryption_keys, - ) - from ingestion.infrastructure.adapters.github import GitHubAdapter - from infrastructure.settings import get_management_settings - from management.infrastructure.repositories.fernet_secret_store import ( - FernetSecretStore, - ) credential_reader = None if payload.get("credentials_path"): @@ -257,6 +245,7 @@ async def handle(self, event_type: str, payload: dict[str, Any]) -> None: data_source_id = str(payload.get("data_source_id", "")) tenant_id = str(payload.get("tenant_id", "")) if payload.get("tenant_id") else "" adapter_type = 
str(payload.get("adapter_type", "")) + credentials: dict[str, str] = {} if data_source_id and adapter_type == "github": ds = await ds_repo.get_by_id(DataSourceId(value=data_source_id)) if ds is not None: @@ -265,8 +254,7 @@ async def handle(self, event_type: str, payload: dict[str, Any]) -> None: ds.last_extraction_baseline_commit ) - credentials: dict[str, str] = {} - if ds.credentials_path and tenant_id: + if ds.credentials_path and tenant_id and credential_reader is not None: try: credentials = await credential_reader.retrieve( path=ds.credentials_path, From 1c212079d884114b9c6ad17d0347a438221fde4a Mon Sep 17 00:00:00 2001 From: aredenba-rh <aredenba@redhat.com> Date: Thu, 28 May 2026 18:22:09 -0400 Subject: [PATCH 057/153] feat(ui): align KG data sources page with k-extract phase1 layout Refresh commits updates tracked head only; advance extraction baseline on successful sync. Parallelize wizard ingest prep and add phase1-style overview table. Co-authored-by: Cursor <cursoragent@cursor.com> --- .../services/data_source_service.py | 13 +- .../domain/aggregates/data_source.py | 16 + .../infrastructure/sync_lifecycle_handler.py | 1 + .../presentation/data_sources/models.py | 10 + .../presentation/data_sources/routes.py | 1 - .../application/test_data_source_service.py | 12 +- .../test_sync_lifecycle_handler.py | 3 + .../presentation/test_data_sources_routes.py | 12 +- src/dev-ui/app/pages/data-sources/index.vue | 7 +- .../[kgId]/data-sources/index.vue | 662 +++++++++++++----- .../[kgId]/data-sources/new.vue | 74 +- src/dev-ui/app/tests/data-sources.test.ts | 2 +- .../app/tests/kg-data-sources-phase1.test.ts | 72 ++ .../knowledge-graph-manage-workspace.test.ts | 2 + src/dev-ui/app/utils/kgDataSourcesCommits.ts | 78 +++ 15 files changed, 714 insertions(+), 251 deletions(-) create mode 100644 src/dev-ui/app/tests/kg-data-sources-phase1.test.ts create mode 100644 src/dev-ui/app/utils/kgDataSourcesCommits.ts diff --git 
a/src/api/management/application/services/data_source_service.py b/src/api/management/application/services/data_source_service.py index fea3551cc..87d8efecf 100644 --- a/src/api/management/application/services/data_source_service.py +++ b/src/api/management/application/services/data_source_service.py @@ -470,13 +470,12 @@ async def refresh_commit_references( user_id: str, ds_id: str, tracked_branch_head_commit: str, - clone_head_commit: str | None = None, ) -> DataSource: - """Persist refreshed source commit references for a data source. + """Persist the latest tracked branch head for a Git-backed data source. - Requires MANAGE permission on the data source. This action updates - tracked and clone commit references and initializes extraction baseline - on first refresh so per-source diff counts can be computed immediately. + Requires MANAGE permission. Updates only ``tracked_branch_head_commit``; + extraction baseline is advanced on successful sync completion or via + ``adopt_tracked_head_as_baseline``. 
""" has_manage = await self._check_permission( user_id=user_id, @@ -498,11 +497,7 @@ async def refresh_commit_references( if ds is None or ds.tenant_id != self._scope_to_tenant: raise ValueError(f"Data source {ds_id} not found") - resolved_clone_head = clone_head_commit or tracked_branch_head_commit ds.tracked_branch_head_commit = tracked_branch_head_commit - ds.clone_head_commit = resolved_clone_head - if ds.last_extraction_baseline_commit is None: - ds.last_extraction_baseline_commit = tracked_branch_head_commit await self._ds_repo.save(ds) await self._session.commit() diff --git a/src/api/management/domain/aggregates/data_source.py b/src/api/management/domain/aggregates/data_source.py index b0dac1b28..e61ee09a5 100644 --- a/src/api/management/domain/aggregates/data_source.py +++ b/src/api/management/domain/aggregates/data_source.py @@ -362,6 +362,22 @@ def record_sync_completed(self) -> None: tenant_id=self.tenant_id, ) + def advance_extraction_baseline_to_tracked_head(self) -> None: + """Move extraction baseline to the current tracked branch head. + + Called after graph mutations are applied so maintenance diffs reflect + the commit that was last extracted into the knowledge graph. 
+ + Raises: + AggregateDeletedError: If the data source has been marked for deletion + """ + if self._deleted: + raise AggregateDeletedError( + "Cannot update extraction baseline on a deleted data source" + ) + if self.tracked_branch_head_commit: + self.last_extraction_baseline_commit = self.tracked_branch_head_commit + def mark_for_deletion( self, *, diff --git a/src/api/management/infrastructure/sync_lifecycle_handler.py b/src/api/management/infrastructure/sync_lifecycle_handler.py index bcfc22141..023f42112 100644 --- a/src/api/management/infrastructure/sync_lifecycle_handler.py +++ b/src/api/management/infrastructure/sync_lifecycle_handler.py @@ -227,4 +227,5 @@ async def _update_data_source_last_sync_at( return ds.record_sync_completed() + ds.advance_extraction_baseline_to_tracked_head() await self._ds_repo.save(ds) diff --git a/src/api/management/presentation/data_sources/models.py b/src/api/management/presentation/data_sources/models.py index 57e826746..1d3a380b8 100644 --- a/src/api/management/presentation/data_sources/models.py +++ b/src/api/management/presentation/data_sources/models.py @@ -199,6 +199,10 @@ class DataSourceResponse(BaseModel): tracked_branch_head_commit: str | None = Field( None, description="Latest known commit at the tracked source branch head" ) + connection_config: dict[str, str] = Field( + default_factory=dict, + description="Adapter connection configuration (non-secret)", + ) created_at: datetime = Field(..., description="When the DS was created") updated_at: datetime = Field(..., description="When the DS was last updated") ontology: OntologyModel | None = Field( @@ -227,6 +231,7 @@ def from_domain(cls, ds: DataSource) -> DataSourceResponse: clone_head_commit=ds.clone_head_commit, last_extraction_baseline_commit=ds.last_extraction_baseline_commit, tracked_branch_head_commit=ds.tracked_branch_head_commit, + connection_config=dict(ds.connection_config), created_at=ds.created_at, updated_at=ds.updated_at, ontology=( @@ -471,6 +476,10 
@@ class DataSourceWithSyncResponse(BaseModel): tracked_branch_head_commit: str | None = Field( None, description="Latest known commit at the tracked source branch head" ) + connection_config: dict[str, str] = Field( + default_factory=dict, + description="Adapter connection configuration (non-secret)", + ) created_at: datetime = Field(..., description="When the DS was created") updated_at: datetime = Field(..., description="When the DS was last updated") ontology: OntologyModel | None = Field( @@ -506,6 +515,7 @@ def from_domain_pair( clone_head_commit=ds.clone_head_commit, last_extraction_baseline_commit=ds.last_extraction_baseline_commit, tracked_branch_head_commit=ds.tracked_branch_head_commit, + connection_config=dict(ds.connection_config), created_at=ds.created_at, updated_at=ds.updated_at, ontology=( diff --git a/src/api/management/presentation/data_sources/routes.py b/src/api/management/presentation/data_sources/routes.py index 4610ec3ca..ea67a40af 100644 --- a/src/api/management/presentation/data_sources/routes.py +++ b/src/api/management/presentation/data_sources/routes.py @@ -99,7 +99,6 @@ async def refresh_commit_references( user_id=current_user.user_id.value, ds_id=ds_id, tracked_branch_head_commit=tracked_head, - clone_head_commit=tracked_head, ) return DataSourceResponse.from_domain(updated) except UnauthorizedError: diff --git a/src/api/tests/unit/management/application/test_data_source_service.py b/src/api/tests/unit/management/application/test_data_source_service.py index 973528875..a55da66be 100644 --- a/src/api/tests/unit/management/application/test_data_source_service.py +++ b/src/api/tests/unit/management/application/test_data_source_service.py @@ -1158,7 +1158,6 @@ async def test_refresh_commit_references_requires_manage_permission( user_id=user_id, ds_id=ds.id.value, tracked_branch_head_commit="abc123", - clone_head_commit="abc123", ) authz.assert_check_called_once( @@ -1168,12 +1167,13 @@ async def 
test_refresh_commit_references_requires_manage_permission( ) @pytest.mark.asyncio - async def test_refresh_commit_references_initializes_baseline_when_empty( + async def test_refresh_commit_references_updates_tracked_head_only( self, service, authz, ds_repo, user_id ) -> None: - """First commit-refresh should initialize extraction baseline.""" + """Refresh should update tracked head without touching baseline or clone.""" ds = _make_ds() ds.last_extraction_baseline_commit = None + ds.clone_head_commit = "legacy-clone" ds_repo.seed(ds) authz.grant_all() @@ -1181,12 +1181,11 @@ async def test_refresh_commit_references_initializes_baseline_when_empty( user_id=user_id, ds_id=ds.id.value, tracked_branch_head_commit="abc123", - clone_head_commit="abc123", ) assert updated.tracked_branch_head_commit == "abc123" - assert updated.clone_head_commit == "abc123" - assert updated.last_extraction_baseline_commit == "abc123" + assert updated.clone_head_commit == "legacy-clone" + assert updated.last_extraction_baseline_commit is None @pytest.mark.asyncio async def test_refresh_commit_references_preserves_existing_baseline( @@ -1202,7 +1201,6 @@ async def test_refresh_commit_references_preserves_existing_baseline( user_id=user_id, ds_id=ds.id.value, tracked_branch_head_commit="tracked999", - clone_head_commit="tracked999", ) assert updated.last_extraction_baseline_commit == "baseline000" diff --git a/src/api/tests/unit/management/infrastructure/test_sync_lifecycle_handler.py b/src/api/tests/unit/management/infrastructure/test_sync_lifecycle_handler.py index bd9b9a9a5..624beadc4 100644 --- a/src/api/tests/unit/management/infrastructure/test_sync_lifecycle_handler.py +++ b/src/api/tests/unit/management/infrastructure/test_sync_lifecycle_handler.py @@ -402,6 +402,8 @@ async def test_mutations_applied_updates_data_source_last_sync_at( credentials_path=None, schedule=Schedule(schedule_type=ScheduleType.MANUAL), last_sync_at=None, + tracked_branch_head_commit="processed-head", + 
last_extraction_baseline_commit="old-baseline", created_at=now, updated_at=now, ) @@ -419,6 +421,7 @@ async def test_mutations_applied_updates_data_source_last_sync_at( mock_ds_repo.save.assert_called_once() saved_ds = mock_ds_repo.save.call_args[0][0] assert saved_ds.last_sync_at is not None + assert saved_ds.last_extraction_baseline_commit == "processed-head" async def test_mutations_applied_logs_no_changes_short_circuit( self, diff --git a/src/api/tests/unit/management/presentation/test_data_sources_routes.py b/src/api/tests/unit/management/presentation/test_data_sources_routes.py index ec92b4784..52a92ec82 100644 --- a/src/api/tests/unit/management/presentation/test_data_sources_routes.py +++ b/src/api/tests/unit/management/presentation/test_data_sources_routes.py @@ -1104,13 +1104,13 @@ def test_refresh_commit_references_returns_updated_data_source( test_client: TestClient, mock_ds_service: AsyncMock, mock_commit_reference_service: AsyncMock, + mock_current_user: CurrentUser, sample_data_source: DataSource, ) -> None: """Refresh endpoint should return updated commit references.""" refreshed = sample_data_source - refreshed.clone_head_commit = "aaa" refreshed.tracked_branch_head_commit = "aaa" - refreshed.last_extraction_baseline_commit = "aaa" + refreshed.last_extraction_baseline_commit = None mock_ds_service.get.return_value = sample_data_source mock_commit_reference_service.resolve_tracked_head_commit.return_value = "aaa" mock_ds_service.refresh_commit_references.return_value = refreshed @@ -1121,9 +1121,13 @@ def test_refresh_commit_references_returns_updated_data_source( assert response.status_code == status.HTTP_200_OK payload = response.json() - assert payload["clone_head_commit"] == "aaa" assert payload["tracked_branch_head_commit"] == "aaa" - assert payload["last_extraction_baseline_commit"] == "aaa" + assert payload["last_extraction_baseline_commit"] is None + mock_ds_service.refresh_commit_references.assert_awaited_once_with( + 
user_id=mock_current_user.user_id.value, + ds_id=sample_data_source.id.value, + tracked_branch_head_commit="aaa", + ) def test_refresh_commit_references_returns_404_when_inaccessible( self, diff --git a/src/dev-ui/app/pages/data-sources/index.vue b/src/dev-ui/app/pages/data-sources/index.vue index 537e702ea..6aae7709c 100644 --- a/src/dev-ui/app/pages/data-sources/index.vue +++ b/src/dev-ui/app/pages/data-sources/index.vue @@ -101,7 +101,6 @@ interface DataSourceItem { knowledge_graph_id: string last_sync_at: string | null created_at: string - clone_head_commit?: string | null last_extraction_baseline_commit?: string | null tracked_branch_head_commit?: string | null sync_runs?: SyncRun[] @@ -1253,11 +1252,7 @@ async function handleDeleteDs() { <!-- Commit status and diff summary cues --> <div class="border-t px-4 py-3"> <p class="mb-2 text-[11px] font-semibold uppercase tracking-wider text-muted-foreground">Commit Status</p> - <div class="grid gap-2 sm:grid-cols-3"> - <div class="rounded-md border bg-muted/20 p-2"> - <p class="text-[10px] uppercase tracking-wider text-muted-foreground">Local clone commit</p> - <p class="mt-1 font-mono text-xs break-all">{{ ds.clone_head_commit ?? '—' }}</p> - </div> + <div class="grid gap-2 sm:grid-cols-2"> <div class="rounded-md border bg-muted/20 p-2"> <p class="text-[10px] uppercase tracking-wider text-muted-foreground">Commit during last extraction</p> <p class="mt-1 font-mono text-xs break-all">{{ ds.last_extraction_baseline_commit ?? 
'—' }}</p> diff --git a/src/dev-ui/app/pages/knowledge-graphs/[kgId]/data-sources/index.vue b/src/dev-ui/app/pages/knowledge-graphs/[kgId]/data-sources/index.vue index 614924aac..4f610dd1f 100644 --- a/src/dev-ui/app/pages/knowledge-graphs/[kgId]/data-sources/index.vue +++ b/src/dev-ui/app/pages/knowledge-graphs/[kgId]/data-sources/index.vue @@ -4,13 +4,17 @@ import { toast } from 'vue-sonner' import { Cable, ChevronLeft, + GitBranch, Plus, - Loader2, Trash2, + Loader2, + Check, + ArrowRight, Settings, RefreshCw, ScrollText, Building2, + LayoutDashboard, } from 'lucide-vue-next' import { buildKgDataSourcesNewUrl, @@ -18,7 +22,27 @@ import { parseKgDataSourcesFocusQuery, } from '@/utils/kgDataSourcesNavigation' import { isMaintenanceReady } from '@/utils/kgManageWorkspace' -import { hasAnyActiveSync, type SyncRunStatus } from '@/utils/kgDataSourcesSync' +import { + hasAnyActiveSync, + isSyncTerminal, + latestSyncRun, + type SyncRunStatus, +} from '@/utils/kgDataSourcesSync' +import { + commitStatusClass, + commitStatusLabel, + prepStatusBadgeVariant, + resolvePrepStatusLabel, + resolveRepoUrl, + resolveTrackedBranch, + shortCommitHash, +} from '@/utils/kgDataSourcesCommits' +import { + buildDataSourceCreationBody, + buildDataSourceCreationUrl, + detectAdapterFromUrl, + inferNameFromRepoUrl, +} from '@/utils/dataSourceWizard' import SyncPhaseIndicator from '@/components/graph/SyncPhaseIndicator.vue' import { Button } from '@/components/ui/button' import { Input } from '@/components/ui/input' @@ -26,7 +50,6 @@ import { Label } from '@/components/ui/label' import { Badge } from '@/components/ui/badge' import { Separator } from '@/components/ui/separator' import { Card, CardContent, CardHeader, CardTitle, CardDescription } from '@/components/ui/card' -import { CopyableText } from '@/components/ui/copyable-text' import { Sheet, SheetContent, @@ -73,7 +96,7 @@ interface DataSourceItem { name: string adapter_type: string knowledge_graph_id: string - clone_head_commit?: 
string | null + connection_config?: Record<string, string> last_extraction_baseline_commit?: string | null tracked_branch_head_commit?: string | null sync_runs?: SyncRun[] @@ -92,16 +115,43 @@ const dataSources = ref<DataSourceItem[]>([]) const loading = ref(false) const expandedDiffLists = ref<Record<string, boolean>>({}) const refreshingCommitRefs = ref<Record<string, boolean>>({}) +const refreshingAllCommits = ref(false) const adoptingBaselines = ref<Record<string, boolean>>({}) +const newUrls = ref<string[]>(['']) +const addToken = ref('') +const addingUrls = ref(false) + const manageUrl = computed(() => buildKgManageUrl(kgId.value)) const newSourceUrl = computed(() => buildKgDataSourcesNewUrl(kgId.value)) +const graphManagementUrl = computed( + () => `${buildKgManageUrl(kgId.value)}?step=graph-management`, +) const visibleDataSources = computed(() => { if (!maintainFocus.value) return dataSources.value return dataSources.value.filter((ds) => isMaintenanceReady(ds)) }) +const validNewUrls = computed(() => + newUrls.value + .map((url) => url.trim()) + .filter((url) => url.startsWith('http://') || url.startsWith('https://') || url.startsWith('git@')), +) + +const preparedCount = computed(() => + dataSources.value.filter((ds) => { + const status = latestSyncRun(ds.sync_runs)?.status + return status === 'ingested' || status === 'completed' + }).length, +) + +const allSourcesPrepared = computed( + () => + dataSources.value.length > 0 + && preparedCount.value === dataSources.value.length, +) + const pollInterval = ref<ReturnType<typeof setInterval> | null>(null) function stopPolling() { @@ -121,6 +171,95 @@ function startPolling() { }, 3000) } +function addUrlField() { + newUrls.value.push('') +} + +function removeUrlField(index: number) { + newUrls.value.splice(index, 1) + if (newUrls.value.length === 0) { + newUrls.value.push('') + } +} + +function updateUrl(index: number, value: string) { + newUrls.value[index] = value +} + +async function detectDefaultBranch(url: 
string): Promise<string> { + try { + const parsed = new URL(url) + const [owner, repoRaw] = parsed.pathname.split('/').filter(Boolean) + const repo = repoRaw?.replace(/\.git$/, '') + if (!owner || !repo) return 'main' + const response = await fetch(`https://api.github.com/repos/${owner}/${repo}`) + if (!response.ok) return 'main' + const payload = (await response.json()) as { default_branch?: string } + return payload.default_branch ?? 'main' + } catch { + return 'main' + } +} + +async function addRepositories() { + if (validNewUrls.value.length === 0) { + toast.error('Please enter at least one valid URL') + return + } + + addingUrls.value = true + const seen = new Set<string>() + let added = 0 + + try { + for (const url of validNewUrls.value) { + if (seen.has(url)) continue + seen.add(url) + + const adapterId = detectAdapterFromUrl(url) + if (adapterId !== 'github') { + toast.error('Unsupported repository URL', { description: url }) + continue + } + + const branch = await detectDefaultBranch(url) + const name = inferNameFromRepoUrl(url) || 'repository' + + try { + await apiFetch(buildDataSourceCreationUrl(kgId.value), { + method: 'POST', + body: buildDataSourceCreationBody({ + name, + adapter_type: 'github', + connection_config: { + repo_url: url, + branch, + }, + credentials: addToken.value.trim() + ? { access_token: addToken.value.trim() } + : undefined, + }), + }) + added += 1 + } catch (err: unknown) { + const msg = err instanceof Error ? err.message : 'Failed to add source' + toast.error(`Failed to add ${url}`, { description: msg }) + } + } + + if (added > 0) { + newUrls.value = [''] + addToken.value = '' + toast.success(`Added ${added} source${added === 1 ? 
'' : 's'}`, { + description: 'Refresh commits or prepare ingestion context when ready.', + }) + await loadDataSources() + } + } finally { + addingUrls.value = false + } +} + async function loadKnowledgeGraph() { try { const result = await apiFetch<{ name: string }>( @@ -174,6 +313,10 @@ async function ensureEntryRoute() { } } +function latestStatus(ds: DataSourceItem): SyncRunStatus | undefined { + return latestSyncRun(ds.sync_runs)?.status +} + function isDiffExpanded(dsId: string): boolean { return expandedDiffLists.value[dsId] === true } @@ -182,10 +325,13 @@ function toggleDiffExpanded(dsId: string) { expandedDiffLists.value[dsId] = !isDiffExpanded(dsId) } -async function triggerSync(dsId: string) { +async function triggerSync(dsId: string, mode: 'full' | 'ingest_only' = 'full') { try { - await apiFetch(`/management/data-sources/${dsId}/sync`, { method: 'POST' }) - toast.success('Sync triggered') + await apiFetch(`/management/data-sources/${dsId}/sync`, { + method: 'POST', + body: mode === 'ingest_only' ? { mode: 'ingest_only' } : undefined, + }) + toast.success(mode === 'ingest_only' ? 
'Preparation started' : 'Sync triggered') await loadDataSources() if (hasAnyActiveSync(dataSources.value)) startPolling() } catch { @@ -206,6 +352,24 @@ async function refreshCommitRefs(dsId: string) { } } +async function refreshAllCommitRefs() { + if (visibleDataSources.value.length === 0) return + refreshingAllCommits.value = true + try { + await Promise.allSettled( + visibleDataSources.value.map((ds) => + apiFetch(`/management/data-sources/${ds.id}/commit-refs/refresh`, { method: 'POST' }), + ), + ) + toast.success('Commit references refreshed') + await loadDataSources() + } catch { + toast.error('Failed to refresh commit references') + } finally { + refreshingAllCommits.value = false + } +} + async function adoptTrackedHeadBaseline(dsId: string) { adoptingBaselines.value[dsId] = true try { @@ -339,32 +503,32 @@ watch(tenantVersion, async () => { </script> <template> - <div class="mx-auto max-w-5xl space-y-6"> - <div class="flex flex-wrap items-center justify-between gap-3"> - <NuxtLink - :to="manageUrl" - class="inline-flex items-center text-sm text-muted-foreground hover:text-foreground" - > - <ChevronLeft class="mr-1 size-4" /> - Back to workspace overview - </NuxtLink> - <Button :disabled="!hasTenant" @click="navigateTo(newSourceUrl)"> - <Plus class="mr-2 size-4" /> - Add data source - </Button> - </div> + <div class="mx-auto max-w-7xl space-y-6"> + <NuxtLink + :to="manageUrl" + class="inline-flex items-center text-sm text-muted-foreground hover:text-foreground" + > + <ChevronLeft class="mr-1 size-4" /> + Back to workspace overview + </NuxtLink> <div class="flex items-center gap-3"> <div class="rounded-lg bg-primary/10 p-2"> <Cable class="size-5 text-primary" /> </div> - <div> + <div class="min-w-0 flex-1"> <h1 class="text-2xl font-semibold tracking-tight">Data Sources</h1> <p class="text-sm text-muted-foreground"> <template v-if="kgName">{{ kgName }} — </template> Manage connected repositories, sync runs, and commit tracking. 
</p> </div> + <div v-if="allSourcesPrepared" class="ml-auto shrink-0"> + <Badge variant="success"> + <Check class="mr-1 size-3" /> + Ready + </Badge> + </div> </div> <Separator /> @@ -374,192 +538,308 @@ watch(tenantVersion, async () => { <p class="font-medium">No tenant selected</p> </div> - <div v-else-if="loading" class="flex justify-center py-16"> + <div v-else-if="loading" class="flex justify-center py-12"> <Loader2 class="size-8 animate-spin text-muted-foreground" /> </div> <template v-else> - <Card v-if="maintainFocus"> - <CardHeader class="pb-2"> - <CardTitle class="text-sm">Maintenance focus</CardTitle> - <CardDescription class="text-xs"> - Showing sources with new commits since the last extraction baseline. + <Card> + <CardHeader> + <CardTitle class="flex items-center gap-2 text-base"> + <Plus class="size-4" /> + Add repositories + </CardTitle> + <CardDescription> + Paste Git URLs (HTTPS or <span class="font-mono text-xs">git@</span>). Private repos need a token below. </CardDescription> </CardHeader> + <CardContent class="space-y-3"> + <div v-for="(url, index) in newUrls" :key="index" class="flex items-center gap-2"> + <input + :value="url" + type="text" + class="flex h-9 flex-1 rounded-md border border-input bg-transparent px-3 py-1 text-sm shadow-sm transition-colors placeholder:text-muted-foreground focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-ring" + placeholder="https://github.com/org/repo" + @input="updateUrl(index, ($event.target as HTMLInputElement).value)" + @keyup.enter="addRepositories" + /> + <Button + variant="ghost" + size="icon" + :disabled="newUrls.length === 1 && !newUrls[0]" + @click="removeUrlField(index)" + > + <Trash2 class="size-4" /> + </Button> + </div> + <div class="space-y-1.5"> + <Label class="text-xs text-muted-foreground">GitHub access token (optional, for new private repos)</Label> + <Input v-model="addToken" type="password" placeholder="ghp_…" autocomplete="off" /> + </div> + <div class="flex flex-wrap 
items-center gap-2"> + <Button variant="outline" size="sm" @click="addUrlField"> + <Plus class="mr-2 size-4" /> + Add another + </Button> + <Button size="sm" :disabled="addingUrls || validNewUrls.length === 0" @click="addRepositories"> + <Loader2 v-if="addingUrls" class="mr-2 size-4 animate-spin" /> + Add to project + </Button> + </div> + </CardContent> </Card> - <div - v-if="visibleDataSources.length === 0" - class="flex flex-col items-center gap-4 py-16 text-center" - > - <p class="text-sm text-muted-foreground"> - <template v-if="maintainFocus"> - No sources need maintenance right now. - </template> - <template v-else> - No data sources connected. - </template> - </p> - <Button v-if="!maintainFocus" @click="navigateTo(newSourceUrl)"> - <Plus class="mr-2 size-4" /> - Add your first data source - </Button> - </div> - - <div v-else id="maintain-section" class="space-y-3"> - <div - v-for="ds in visibleDataSources" - :key="ds.id" - class="rounded-lg border bg-card" - :class="isMaintenanceReady(ds) ? 'border-amber-300/60' : ''" - > - <div class="flex flex-wrap items-center justify-between gap-3 p-4"> - <div class="flex items-center gap-3"> - <div class="rounded-md bg-muted p-2"> - <Cable class="size-4 text-muted-foreground" /> - </div> + <div v-if="dataSources.length > 0" id="maintain-section" class="space-y-4"> + <Card v-if="maintainFocus" class="border-amber-300/50"> + <CardHeader class="pb-2"> + <CardTitle class="text-sm">Maintenance focus</CardTitle> + <CardDescription class="text-xs"> + Showing sources with new commits since the last extraction baseline. 
+ </CardDescription> + </CardHeader> + </Card> + + <Card class="border-border/80 bg-muted/15"> + <CardHeader class="pb-2"> + <div class="flex flex-col gap-2 sm:flex-row sm:items-start sm:justify-between"> <div> - <p class="text-sm font-medium">{{ ds.name }}</p> - <p class="text-xs text-muted-foreground">{{ ds.adapter_type }}</p> - <CopyableText :text="ds.id" label="Data source ID copied" class="mt-0.5" /> - </div> - </div> - <div class="flex flex-wrap items-center gap-2"> - <SyncPhaseIndicator - v-if="ds.sync_runs?.[0]" - :status="ds.sync_runs[0].status" - /> - <Badge v-else variant="secondary" class="text-[10px]">Idle</Badge> - <Button size="sm" variant="outline" @click="openEditConfig(ds)"> - <Settings class="mr-1.5 size-3.5" /> - Edit Config - </Button> - <Button - size="sm" - variant="outline" - class="text-destructive hover:bg-destructive/10" - @click="openDeleteDs(ds)" - > - <Trash2 class="mr-1.5 size-3.5" /> - Delete - </Button> - <Button size="sm" variant="outline" @click="triggerSync(ds.id)"> - Sync Now - </Button> - </div> - </div> - - <div class="border-t px-4 py-3"> - <p class="mb-2 text-[11px] font-semibold uppercase tracking-wider text-muted-foreground"> - Commit Status - </p> - <div class="grid gap-2 sm:grid-cols-3"> - <div class="rounded-md border bg-muted/20 p-2"> - <p class="text-[10px] uppercase tracking-wider text-muted-foreground">Local clone commit</p> - <p class="mt-1 break-all font-mono text-xs">{{ ds.clone_head_commit ?? '—' }}</p> + <CardTitle class="flex items-center gap-2 text-base"> + <GitBranch class="size-4 text-primary" /> + Data sources overview + </CardTitle> </div> - <div class="rounded-md border bg-muted/20 p-2"> - <p class="text-[10px] uppercase tracking-wider text-muted-foreground">Last extraction baseline</p> - <p class="mt-1 break-all font-mono text-xs">{{ ds.last_extraction_baseline_commit ?? 
'—' }}</p> - </div> - <div class="rounded-md border bg-muted/20 p-2"> - <p class="text-[10px] uppercase tracking-wider text-muted-foreground">Tracked branch head</p> - <p class="mt-1 break-all font-mono text-xs">{{ ds.tracked_branch_head_commit ?? '—' }}</p> + <div class="flex flex-wrap gap-2"> + <Button + variant="outline" + size="sm" + :disabled="refreshingAllCommits || visibleDataSources.length === 0" + @click="refreshAllCommitRefs" + > + <Loader2 v-if="refreshingAllCommits" class="mr-2 size-4 animate-spin" /> + <RefreshCw v-else class="mr-2 size-4" /> + Refresh commits + </Button> </div> </div> - <div class="mt-2 flex flex-wrap gap-2"> - <Button - size="sm" - variant="outline" - class="h-7 text-[10px]" - :disabled="refreshingCommitRefs[ds.id] === true" - @click="refreshCommitRefs(ds.id)" - > - <RefreshCw - class="mr-1 size-3" - :class="refreshingCommitRefs[ds.id] ? 'animate-spin' : ''" - /> - Refresh commits - </Button> - <Button - size="sm" - variant="outline" - class="h-7 text-[10px]" - :disabled="adoptingBaselines[ds.id] === true || !isMaintenanceReady(ds)" - @click="adoptTrackedHeadBaseline(ds.id)" - > - Adopt tracked head as baseline - </Button> - </div> - + <CardDescription> + Each row is one connected repository. Compare extraction baseline to tracked branch head + to see when maintenance syncs are needed. + </CardDescription> + </CardHeader> + <CardContent class="space-y-3"> <div - v-if="ds.diff_summary" - class="mt-3 rounded-md border p-2" - :class="isMaintenanceReady(ds) ? 'border-amber-300 bg-amber-50/50 dark:border-amber-800 dark:bg-amber-950/20' : 'bg-muted/10'" + v-if="visibleDataSources.length === 0" + class="rounded-md border bg-muted/20 px-4 py-8 text-center text-sm text-muted-foreground" > - <div class="flex items-center justify-between gap-2 text-xs"> - <span> - <span class="font-medium">{{ ds.diff_summary.total_changed_files }}</span> - changed files - </span> - <Badge - :variant="isMaintenanceReady(ds) ? 
'default' : 'secondary'" - class="text-[10px]" - > - {{ isMaintenanceReady(ds) ? 'New commits available' : 'Up to date' }} - </Badge> - </div> - <Button - v-if="ds.diff_summary.changed_files.length > 0" - size="sm" - variant="ghost" - class="mt-2 h-6 px-2 text-[10px]" - @click="toggleDiffExpanded(ds.id)" - > - {{ isDiffExpanded(ds.id) ? 'Hide changed files' : 'Show changed files' }} - </Button> - <div - v-if="isDiffExpanded(ds.id)" - class="mt-2 max-h-48 space-y-1 overflow-y-auto rounded-md border bg-background/80 p-2" - > - <div - v-for="file in ds.diff_summary.changed_files" - :key="`${file.status}:${file.path}`" - class="flex justify-between gap-2 text-[11px]" - > - <span class="break-all font-mono">{{ file.path }}</span> - <Badge variant="outline" class="h-5 text-[10px] uppercase">{{ file.status }}</Badge> - </div> - </div> + <template v-if="maintainFocus">No sources need maintenance right now.</template> + <template v-else>No data sources to display.</template> </div> - </div> - <div v-if="ds.sync_runs?.length" class="border-t px-4 py-3"> - <p class="mb-2 text-[11px] font-semibold uppercase tracking-wider text-muted-foreground"> - Sync History - </p> - <div class="space-y-1"> - <div - v-for="run in ds.sync_runs" - :key="run.id" - class="flex items-center gap-2 text-xs text-muted-foreground" - > - <SyncPhaseIndicator :status="run.status" /> - <span>{{ new Date(run.started_at).toLocaleString() }}</span> - <span v-if="run.error" class="text-destructive">{{ run.error }}</span> - <Button - size="sm" - variant="ghost" - class="ml-auto h-6 px-2 text-[10px]" - @click="viewLogs(ds, run)" - > - <ScrollText class="mr-1 size-3" /> - View Logs - </Button> - </div> + <div v-else class="overflow-x-auto rounded-md border"> + <table class="w-full min-w-[880px] text-sm"> + <thead> + <tr class="border-b bg-muted/50 text-left"> + <th class="px-3 py-2 font-medium">Source</th> + <th class="px-3 py-2 font-medium">Branch</th> + <th class="px-3 py-2 font-medium">Status</th> + <th 
class="px-3 py-2 font-medium">Last extraction baseline</th> + <th class="px-3 py-2 font-medium">Tracked branch head</th> + <th class="px-3 py-2 font-medium">Actions</th> + </tr> + </thead> + <tbody> + <tr + v-for="ds in visibleDataSources" + :key="ds.id" + class="border-b border-border/60 align-top last:border-0" + :class="isMaintenanceReady(ds) ? 'bg-amber-50/40 dark:bg-amber-950/10' : ''" + > + <td class="px-3 py-2"> + <p class="font-medium leading-tight">{{ ds.name }}</p> + <p + class="mt-0.5 max-w-[20rem] truncate font-mono text-xs text-muted-foreground" + :title="resolveRepoUrl(ds.connection_config)" + > + {{ resolveRepoUrl(ds.connection_config) }} + </p> + </td> + <td class="px-3 py-2 font-mono text-xs"> + {{ resolveTrackedBranch(ds.connection_config) }} + </td> + <td class="px-3 py-2"> + <Badge :variant="prepStatusBadgeVariant(latestStatus(ds))" class="text-xs"> + {{ resolvePrepStatusLabel(latestStatus(ds)) }} + </Badge> + <div v-if="latestStatus(ds) && !isSyncTerminal(latestStatus(ds))" class="mt-1"> + <SyncPhaseIndicator :status="latestStatus(ds)!" 
/> + </div> + </td> + <td class="px-3 py-2 font-mono text-xs"> + <div :class="commitStatusClass(ds.last_extraction_baseline_commit, ds.tracked_branch_head_commit)"> + <span :title="ds.last_extraction_baseline_commit || ''"> + {{ shortCommitHash(ds.last_extraction_baseline_commit) }} + </span> + </div> + <div + class="mt-0.5 text-[10px]" + :class="commitStatusClass(ds.last_extraction_baseline_commit, ds.tracked_branch_head_commit)" + > + {{ commitStatusLabel(ds.last_extraction_baseline_commit, ds.tracked_branch_head_commit) }} + </div> + </td> + <td class="px-3 py-2 font-mono text-xs"> + <span :title="ds.tracked_branch_head_commit || ''"> + {{ shortCommitHash(ds.tracked_branch_head_commit) }} + </span> + </td> + <td class="px-3 py-2"> + <div class="flex flex-wrap gap-1"> + <Button size="sm" variant="ghost" class="h-7 px-2 text-[10px]" @click="openEditConfig(ds)"> + <Settings class="mr-1 size-3" /> + Edit + </Button> + <Button + size="sm" + variant="ghost" + class="h-7 px-2 text-[10px]" + :disabled="refreshingCommitRefs[ds.id] === true" + @click="refreshCommitRefs(ds.id)" + > + <RefreshCw class="mr-1 size-3" :class="refreshingCommitRefs[ds.id] ? 'animate-spin' : ''" /> + Refresh + </Button> + <Button + size="sm" + variant="ghost" + class="h-7 px-2 text-[10px]" + :disabled="!isMaintenanceReady(ds) || adoptingBaselines[ds.id] === true" + @click="adoptTrackedHeadBaseline(ds.id)" + > + Adopt baseline + </Button> + <Button + size="sm" + variant="ghost" + class="h-7 px-2 text-[10px]" + @click="triggerSync(ds.id, 'ingest_only')" + > + Prepare + </Button> + <Button + size="sm" + variant="ghost" + class="h-7 px-2 text-[10px] text-destructive" + @click="openDeleteDs(ds)" + > + <Trash2 class="mr-1 size-3" /> + Delete + </Button> + </div> + + <div + v-if="ds.diff_summary && ds.diff_summary.total_changed_files > 0" + class="mt-2 rounded border p-2 text-[11px]" + :class="isMaintenanceReady(ds) ? 
'border-amber-300 bg-amber-50/50 dark:border-amber-800 dark:bg-amber-950/20' : 'bg-muted/10'" + > + <div class="flex items-center justify-between gap-2"> + <span> + <span class="font-medium">{{ ds.diff_summary.total_changed_files }}</span> + changed files + </span> + <Badge :variant="isMaintenanceReady(ds) ? 'default' : 'secondary'" class="text-[10px]"> + {{ isMaintenanceReady(ds) ? 'New commits available' : 'Up to date' }} + </Badge> + </div> + <Button + size="sm" + variant="ghost" + class="mt-1 h-6 px-2 text-[10px]" + @click="toggleDiffExpanded(ds.id)" + > + {{ isDiffExpanded(ds.id) ? 'Hide files' : 'Show files' }} + </Button> + <div + v-if="isDiffExpanded(ds.id)" + class="mt-1 max-h-32 space-y-1 overflow-y-auto" + > + <div + v-for="file in ds.diff_summary.changed_files" + :key="`${file.status}:${file.path}`" + class="flex justify-between gap-2 font-mono" + > + <span class="break-all">{{ file.path }}</span> + <Badge variant="outline" class="h-5 text-[10px] uppercase">{{ file.status }}</Badge> + </div> + </div> + </div> + + <div v-if="ds.sync_runs?.length" class="mt-2 space-y-1"> + <div + v-for="run in ds.sync_runs.slice(0, 2)" + :key="run.id" + class="flex items-center gap-2 text-[10px] text-muted-foreground" + > + <SyncPhaseIndicator :status="run.status" /> + <span>{{ new Date(run.started_at).toLocaleString() }}</span> + <Button + size="sm" + variant="ghost" + class="ml-auto h-5 px-1" + @click="viewLogs(ds, run)" + > + <ScrollText class="mr-1 size-3" /> + Logs + </Button> + </div> + </div> + </td> + </tr> + </tbody> + </table> </div> - </div> - </div> + </CardContent> + </Card> + </div> + + <Card v-if="allSourcesPrepared" class="border-primary/40"> + <CardHeader> + <CardTitle class="flex items-center gap-2 text-base text-green-600 dark:text-green-400"> + <Check class="size-5" /> + Data Sources ready + </CardTitle> + <CardDescription> + {{ preparedCount }} of {{ dataSources.length }} source{{ dataSources.length === 1 ? 
'' : 's' }} + prepared for graph management and extraction. + </CardDescription> + </CardHeader> + <CardContent class="flex flex-col gap-4 sm:flex-row sm:flex-wrap"> + <p class="w-full text-sm text-muted-foreground"> + Ingestion context is prepared. Open graph management to design schema, run extraction, + or continue in the manage workspace. + </p> + <Button as-child> + <NuxtLink :to="graphManagementUrl" class="inline-flex items-center gap-2"> + Open Graph Management + <ArrowRight class="size-4" /> + </NuxtLink> + </Button> + <Button as-child variant="outline"> + <NuxtLink :to="manageUrl" class="inline-flex items-center gap-2"> + <LayoutDashboard class="size-4" /> + Back to workspace overview + </NuxtLink> + </Button> + </CardContent> + </Card> + + <div + v-if="!allSourcesPrepared && dataSources.length === 0" + class="rounded-lg border bg-muted/50 p-4" + > + <p class="text-sm text-muted-foreground"> + <strong>Flow:</strong> add repository URLs above, refresh commits to resolve branch heads, + then prepare ingestion context before opening graph management. 
+ </p> </div> </template> diff --git a/src/dev-ui/app/pages/knowledge-graphs/[kgId]/data-sources/new.vue b/src/dev-ui/app/pages/knowledge-graphs/[kgId]/data-sources/new.vue index 8a5344106..331d775e6 100644 --- a/src/dev-ui/app/pages/knowledge-graphs/[kgId]/data-sources/new.vue +++ b/src/dev-ui/app/pages/knowledge-graphs/[kgId]/data-sources/new.vue @@ -314,18 +314,25 @@ async function refreshSourceSyncStatus(row: CreatedSourceRow) { } } -async function pollUntilTerminal(row: CreatedSourceRow, timeoutMs = 600_000) { +async function pollUntilAllTerminal(rows: CreatedSourceRow[], timeoutMs = 600_000) { const started = Date.now() while (Date.now() - started < timeoutMs) { - await refreshSourceSyncStatus(row) - if (isSyncTerminal(row.syncStatus as SyncRunStatus)) return - await new Promise((resolve) => setTimeout(resolve, 3000)) + await Promise.all(rows.map((row) => refreshSourceSyncStatus(row))) + const finished = rows.filter((row) => isSyncTerminal(row.syncStatus as SyncRunStatus)).length + syncCompletedInRun.value = finished + syncStepLabel.value = `${finished} / ${rows.length}` + if (finished === rows.length) return + await new Promise((resolve) => setTimeout(resolve, 1500)) + } + for (const row of rows) { + if (!isSyncTerminal(row.syncStatus as SyncRunStatus)) { + row.syncStatus = 'failed' + row.syncError = 'Sync timed out' + } } - row.syncStatus = 'failed' - row.syncError = 'Sync timed out' } -async function runSequentialIngestionPrep() { +async function runParallelIngestionPrep() { const queue = createdSources.value.filter( (s) => s.syncStatus === 'idle' || s.syncStatus === 'failed' || s.syncStatus === 'queued', ) @@ -338,33 +345,37 @@ async function runSequentialIngestionPrep() { syncRunTotal.value = queue.length syncCompletedInRun.value = 0 readyForStats.value = false + syncStepLabel.value = `0 / ${queue.length}` + syncActiveName.value = `${queue.length} source${queue.length === 1 ? 
'' : 's'}` - try { - for (let i = 0; i < queue.length; i++) { - const target = queue[i]! - syncStepLabel.value = `${i + 1} / ${queue.length}` - syncActiveName.value = target.name - target.syncStatus = 'pending' - target.syncError = null + for (const target of queue) { + target.syncStatus = 'pending' + target.syncError = null + } - try { - await apiFetch(`/management/data-sources/${target.id}/sync`, { - method: 'POST', - body: { mode: 'ingest_only' }, - }) - await pollUntilTerminal(target) - if (target.syncStatus === 'failed') { - toast.error(`Preparation failed: ${target.name}`, { - description: target.syncError ?? undefined, + try { + await Promise.allSettled( + queue.map(async (target) => { + try { + await apiFetch(`/management/data-sources/${target.id}/sync`, { + method: 'POST', + body: { mode: 'ingest_only' }, }) + } catch (err: unknown) { + target.syncStatus = 'failed' + target.syncError = err instanceof Error ? err.message : 'Preparation failed' } - } catch (err: unknown) { - target.syncStatus = 'failed' - target.syncError = err instanceof Error ? err.message : 'Preparation failed' - toast.error(`Preparation failed: ${target.name}`, { description: target.syncError }) - } + }), + ) - syncCompletedInRun.value = i + 1 + await pollUntilAllTerminal(queue) + + for (const target of queue) { + if (target.syncStatus === 'failed') { + toast.error(`Preparation failed: ${target.name}`, { + description: target.syncError ?? undefined, + }) + } } const allPrepared = createdSources.value.every((s) => s.syncStatus === 'ingested') @@ -562,8 +573,7 @@ onUnmounted(() => { <CardTitle class="text-base">Prepare ingestion context</CardTitle> <CardDescription> Fetch repository content and build job packages for each source. No AI extraction - runs here — that happens later in graph management. Sources are prepared one at a - time so you can follow progress. + runs here — that happens later in graph management. Sources are prepared in parallel. 
</CardDescription> </CardHeader> <CardContent class="space-y-4"> @@ -572,7 +582,7 @@ onUnmounted(() => { type="button" size="sm" :disabled="syncRunActive || createdSources.length === 0" - @click="runSequentialIngestionPrep" + @click="runParallelIngestionPrep" > <Loader2 v-if="syncRunActive" class="mr-2 size-4 animate-spin" /> <GitBranch v-if="!syncRunActive" class="mr-2 size-4" /> diff --git a/src/dev-ui/app/tests/data-sources.test.ts b/src/dev-ui/app/tests/data-sources.test.ts index be978914a..f576c0cb3 100644 --- a/src/dev-ui/app/tests/data-sources.test.ts +++ b/src/dev-ui/app/tests/data-sources.test.ts @@ -3211,9 +3211,9 @@ describe('Commit-hash status cues - structural verification', () => { it('renders commit status section with canonical commit labels', () => { expect(source).toContain('Commit Status') - expect(source).toContain('Local clone commit') expect(source).toContain('Commit during last extraction') expect(source).toContain('Tracked branch head commit') + expect(source).not.toContain('Local clone commit') }) it('renders visual readiness cue when new commits exist', () => { diff --git a/src/dev-ui/app/tests/kg-data-sources-phase1.test.ts b/src/dev-ui/app/tests/kg-data-sources-phase1.test.ts new file mode 100644 index 000000000..68837d47d --- /dev/null +++ b/src/dev-ui/app/tests/kg-data-sources-phase1.test.ts @@ -0,0 +1,72 @@ +import { describe, it, expect } from 'vitest' +import { readFileSync } from 'fs' +import { resolve } from 'path' +import { + commitStatusLabel, + prepStatusBadgeVariant, + resolvePrepStatusLabel, + resolveRepoUrl, + shortCommitHash, +} from '@/utils/kgDataSourcesCommits' + +const phase1Vue = readFileSync( + resolve(__dirname, '../pages/knowledge-graphs/[kgId]/data-sources/index.vue'), + 'utf-8', +) + +const newVue = readFileSync( + resolve(__dirname, '../pages/knowledge-graphs/[kgId]/data-sources/new.vue'), + 'utf-8', +) + +describe('KG data sources phase1 layout', () => { + it('uses wide page container like k-extract phase1', () 
=> { + expect(phase1Vue).toContain('max-w-7xl') + }) + + it('renders add repositories and overview sections', () => { + expect(phase1Vue).toContain('Add repositories') + expect(phase1Vue).toContain('Data sources overview') + expect(phase1Vue).toContain('Add to project') + }) + + it('renders data sources ready footer with graph management link', () => { + expect(phase1Vue).toContain('Data Sources ready') + expect(phase1Vue).toContain('Open Graph Management') + expect(phase1Vue).toContain('step=graph-management') + }) + + it('does not render legacy per-card commit status layout', () => { + expect(phase1Vue).not.toContain('Commit Status') + expect(phase1Vue).not.toContain('Local clone commit') + }) +}) + +describe('KG wizard parallel ingestion prep', () => { + it('prepares sources in parallel', () => { + expect(newVue).toContain('runParallelIngestionPrep') + expect(newVue).toContain('Promise.allSettled') + expect(newVue).toContain('pollUntilAllTerminal') + expect(newVue).not.toContain('runSequentialIngestionPrep') + }) +}) + +describe('kgDataSourcesCommits helpers', () => { + it('shortens commit hashes for display', () => { + expect(shortCommitHash('abcdef1234567890')).toBe('abcdef123456') + expect(shortCommitHash(null)).toBe('—') + }) + + it('derives repo url from connection config', () => { + expect(resolveRepoUrl({ repo_url: 'https://github.com/org/repo' })).toBe( + 'https://github.com/org/repo', + ) + expect(resolveRepoUrl({ owner: 'org', repo: 'repo', branch: 'dev' })).toContain('/tree/dev') + }) + + it('maps sync statuses to prep labels', () => { + expect(resolvePrepStatusLabel('ingested')).toBe('Prepared') + expect(prepStatusBadgeVariant('ingested')).toBe('success') + expect(commitStatusLabel('abc', 'abc')).toBe('matches branch head') + }) +}) diff --git a/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts b/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts index 4a770be9b..a7e3b1e24 100644 --- 
a/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts +++ b/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts @@ -310,6 +310,8 @@ describe('KG-MANAGE-005 - graph-scoped data sources step', () => { ) expect(kgDataSourcesIndex).toContain('Back to workspace overview') expect(kgDataSourcesIndex).toContain('buildKgManageUrl') + expect(kgDataSourcesIndex).toContain('Data sources overview') + expect(kgDataSourcesIndex).toContain('max-w-7xl') }) }) diff --git a/src/dev-ui/app/utils/kgDataSourcesCommits.ts b/src/dev-ui/app/utils/kgDataSourcesCommits.ts new file mode 100644 index 000000000..26ab8ef0c --- /dev/null +++ b/src/dev-ui/app/utils/kgDataSourcesCommits.ts @@ -0,0 +1,78 @@ +import type { SyncRunStatus } from '@/utils/kgDataSourcesSync' + +export function shortCommitHash(hash: string | null | undefined): string { + if (!hash) return '—' + return hash.length > 12 ? hash.slice(0, 12) : hash +} + +export function commitStatusClass( + current: string | null | undefined, + remote: string | null | undefined, +): string { + if (!current || !remote) return 'text-muted-foreground' + return current === remote + ? 'text-green-600 dark:text-green-400' + : 'text-amber-600 dark:text-amber-400' +} + +export function commitStatusLabel( + current: string | null | undefined, + remote: string | null | undefined, +): string { + if (!current || !remote) return 'not set' + return current === remote ? 'matches branch head' : 'new commits on branch' +} + +export function resolveRepoUrl(connectionConfig: Record<string, string> | undefined): string { + if (!connectionConfig) return '—' + if (connectionConfig.repo_url) return connectionConfig.repo_url + if (connectionConfig.owner && connectionConfig.repo) { + const branch = connectionConfig.branch ?? 
'main' + return `https://github.com/${connectionConfig.owner}/${connectionConfig.repo}/tree/${branch}` + } + return '—' +} + +export function resolveTrackedBranch(connectionConfig: Record<string, string> | undefined): string { + if (!connectionConfig) return 'main' + return connectionConfig.branch ?? 'main' +} + +export type PrepStatusLabel = 'Prepared' | 'Synced' | 'Preparing' | 'Failed' | 'Not prepared' + +export function resolvePrepStatusLabel(status: SyncRunStatus | undefined): PrepStatusLabel { + switch (status) { + case 'ingested': + return 'Prepared' + case 'completed': + return 'Synced' + case 'failed': + return 'Failed' + case 'pending': + case 'ingesting': + case 'ai_extracting': + case 'applying': + return 'Preparing' + default: + return 'Not prepared' + } +} + +export function prepStatusBadgeVariant( + status: SyncRunStatus | undefined, +): 'success' | 'destructive' | 'secondary' | 'outline' { + switch (status) { + case 'ingested': + case 'completed': + return 'success' + case 'failed': + return 'destructive' + case 'pending': + case 'ingesting': + case 'ai_extracting': + case 'applying': + return 'secondary' + default: + return 'outline' + } +} From e0001b804dc03b90c500e8e3c1d3b5d95154319c Mon Sep 17 00:00:00 2001 From: aredenba-rh <aredenba@redhat.com> Date: Thu, 28 May 2026 18:40:29 -0400 Subject: [PATCH 058/153] feat(data-sources): persist prepare metadata and bulk actions on KG page Track last prepared commit and file count during ingest-only runs so the overview can show branch freshness, file totals, and bulk prepare/check flows. 
Co-authored-by: Cursor <cursoragent@cursor.com> --- ...3l4_add_prepared_fields_to_data_sources.py | 35 ++++ .../application/services/ingestion_service.py | 18 +- .../ingestion/application/value_objects.py | 16 ++ .../ingestion/infrastructure/event_handler.py | 11 +- src/api/ingestion/ports/services.py | 5 +- src/api/main.py | 11 +- .../domain/aggregates/data_source.py | 22 ++ .../infrastructure/models/data_source.py | 2 + .../repositories/data_source_repository.py | 6 + .../infrastructure/sync_lifecycle_handler.py | 33 +++ .../presentation/data_sources/models.py | 16 ++ .../application/test_ingestion_service.py | 18 +- .../test_ingestion_event_handler.py | 11 +- .../test_sync_lifecycle_handler.py | 26 ++- .../tests/unit/management/test_data_source.py | 33 +++ .../unit/test_sessioned_ingestion_handler.py | 66 ++++++ .../[kgId]/data-sources/index.vue | 188 +++++++++--------- .../app/tests/kg-data-sources-phase1.test.ts | 25 ++- src/dev-ui/app/utils/kgDataSourcesCommits.ts | 32 +++ 19 files changed, 455 insertions(+), 119 deletions(-) create mode 100644 src/api/infrastructure/migrations/versions/g9h0i1j2k3l4_add_prepared_fields_to_data_sources.py create mode 100644 src/api/ingestion/application/value_objects.py diff --git a/src/api/infrastructure/migrations/versions/g9h0i1j2k3l4_add_prepared_fields_to_data_sources.py b/src/api/infrastructure/migrations/versions/g9h0i1j2k3l4_add_prepared_fields_to_data_sources.py new file mode 100644 index 000000000..4c2c99c62 --- /dev/null +++ b/src/api/infrastructure/migrations/versions/g9h0i1j2k3l4_add_prepared_fields_to_data_sources.py @@ -0,0 +1,35 @@ +"""add prepared commit and file count columns to data_sources + +Tracks the commit SHA and file count captured during the last ingest-only +prepare run so the UI can show branch freshness and JobPackage scope. 
+ +Revision ID: g9h0i1j2k3l4 +Revises: fc2d3e4f5a6b +Create Date: 2026-05-26 +""" + +from typing import Sequence, Union + +import sqlalchemy as sa +from alembic import op + +revision: str = "g9h0i1j2k3l4" +down_revision: Union[str, Sequence[str], None] = "fc2d3e4f5a6b" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + op.add_column( + "data_sources", + sa.Column("last_prepared_commit", sa.String(length=64), nullable=True), + ) + op.add_column( + "data_sources", + sa.Column("last_prepared_file_count", sa.Integer(), nullable=True), + ) + + +def downgrade() -> None: + op.drop_column("data_sources", "last_prepared_file_count") + op.drop_column("data_sources", "last_prepared_commit") diff --git a/src/api/ingestion/application/services/ingestion_service.py b/src/api/ingestion/application/services/ingestion_service.py index 489a10ef4..293554c78 100644 --- a/src/api/ingestion/application/services/ingestion_service.py +++ b/src/api/ingestion/application/services/ingestion_service.py @@ -9,6 +9,7 @@ from pathlib import Path from typing import TYPE_CHECKING +from ingestion.application.value_objects import IngestionRunResult from ingestion.ports.adapters import IDatasourceAdapter if TYPE_CHECKING: @@ -16,7 +17,6 @@ from shared_kernel.job_package.builder import JobPackageBuilder from shared_kernel.job_package.value_objects import ( AdapterCheckpoint, - JobPackageId, SyncMode, ) @@ -61,7 +61,7 @@ async def run( tenant_id: str | None = None, credentials: dict[str, str] | None = None, baseline_commit: str | None = None, - ) -> JobPackageId: + ) -> IngestionRunResult: """Run the ingestion pipeline for a data source sync. 
Args: @@ -77,7 +77,7 @@ async def run( incremental extraction checkpoint state Returns: - The JobPackageId of the produced ZIP archive + IngestionRunResult with the produced JobPackage metadata Raises: ValueError: If the adapter_type is not registered @@ -138,4 +138,14 @@ async def run( self._work_dir.mkdir(parents=True, exist_ok=True) builder.build(self._work_dir) - return builder._package_id + prepared_commit_sha = None + if result.new_checkpoint is not None: + prepared_commit_sha = result.new_checkpoint.data.get("commit_sha") + + return IngestionRunResult( + job_package_id=builder._package_id, + entry_count=len(result.changeset_entries), + prepared_commit_sha=( + str(prepared_commit_sha) if prepared_commit_sha is not None else None + ), + ) diff --git a/src/api/ingestion/application/value_objects.py b/src/api/ingestion/application/value_objects.py new file mode 100644 index 000000000..5aa28ec68 --- /dev/null +++ b/src/api/ingestion/application/value_objects.py @@ -0,0 +1,16 @@ +"""Application-layer value objects for the Ingestion bounded context.""" + +from __future__ import annotations + +from dataclasses import dataclass + +from shared_kernel.job_package.value_objects import JobPackageId + + +@dataclass(frozen=True) +class IngestionRunResult: + """Outcome of a successful ingestion pipeline run.""" + + job_package_id: JobPackageId + entry_count: int + prepared_commit_sha: str | None diff --git a/src/api/ingestion/infrastructure/event_handler.py b/src/api/ingestion/infrastructure/event_handler.py index 788920217..052f6b9bc 100644 --- a/src/api/ingestion/infrastructure/event_handler.py +++ b/src/api/ingestion/infrastructure/event_handler.py @@ -106,6 +106,9 @@ async def handle( "data_source_id": data_source_id, "knowledge_graph_id": knowledge_graph_id, "no_changes_detected": True, + "prepared_commit_sha": payload.get( + "tracked_branch_head_commit" + ), "occurred_at": now.isoformat(), }, occurred_at=now, @@ -129,7 +132,7 @@ async def handle( return try: - 
job_package_id = await self._ingestion_service.run( + ingestion_result = await self._ingestion_service.run( sync_run_id=sync_run_id, data_source_id=data_source_id, knowledge_graph_id=knowledge_graph_id, @@ -168,7 +171,9 @@ async def handle( "sync_run_id": sync_run_id, "data_source_id": data_source_id, "knowledge_graph_id": knowledge_graph_id, - "job_package_id": str(job_package_id), + "job_package_id": str(ingestion_result.job_package_id), + "prepared_commit_sha": ingestion_result.prepared_commit_sha, + "prepared_file_count": ingestion_result.entry_count, "occurred_at": now.isoformat(), }, occurred_at=now, @@ -182,7 +187,7 @@ async def handle( "sync_run_id": sync_run_id, "data_source_id": data_source_id, "knowledge_graph_id": knowledge_graph_id, - "job_package_id": str(job_package_id), + "job_package_id": str(ingestion_result.job_package_id), "occurred_at": now.isoformat(), }, occurred_at=now, diff --git a/src/api/ingestion/ports/services.py b/src/api/ingestion/ports/services.py index 6aee85417..150ccf224 100644 --- a/src/api/ingestion/ports/services.py +++ b/src/api/ingestion/ports/services.py @@ -4,6 +4,7 @@ from typing import Protocol +from ingestion.application.value_objects import IngestionRunResult from shared_kernel.job_package.value_objects import JobPackageId @@ -25,7 +26,7 @@ async def run( tenant_id: str | None = None, credentials: dict[str, str] | None = None, baseline_commit: str | None = None, - ) -> JobPackageId: + ) -> IngestionRunResult: """Run the ingestion pipeline. 
Args: @@ -39,7 +40,7 @@ async def run( baseline_commit: Optional commit SHA used as incremental baseline Returns: - JobPackageId for the produced archive + IngestionRunResult with the produced archive metadata Raises: ValueError: If the adapter_type is unknown diff --git a/src/api/main.py b/src/api/main.py index b45a75cb9..ecdf42dcc 100644 --- a/src/api/main.py +++ b/src/api/main.py @@ -249,10 +249,13 @@ async def handle(self, event_type: str, payload: dict[str, Any]) -> None: if data_source_id and adapter_type == "github": ds = await ds_repo.get_by_id(DataSourceId(value=data_source_id)) if ds is not None: - if ds.last_extraction_baseline_commit: - enriched_payload["baseline_commit"] = ( - ds.last_extraction_baseline_commit - ) + pipeline_mode = str(payload.get("pipeline_mode", "full")) + if pipeline_mode == "ingest_only": + baseline_commit = ds.last_prepared_commit + else: + baseline_commit = ds.last_extraction_baseline_commit + if baseline_commit: + enriched_payload["baseline_commit"] = baseline_commit if ds.credentials_path and tenant_id and credential_reader is not None: try: diff --git a/src/api/management/domain/aggregates/data_source.py b/src/api/management/domain/aggregates/data_source.py index e61ee09a5..09fe5aa38 100644 --- a/src/api/management/domain/aggregates/data_source.py +++ b/src/api/management/domain/aggregates/data_source.py @@ -66,6 +66,8 @@ class DataSource: clone_head_commit: str | None = None last_extraction_baseline_commit: str | None = None tracked_branch_head_commit: str | None = None + last_prepared_commit: str | None = None + last_prepared_file_count: int | None = None ontology: Ontology | None = None _pending_events: list[DomainEvent] = field(default_factory=list, repr=False) _probe: DataSourceProbe = field( @@ -378,6 +380,26 @@ def advance_extraction_baseline_to_tracked_head(self) -> None: if self.tracked_branch_head_commit: self.last_extraction_baseline_commit = self.tracked_branch_head_commit + def record_ingestion_prepared( + self, 
+ *, + prepared_commit: str | None, + prepared_file_count: int | None = None, + ) -> None: + """Record the commit and file count from a successful ingest-only prepare. + + Raises: + AggregateDeletedError: If the data source has been marked for deletion + """ + if self._deleted: + raise AggregateDeletedError( + "Cannot record ingestion prepare on a deleted data source" + ) + if prepared_commit: + self.last_prepared_commit = prepared_commit + if prepared_file_count is not None: + self.last_prepared_file_count = prepared_file_count + def mark_for_deletion( self, *, diff --git a/src/api/management/infrastructure/models/data_source.py b/src/api/management/infrastructure/models/data_source.py index c8b5da737..737a68d7d 100644 --- a/src/api/management/infrastructure/models/data_source.py +++ b/src/api/management/infrastructure/models/data_source.py @@ -49,6 +49,8 @@ class DataSourceModel(Base, TimestampMixin): tracked_branch_head_commit: Mapped[str | None] = mapped_column( String(64), nullable=True ) + last_prepared_commit: Mapped[str | None] = mapped_column(String(64), nullable=True) + last_prepared_file_count: Mapped[int | None] = mapped_column(nullable=True) ontology_json: Mapped[dict | None] = mapped_column(JSONB, nullable=True) __table_args__ = ( diff --git a/src/api/management/infrastructure/repositories/data_source_repository.py b/src/api/management/infrastructure/repositories/data_source_repository.py index de2f23ff6..f4e650b3c 100644 --- a/src/api/management/infrastructure/repositories/data_source_repository.py +++ b/src/api/management/infrastructure/repositories/data_source_repository.py @@ -85,6 +85,8 @@ async def save(self, data_source: DataSource) -> None: data_source.last_extraction_baseline_commit ) model.tracked_branch_head_commit = data_source.tracked_branch_head_commit + model.last_prepared_commit = data_source.last_prepared_commit + model.last_prepared_file_count = data_source.last_prepared_file_count model.updated_at = data_source.updated_at 
model.ontology_json = ontology_json else: @@ -104,6 +106,8 @@ async def save(self, data_source: DataSource) -> None: data_source.last_extraction_baseline_commit ), tracked_branch_head_commit=data_source.tracked_branch_head_commit, + last_prepared_commit=data_source.last_prepared_commit, + last_prepared_file_count=data_source.last_prepared_file_count, ontology_json=ontology_json, created_at=data_source.created_at, updated_at=data_source.updated_at, @@ -220,5 +224,7 @@ def _to_domain(self, model: DataSourceModel) -> DataSource: clone_head_commit=model.clone_head_commit, last_extraction_baseline_commit=model.last_extraction_baseline_commit, tracked_branch_head_commit=model.tracked_branch_head_commit, + last_prepared_commit=model.last_prepared_commit, + last_prepared_file_count=model.last_prepared_file_count, ontology=ontology, ) diff --git a/src/api/management/infrastructure/sync_lifecycle_handler.py b/src/api/management/infrastructure/sync_lifecycle_handler.py index 023f42112..97ebe2927 100644 --- a/src/api/management/infrastructure/sync_lifecycle_handler.py +++ b/src/api/management/infrastructure/sync_lifecycle_handler.py @@ -139,6 +139,12 @@ async def handle( sync_run.logs.append( f"[{now.isoformat()}] Ingestion context prepared for later extraction." 
) + await self._update_data_source_ingestion_prepared( + data_source_id=sync_run.data_source_id, + prepared_commit=payload.get("prepared_commit_sha"), + prepared_file_count=payload.get("prepared_file_count"), + no_changes_detected=payload.get("no_changes_detected") is True, + ) elif event_type == "MutationsApplied": sync_run.status = "completed" @@ -229,3 +235,30 @@ async def _update_data_source_last_sync_at( ds.record_sync_completed() ds.advance_extraction_baseline_to_tracked_head() await self._ds_repo.save(ds) + + async def _update_data_source_ingestion_prepared( + self, + *, + data_source_id: str, + prepared_commit: str | None, + prepared_file_count: int | None, + no_changes_detected: bool, + ) -> None: + """Persist ingest-only prepare metadata on the data source.""" + ds = await self._ds_repo.get_by_id(DataSourceId(value=data_source_id)) + if ds is None: + return + + commit = prepared_commit + if not commit and no_changes_detected: + commit = ds.tracked_branch_head_commit + + file_count = prepared_file_count + if no_changes_detected: + file_count = None + + ds.record_ingestion_prepared( + prepared_commit=commit, + prepared_file_count=file_count, + ) + await self._ds_repo.save(ds) diff --git a/src/api/management/presentation/data_sources/models.py b/src/api/management/presentation/data_sources/models.py index 1d3a380b8..f05b9b42d 100644 --- a/src/api/management/presentation/data_sources/models.py +++ b/src/api/management/presentation/data_sources/models.py @@ -199,6 +199,12 @@ class DataSourceResponse(BaseModel): tracked_branch_head_commit: str | None = Field( None, description="Latest known commit at the tracked source branch head" ) + last_prepared_commit: str | None = Field( + None, description="Commit SHA captured during the last ingest-only prepare" + ) + last_prepared_file_count: int | None = Field( + None, description="Number of files in the JobPackage from the last prepare" + ) connection_config: dict[str, str] = Field( default_factory=dict, 
description="Adapter connection configuration (non-secret)", @@ -231,6 +237,8 @@ def from_domain(cls, ds: DataSource) -> DataSourceResponse: clone_head_commit=ds.clone_head_commit, last_extraction_baseline_commit=ds.last_extraction_baseline_commit, tracked_branch_head_commit=ds.tracked_branch_head_commit, + last_prepared_commit=ds.last_prepared_commit, + last_prepared_file_count=ds.last_prepared_file_count, connection_config=dict(ds.connection_config), created_at=ds.created_at, updated_at=ds.updated_at, @@ -476,6 +484,12 @@ class DataSourceWithSyncResponse(BaseModel): tracked_branch_head_commit: str | None = Field( None, description="Latest known commit at the tracked source branch head" ) + last_prepared_commit: str | None = Field( + None, description="Commit SHA captured during the last ingest-only prepare" + ) + last_prepared_file_count: int | None = Field( + None, description="Number of files in the JobPackage from the last prepare" + ) connection_config: dict[str, str] = Field( default_factory=dict, description="Adapter connection configuration (non-secret)", @@ -515,6 +529,8 @@ def from_domain_pair( clone_head_commit=ds.clone_head_commit, last_extraction_baseline_commit=ds.last_extraction_baseline_commit, tracked_branch_head_commit=ds.tracked_branch_head_commit, + last_prepared_commit=ds.last_prepared_commit, + last_prepared_file_count=ds.last_prepared_file_count, connection_config=dict(ds.connection_config), created_at=ds.created_at, updated_at=ds.updated_at, diff --git a/src/api/tests/unit/ingestion/application/test_ingestion_service.py b/src/api/tests/unit/ingestion/application/test_ingestion_service.py index 9d5be9cd8..3a06cf64f 100644 --- a/src/api/tests/unit/ingestion/application/test_ingestion_service.py +++ b/src/api/tests/unit/ingestion/application/test_ingestion_service.py @@ -12,6 +12,7 @@ import pytest from ingestion.application.services.ingestion_service import IngestionService +from ingestion.application.value_objects import IngestionRunResult 
from ingestion.ports.adapters import ExtractionResult, IDatasourceAdapter from shared_kernel.job_package.value_objects import ( AdapterCheckpoint, @@ -39,7 +40,7 @@ def _make_extraction_result( content_type="text/x-python", metadata={}, ) - checkpoint = AdapterCheckpoint(schema_version="1.0.0", data={}) + checkpoint = AdapterCheckpoint(schema_version="1.0.0", data={"commit_sha": "deadbeef"}) return ExtractionResult( changeset_entries=[entry], content_blobs={content_ref.hex_digest: content}, @@ -95,7 +96,7 @@ async def test_run_returns_job_package_id(self): adapter_registry=registry, work_dir=Path(tmpdir), ) - job_id = await service.run( + result = await service.run( sync_run_id="run-001", data_source_id="ds-001", knowledge_graph_id="kg-001", @@ -104,7 +105,10 @@ async def test_run_returns_job_package_id(self): credentials_path=None, ) - assert isinstance(job_id, JobPackageId) + assert isinstance(result, IngestionRunResult) + assert isinstance(result.job_package_id, JobPackageId) + assert result.entry_count == 1 + assert result.prepared_commit_sha == "deadbeef" async def test_run_creates_zip_archive(self): """run() should create a ZIP archive in the work directory.""" @@ -117,7 +121,7 @@ async def test_run_creates_zip_archive(self): adapter_registry=registry, work_dir=work_dir, ) - job_id = await service.run( + result = await service.run( sync_run_id="run-001", data_source_id="ds-001", knowledge_graph_id="kg-001", @@ -126,7 +130,7 @@ async def test_run_creates_zip_archive(self): credentials_path=None, ) # The archive should exist - archive_path = work_dir / job_id.archive_name() + archive_path = work_dir / result.job_package_id.archive_name() assert archive_path.exists() async def test_run_raises_for_unknown_adapter(self): @@ -173,7 +177,7 @@ async def test_run_handles_empty_changeset(self): adapter_registry=registry, work_dir=Path(tmpdir), ) - job_id = await service.run( + result = await service.run( sync_run_id="run-001", data_source_id="ds-001", 
knowledge_graph_id="kg-001", @@ -181,7 +185,7 @@ async def test_run_handles_empty_changeset(self): connection_config={}, credentials_path=None, ) - assert isinstance(job_id, JobPackageId) + assert isinstance(result, IngestionRunResult) async def test_run_uses_baseline_commit_as_checkpoint(self): """run() should convert baseline_commit into an adapter checkpoint.""" diff --git a/src/api/tests/unit/ingestion/infrastructure/test_ingestion_event_handler.py b/src/api/tests/unit/ingestion/infrastructure/test_ingestion_event_handler.py index ec77a0b35..ce23acb01 100644 --- a/src/api/tests/unit/ingestion/infrastructure/test_ingestion_event_handler.py +++ b/src/api/tests/unit/ingestion/infrastructure/test_ingestion_event_handler.py @@ -14,6 +14,7 @@ import pytest from ingestion.infrastructure.event_handler import IngestionEventHandler +from ingestion.application.value_objects import IngestionRunResult from shared_kernel.job_package.value_objects import ( JobPackageId, ) @@ -69,7 +70,7 @@ async def run( tenant_id: str | None = None, credentials: dict[str, str] | None = None, baseline_commit: str | None = None, - ) -> JobPackageId: + ) -> IngestionRunResult: self.calls.append( { "sync_run_id": sync_run_id, @@ -82,7 +83,11 @@ async def run( ) if self._fail: raise RuntimeError(self._error) - return JobPackageId(value="01HRZZZZZZZZZZZZZZZZZZZZZ0") + return IngestionRunResult( + job_package_id=JobPackageId(value="01HRZZZZZZZZZZZZZZZZZZZZZ0"), + entry_count=42, + prepared_commit_sha="abc123def456", + ) @pytest.fixture @@ -252,6 +257,8 @@ async def test_emits_ingestion_prepared_when_ingest_only( event = outbox.appended[0] assert event["event_type"] == "IngestionPrepared" assert event["payload"]["job_package_id"] is not None + assert event["payload"]["prepared_commit_sha"] == "abc123def456" + assert event["payload"]["prepared_file_count"] == 42 async def test_no_changes_ingest_only_emits_ingestion_prepared( self, diff --git 
a/src/api/tests/unit/management/infrastructure/test_sync_lifecycle_handler.py b/src/api/tests/unit/management/infrastructure/test_sync_lifecycle_handler.py index 624beadc4..ae9a0e67d 100644 --- a/src/api/tests/unit/management/infrastructure/test_sync_lifecycle_handler.py +++ b/src/api/tests/unit/management/infrastructure/test_sync_lifecycle_handler.py @@ -130,18 +130,42 @@ async def test_ingestion_prepared_sets_ingested( run = _make_sync_run(status="ingesting") mock_sync_run_repo.get_by_id.return_value = run + from management.domain.aggregates import DataSource + from management.domain.value_objects import DataSourceId, Schedule, ScheduleType + from shared_kernel.datasource_types import DataSourceAdapterType + + now = datetime.now(UTC) + ds = DataSource( + id=DataSourceId(value=run.data_source_id), + knowledge_graph_id="kg-001", + tenant_id="tenant-001", + name="Repo", + adapter_type=DataSourceAdapterType.GITHUB, + connection_config={"owner": "org", "repo": "repo"}, + credentials_path=None, + schedule=Schedule(schedule_type=ScheduleType.MANUAL), + last_sync_at=None, + created_at=now, + updated_at=now, + ) + mock_ds_repo.get_by_id.return_value = ds + await handler.handle( "IngestionPrepared", _payload( sync_run_id=run.id, job_package_id="pkg-001", + prepared_commit_sha="abc123", + prepared_file_count=99, ), ) saved_run: DataSourceSyncRun = mock_sync_run_repo.save.call_args[0][0] assert saved_run.status == "ingested" assert saved_run.completed_at is not None - mock_ds_repo.get_by_id.assert_not_called() + assert ds.last_prepared_commit == "abc123" + assert ds.last_prepared_file_count == 99 + mock_ds_repo.save.assert_awaited_once() @pytest.mark.asyncio diff --git a/src/api/tests/unit/management/test_data_source.py b/src/api/tests/unit/management/test_data_source.py index 4912c364c..4beb96b8a 100644 --- a/src/api/tests/unit/management/test_data_source.py +++ b/src/api/tests/unit/management/test_data_source.py @@ -432,6 +432,39 @@ def 
test_record_sync_completed_raises_after_deletion(self): ds.record_sync_completed() +class TestDataSourceRecordIngestionPrepared: + """Tests for DataSource.record_ingestion_prepared().""" + + def _create_ds(self, **kwargs): + defaults = { + "knowledge_graph_id": "kg-123", + "tenant_id": "tenant-456", + "name": "Source", + "adapter_type": DataSourceAdapterType.GITHUB, + "connection_config": {}, + } + defaults.update(kwargs) + ds = DataSource.create(**defaults) + ds.collect_events() + return ds + + def test_record_ingestion_prepared_sets_commit_and_file_count(self): + ds = self._create_ds() + ds.record_ingestion_prepared( + prepared_commit="abc123", + prepared_file_count=55, + ) + assert ds.last_prepared_commit == "abc123" + assert ds.last_prepared_file_count == 55 + + def test_record_ingestion_prepared_preserves_file_count_when_none(self): + ds = self._create_ds() + ds.last_prepared_file_count = 10 + ds.record_ingestion_prepared(prepared_commit="abc123", prepared_file_count=None) + assert ds.last_prepared_commit == "abc123" + assert ds.last_prepared_file_count == 10 + + class TestDataSourceMarkForDeletion: """Tests for DataSource.mark_for_deletion() method.""" diff --git a/src/api/tests/unit/test_sessioned_ingestion_handler.py b/src/api/tests/unit/test_sessioned_ingestion_handler.py index b0fc4e7ec..661a14df6 100644 --- a/src/api/tests/unit/test_sessioned_ingestion_handler.py +++ b/src/api/tests/unit/test_sessioned_ingestion_handler.py @@ -179,3 +179,69 @@ async def test_sessioned_ingestion_handler_sets_no_changes_flag_when_heads_match == {"token": "tok"} ) + +@pytest.mark.asyncio +async def test_sessioned_ingestion_handler_uses_last_prepared_for_ingest_only(): + """ingest_only runs should compare against last prepared commit, not extraction baseline.""" + from main import _SessionedIngestionEventHandler + + session = AsyncMock() + session_factory = _make_session_factory(session) + handler = _SessionedIngestionEventHandler(session_factory=session_factory) + 
handler._resolve_github_tracked_head_commit = AsyncMock(return_value="prepared123") # type: ignore[attr-defined] + + outbox_repo = MagicMock() + ds_repo = MagicMock() + secret_store = MagicMock() + ingestion_handler = MagicMock() + ingestion_handler.handle = AsyncMock() + ingestion_service = MagicMock() + + data_source = _make_data_source() + data_source.last_prepared_commit = "prepared123" + ds_repo.get_by_id = AsyncMock(return_value=data_source) + ds_repo.save = AsyncMock() + secret_store.retrieve = AsyncMock(return_value={"token": "tok"}) + + payload = { + "sync_run_id": "run-003", + "data_source_id": data_source.id.value, + "knowledge_graph_id": data_source.knowledge_graph_id, + "tenant_id": data_source.tenant_id, + "adapter_type": "github", + "connection_config": data_source.connection_config, + "credentials_path": data_source.credentials_path, + "pipeline_mode": "ingest_only", + } + + management_settings = MagicMock() + management_settings.encryption_key.get_secret_value.return_value = ( + "WlAwWU83a2hSODl2SVY4MHBzQWpwaDBSUHhOU3NfQ3R6aXpvNTJfNE5odz0=" + ) + + with ( + patch("infrastructure.outbox.repository.OutboxRepository", return_value=outbox_repo), + patch( + "management.infrastructure.repositories.data_source_repository.DataSourceRepository", + return_value=ds_repo, + ), + patch( + "management.infrastructure.repositories.fernet_secret_store.FernetSecretStore", + return_value=secret_store, + ), + patch( + "ingestion.application.services.ingestion_service.IngestionService", + return_value=ingestion_service, + ), + patch( + "ingestion.infrastructure.event_handler.IngestionEventHandler", + return_value=ingestion_handler, + ), + patch("main.get_management_settings", return_value=management_settings), + ): + await handler.handle("SyncStarted", payload) + + call_payload = ingestion_handler.handle.call_args.args[1] + assert call_payload["baseline_commit"] == "prepared123" + assert call_payload["no_changes_detected"] is True + diff --git 
a/src/dev-ui/app/pages/knowledge-graphs/[kgId]/data-sources/index.vue b/src/dev-ui/app/pages/knowledge-graphs/[kgId]/data-sources/index.vue index 4f610dd1f..91086fd26 100644 --- a/src/dev-ui/app/pages/knowledge-graphs/[kgId]/data-sources/index.vue +++ b/src/dev-ui/app/pages/knowledge-graphs/[kgId]/data-sources/index.vue @@ -24,6 +24,7 @@ import { import { isMaintenanceReady } from '@/utils/kgManageWorkspace' import { hasAnyActiveSync, + isActiveSyncStatus, isSyncTerminal, latestSyncRun, type SyncRunStatus, @@ -31,6 +32,10 @@ import { import { commitStatusClass, commitStatusLabel, + formatPreparedFileCount, + isIngestionPreparedAtHead, + needsIngestionPrepare, + prepareCommitStatusLabel, prepStatusBadgeVariant, resolvePrepStatusLabel, resolveRepoUrl, @@ -99,6 +104,8 @@ interface DataSourceItem { connection_config?: Record<string, string> last_extraction_baseline_commit?: string | null tracked_branch_head_commit?: string | null + last_prepared_commit?: string | null + last_prepared_file_count?: number | null sync_runs?: SyncRun[] diff_summary?: DataSourceDiffSummary | null } @@ -114,9 +121,8 @@ const kgName = ref('') const dataSources = ref<DataSourceItem[]>([]) const loading = ref(false) const expandedDiffLists = ref<Record<string, boolean>>({}) -const refreshingCommitRefs = ref<Record<string, boolean>>({}) -const refreshingAllCommits = ref(false) -const adoptingBaselines = ref<Record<string, boolean>>({}) +const checkingAllCommits = ref(false) +const preparingAll = ref(false) const newUrls = ref<string[]>(['']) const addToken = ref('') @@ -140,10 +146,20 @@ const validNewUrls = computed(() => ) const preparedCount = computed(() => - dataSources.value.filter((ds) => { - const status = latestSyncRun(ds.sync_runs)?.status - return status === 'ingested' || status === 'completed' - }).length, + dataSources.value.filter((ds) => isIngestionPreparedAtHead(ds)).length, +) + +const sourcesNeedingPrepare = computed(() => + visibleDataSources.value.filter( + (ds) => 
needsIngestionPrepare(ds) && !isActiveSyncStatus(latestStatus(ds)), + ), +) + +const canBulkPrepare = computed( + () => + sourcesNeedingPrepare.value.length > 0 + && !preparingAll.value + && !hasAnyActiveSync(dataSources.value), ) const allSourcesPrepared = computed( @@ -251,7 +267,7 @@ async function addRepositories() { newUrls.value = [''] addToken.value = '' toast.success(`Added ${added} source${added === 1 ? '' : 's'}`, { - description: 'Refresh commits or prepare ingestion context when ready.', + description: 'Check for new commits or prepare ingestion context when ready.', }) await loadDataSources() } @@ -325,64 +341,48 @@ function toggleDiffExpanded(dsId: string) { expandedDiffLists.value[dsId] = !isDiffExpanded(dsId) } -async function triggerSync(dsId: string, mode: 'full' | 'ingest_only' = 'full') { - try { - await apiFetch(`/management/data-sources/${dsId}/sync`, { - method: 'POST', - body: mode === 'ingest_only' ? { mode: 'ingest_only' } : undefined, - }) - toast.success(mode === 'ingest_only' ? 
'Preparation started' : 'Sync triggered') - await loadDataSources() - if (hasAnyActiveSync(dataSources.value)) startPolling() - } catch { - toast.error('Failed to trigger sync') - } -} - -async function refreshCommitRefs(dsId: string) { - refreshingCommitRefs.value[dsId] = true - try { - await apiFetch(`/management/data-sources/${dsId}/commit-refs/refresh`, { method: 'POST' }) - toast.success('Commit references refreshed') - await loadDataSources() - } catch { - toast.error('Failed to refresh commit references') - } finally { - refreshingCommitRefs.value[dsId] = false - } -} - -async function refreshAllCommitRefs() { +async function checkAllCommitRefs() { if (visibleDataSources.value.length === 0) return - refreshingAllCommits.value = true + checkingAllCommits.value = true try { await Promise.allSettled( visibleDataSources.value.map((ds) => apiFetch(`/management/data-sources/${ds.id}/commit-refs/refresh`, { method: 'POST' }), ), ) - toast.success('Commit references refreshed') + toast.success('Branch heads updated') await loadDataSources() } catch { - toast.error('Failed to refresh commit references') + toast.error('Failed to check for new commits') } finally { - refreshingAllCommits.value = false + checkingAllCommits.value = false } } -async function adoptTrackedHeadBaseline(dsId: string) { - adoptingBaselines.value[dsId] = true +async function prepareAllDataSources() { + const queue = sourcesNeedingPrepare.value + if (queue.length === 0) { + toast.error('No data sources need preparation') + return + } + + preparingAll.value = true try { - await apiFetch(`/management/data-sources/${dsId}/commit-refs/adopt-tracked-head`, { - method: 'POST', - }) - toast.success('Baseline updated to tracked head') + await Promise.allSettled( + queue.map((ds) => + apiFetch(`/management/data-sources/${ds.id}/sync`, { + method: 'POST', + body: { mode: 'ingest_only' }, + }), + ), + ) + toast.success(`Preparing ${queue.length} data source${queue.length === 1 ? 
'' : 's'}`) await loadDataSources() - } catch (err) { - const msg = err instanceof Error ? err.message : 'Failed to update baseline' - toast.error('Failed to update baseline', { description: msg }) + if (hasAnyActiveSync(dataSources.value)) startPolling() + } catch { + toast.error('Failed to start preparation') } finally { - adoptingBaselines.value[dsId] = false + preparingAll.value = false } } @@ -558,6 +558,13 @@ watch(tenantVersion, async () => { <input :value="url" type="text" + autocomplete="off" + autocorrect="off" + autocapitalize="off" + spellcheck="false" + data-lpignore="true" + data-1p-ignore + :name="`kg-ds-repo-url-${index}`" class="flex h-9 flex-1 rounded-md border border-input bg-transparent px-3 py-1 text-sm shadow-sm transition-colors placeholder:text-muted-foreground focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-ring" placeholder="https://github.com/org/repo" @input="updateUrl(index, ($event.target as HTMLInputElement).value)" @@ -612,18 +619,26 @@ watch(tenantVersion, async () => { <Button variant="outline" size="sm" - :disabled="refreshingAllCommits || visibleDataSources.length === 0" - @click="refreshAllCommitRefs" + :disabled="checkingAllCommits || visibleDataSources.length === 0" + @click="checkAllCommitRefs" > - <Loader2 v-if="refreshingAllCommits" class="mr-2 size-4 animate-spin" /> + <Loader2 v-if="checkingAllCommits" class="mr-2 size-4 animate-spin" /> <RefreshCw v-else class="mr-2 size-4" /> - Refresh commits + Check for new commits + </Button> + <Button + size="sm" + :disabled="!canBulkPrepare" + @click="prepareAllDataSources" + > + <Loader2 v-if="preparingAll" class="mr-2 size-4 animate-spin" /> + Prepare data sources </Button> </div> </div> <CardDescription> - Each row is one connected repository. Compare extraction baseline to tracked branch head - to see when maintenance syncs are needed. + Each row is one connected repository. 
Prepare ingestion context when tracked branch + head moves ahead of the last prepared commit. </CardDescription> </CardHeader> <CardContent class="space-y-3"> @@ -636,12 +651,13 @@ watch(tenantVersion, async () => { </div> <div v-else class="overflow-x-auto rounded-md border"> - <table class="w-full min-w-[880px] text-sm"> + <table class="w-full min-w-[960px] text-sm"> <thead> <tr class="border-b bg-muted/50 text-left"> <th class="px-3 py-2 font-medium">Source</th> <th class="px-3 py-2 font-medium">Branch</th> <th class="px-3 py-2 font-medium">Status</th> + <th class="px-3 py-2 font-medium">Files on branch</th> <th class="px-3 py-2 font-medium">Last extraction baseline</th> <th class="px-3 py-2 font-medium">Tracked branch head</th> <th class="px-3 py-2 font-medium">Actions</th> @@ -652,7 +668,7 @@ watch(tenantVersion, async () => { v-for="ds in visibleDataSources" :key="ds.id" class="border-b border-border/60 align-top last:border-0" - :class="isMaintenanceReady(ds) ? 'bg-amber-50/40 dark:bg-amber-950/10' : ''" + :class="needsIngestionPrepare(ds) ? 'bg-amber-50/40 dark:bg-amber-950/10' : ''" > <td class="px-3 py-2"> <p class="font-medium leading-tight">{{ ds.name }}</p> @@ -674,6 +690,9 @@ watch(tenantVersion, async () => { <SyncPhaseIndicator :status="latestStatus(ds)!" 
/> </div> </td> + <td class="px-3 py-2 font-mono text-xs tabular-nums"> + {{ formatPreparedFileCount(ds.last_prepared_file_count) }} + </td> <td class="px-3 py-2 font-mono text-xs"> <div :class="commitStatusClass(ds.last_extraction_baseline_commit, ds.tracked_branch_head_commit)"> <span :title="ds.last_extraction_baseline_commit || ''"> @@ -688,9 +707,19 @@ watch(tenantVersion, async () => { </div> </td> <td class="px-3 py-2 font-mono text-xs"> - <span :title="ds.tracked_branch_head_commit || ''"> - {{ shortCommitHash(ds.tracked_branch_head_commit) }} - </span> + <div + :class="commitStatusClass(ds.last_prepared_commit, ds.tracked_branch_head_commit)" + > + <span :title="ds.tracked_branch_head_commit || ''"> + {{ shortCommitHash(ds.tracked_branch_head_commit) }} + </span> + </div> + <div + class="mt-0.5 text-[10px]" + :class="commitStatusClass(ds.last_prepared_commit, ds.tracked_branch_head_commit)" + > + {{ prepareCommitStatusLabel(ds.last_prepared_commit, ds.tracked_branch_head_commit) }} + </div> </td> <td class="px-3 py-2"> <div class="flex flex-wrap gap-1"> @@ -698,33 +727,6 @@ watch(tenantVersion, async () => { <Settings class="mr-1 size-3" /> Edit </Button> - <Button - size="sm" - variant="ghost" - class="h-7 px-2 text-[10px]" - :disabled="refreshingCommitRefs[ds.id] === true" - @click="refreshCommitRefs(ds.id)" - > - <RefreshCw class="mr-1 size-3" :class="refreshingCommitRefs[ds.id] ? 
'animate-spin' : ''" /> - Refresh - </Button> - <Button - size="sm" - variant="ghost" - class="h-7 px-2 text-[10px]" - :disabled="!isMaintenanceReady(ds) || adoptingBaselines[ds.id] === true" - @click="adoptTrackedHeadBaseline(ds.id)" - > - Adopt baseline - </Button> - <Button - size="sm" - variant="ghost" - class="h-7 px-2 text-[10px]" - @click="triggerSync(ds.id, 'ingest_only')" - > - Prepare - </Button> <Button size="sm" variant="ghost" @@ -739,15 +741,15 @@ watch(tenantVersion, async () => { <div v-if="ds.diff_summary && ds.diff_summary.total_changed_files > 0" class="mt-2 rounded border p-2 text-[11px]" - :class="isMaintenanceReady(ds) ? 'border-amber-300 bg-amber-50/50 dark:border-amber-800 dark:bg-amber-950/20' : 'bg-muted/10'" + :class="needsIngestionPrepare(ds) ? 'border-amber-300 bg-amber-50/50 dark:border-amber-800 dark:bg-amber-950/20' : 'bg-muted/10'" > <div class="flex items-center justify-between gap-2"> <span> <span class="font-medium">{{ ds.diff_summary.total_changed_files }}</span> changed files </span> - <Badge :variant="isMaintenanceReady(ds) ? 'default' : 'secondary'" class="text-[10px]"> - {{ isMaintenanceReady(ds) ? 'New commits available' : 'Up to date' }} + <Badge :variant="needsIngestionPrepare(ds) ? 'default' : 'secondary'" class="text-[10px]"> + {{ needsIngestionPrepare(ds) ? 'Prepare needed' : 'Up to date' }} </Badge> </div> <Button @@ -837,8 +839,8 @@ watch(tenantVersion, async () => { class="rounded-lg border bg-muted/50 p-4" > <p class="text-sm text-muted-foreground"> - <strong>Flow:</strong> add repository URLs above, refresh commits to resolve branch heads, - then prepare ingestion context before opening graph management. + <strong>Flow:</strong> add repository URLs above, check for new commits to resolve branch heads, + then prepare data sources before opening graph management. 
</p> </div> </template> diff --git a/src/dev-ui/app/tests/kg-data-sources-phase1.test.ts b/src/dev-ui/app/tests/kg-data-sources-phase1.test.ts index 68837d47d..3f229ae86 100644 --- a/src/dev-ui/app/tests/kg-data-sources-phase1.test.ts +++ b/src/dev-ui/app/tests/kg-data-sources-phase1.test.ts @@ -3,7 +3,10 @@ import { readFileSync } from 'fs' import { resolve } from 'path' import { commitStatusLabel, + isIngestionPreparedAtHead, + needsIngestionPrepare, prepStatusBadgeVariant, + prepareCommitStatusLabel, resolvePrepStatusLabel, resolveRepoUrl, shortCommitHash, @@ -36,9 +39,22 @@ describe('KG data sources phase1 layout', () => { expect(phase1Vue).toContain('step=graph-management') }) - it('does not render legacy per-card commit status layout', () => { - expect(phase1Vue).not.toContain('Commit Status') - expect(phase1Vue).not.toContain('Local clone commit') + it('renders bulk commit check and prepare actions', () => { + expect(phase1Vue).toContain('Check for new commits') + expect(phase1Vue).toContain('Prepare data sources') + expect(phase1Vue).toContain('prepareAllDataSources') + expect(phase1Vue).not.toContain('Refresh commits') + expect(phase1Vue).not.toContain('Adopt baseline') + }) + + it('disables autofill on repository URL inputs', () => { + expect(phase1Vue).toContain('autocomplete="off"') + expect(phase1Vue).toContain('data-lpignore="true"') + }) + + it('shows files on branch column', () => { + expect(phase1Vue).toContain('Files on branch') + expect(phase1Vue).toContain('formatPreparedFileCount') }) }) @@ -68,5 +84,8 @@ describe('kgDataSourcesCommits helpers', () => { expect(resolvePrepStatusLabel('ingested')).toBe('Prepared') expect(prepStatusBadgeVariant('ingested')).toBe('success') expect(commitStatusLabel('abc', 'abc')).toBe('matches branch head') + expect(prepareCommitStatusLabel('abc', 'abc')).toBe('prepared at branch head') + expect(needsIngestionPrepare({ tracked_branch_head_commit: 'abc', last_prepared_commit: null })).toBe(true) + 
expect(isIngestionPreparedAtHead({ tracked_branch_head_commit: 'abc', last_prepared_commit: 'abc' })).toBe(true) }) }) diff --git a/src/dev-ui/app/utils/kgDataSourcesCommits.ts b/src/dev-ui/app/utils/kgDataSourcesCommits.ts index 26ab8ef0c..939b55f2e 100644 --- a/src/dev-ui/app/utils/kgDataSourcesCommits.ts +++ b/src/dev-ui/app/utils/kgDataSourcesCommits.ts @@ -23,6 +23,38 @@ export function commitStatusLabel( return current === remote ? 'matches branch head' : 'new commits on branch' } +export function prepareCommitStatusLabel( + prepared: string | null | undefined, + tracked: string | null | undefined, +): string { + if (!tracked) return 'branch head unknown' + if (!prepared) return 'not prepared yet' + return prepared === tracked ? 'prepared at branch head' : 'new commits to prepare' +} + +export function needsIngestionPrepare(ds: { + last_prepared_commit?: string | null + tracked_branch_head_commit?: string | null +}): boolean { + const tracked = ds.tracked_branch_head_commit + if (!tracked) return false + return ds.last_prepared_commit !== tracked +} + +export function isIngestionPreparedAtHead(ds: { + last_prepared_commit?: string | null + tracked_branch_head_commit?: string | null +}): boolean { + const tracked = ds.tracked_branch_head_commit + const prepared = ds.last_prepared_commit + return !!tracked && !!prepared && prepared === tracked +} + +export function formatPreparedFileCount(count: number | null | undefined): string { + if (count === null || count === undefined) return '—' + return count.toLocaleString() +} + export function resolveRepoUrl(connectionConfig: Record<string, string> | undefined): string { if (!connectionConfig) return '—' if (connectionConfig.repo_url) return connectionConfig.repo_url From 3012df564460f60307fc446792a31563ce469d2c Mon Sep 17 00:00:00 2001 From: aredenba-rh <aredenba@redhat.com> Date: Thu, 28 May 2026 21:42:57 -0400 Subject: [PATCH 059/153] feat(ui): align KG manage workspace with k-extract project hub Rework the 
manage overview into a phased workspace hub and add unpulled-commit tracking on data sources so ingestion status matches a git-pull mental model. Co-authored-by: Cursor <cursoragent@cursor.com> --- src/api/main.py | 6 +- .../domain/aggregates/data_source.py | 1 + .../management/domain/commit_pull_state.py | 37 ++ .../presentation/data_sources/models.py | 27 + .../presentation/data_sources/routes.py | 9 +- .../domain/test_commit_pull_state.py | 63 +++ .../test_sync_lifecycle_handler.py | 1 + .../tests/unit/management/test_data_source.py | 1 + .../[kgId]/data-sources/index.vue | 102 +++- .../pages/knowledge-graphs/[kgId]/manage.vue | 502 +++++++++++++++--- .../app/tests/kg-data-sources-phase1.test.ts | 25 +- .../app/tests/kg-manage-workspace-hub.test.ts | 89 ++++ .../knowledge-graph-manage-workspace.test.ts | 50 +- src/dev-ui/app/utils/kgDataSourcesCommits.ts | 72 ++- src/dev-ui/app/utils/kgManageWorkspaceHub.ts | 267 ++++++++++ 15 files changed, 1106 insertions(+), 146 deletions(-) create mode 100644 src/api/management/domain/commit_pull_state.py create mode 100644 src/api/tests/unit/management/domain/test_commit_pull_state.py create mode 100644 src/dev-ui/app/tests/kg-manage-workspace-hub.test.ts create mode 100644 src/dev-ui/app/utils/kgManageWorkspaceHub.ts diff --git a/src/api/main.py b/src/api/main.py index ecdf42dcc..da073481e 100644 --- a/src/api/main.py +++ b/src/api/main.py @@ -251,7 +251,11 @@ async def handle(self, event_type: str, payload: dict[str, Any]) -> None: if ds is not None: pipeline_mode = str(payload.get("pipeline_mode", "full")) if pipeline_mode == "ingest_only": - baseline_commit = ds.last_prepared_commit + from management.domain.commit_pull_state import ( + resolve_ingested_head_commit, + ) + + baseline_commit = resolve_ingested_head_commit(ds) else: baseline_commit = ds.last_extraction_baseline_commit if baseline_commit: diff --git a/src/api/management/domain/aggregates/data_source.py b/src/api/management/domain/aggregates/data_source.py 
index 09fe5aa38..075ecf049 100644 --- a/src/api/management/domain/aggregates/data_source.py +++ b/src/api/management/domain/aggregates/data_source.py @@ -397,6 +397,7 @@ def record_ingestion_prepared( ) if prepared_commit: self.last_prepared_commit = prepared_commit + self.clone_head_commit = prepared_commit if prepared_file_count is not None: self.last_prepared_file_count = prepared_file_count diff --git a/src/api/management/domain/commit_pull_state.py b/src/api/management/domain/commit_pull_state.py new file mode 100644 index 000000000..91568b468 --- /dev/null +++ b/src/api/management/domain/commit_pull_state.py @@ -0,0 +1,37 @@ +"""Git pull-style commit state for Git-backed data sources. + +``tracked_branch_head_commit`` is the remote branch tip (what ``git pull`` would +fast-forward to). ``clone_head_commit`` / ``last_prepared_commit`` record what +we have ingested locally. ``newest_unpulled_commit`` is the branch tip when it +differs from what we have — the newest commit on the branch we do not have yet. 
+""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from management.domain.aggregates import DataSource + + +def resolve_ingested_head_commit(data_source: DataSource) -> str | None: + """Commit whose content we have ingested (local HEAD after pull/prepare).""" + return data_source.clone_head_commit or data_source.last_prepared_commit + + +def resolve_newest_unpulled_commit(data_source: DataSource) -> str | None: + """Newest commit on the tracked branch that we do not have yet, if any.""" + remote_tip = data_source.tracked_branch_head_commit + if not remote_tip: + return None + ingested = resolve_ingested_head_commit(data_source) + if not ingested: + return remote_tip + if ingested == remote_tip: + return None + return remote_tip + + +def has_unpulled_commits(data_source: DataSource) -> bool: + """True when the remote branch tip is ahead of our ingested head.""" + return resolve_newest_unpulled_commit(data_source) is not None diff --git a/src/api/management/presentation/data_sources/models.py b/src/api/management/presentation/data_sources/models.py index f05b9b42d..ec4aca13a 100644 --- a/src/api/management/presentation/data_sources/models.py +++ b/src/api/management/presentation/data_sources/models.py @@ -9,6 +9,10 @@ from management.application.services.data_source_service import DataSourceWithLatestRun from management.domain.aggregates import DataSource +from management.domain.commit_pull_state import ( + resolve_ingested_head_commit, + resolve_newest_unpulled_commit, +) from management.domain.entities import DataSourceSyncRun from management.domain.value_objects import Ontology, OntologyEdgeType, OntologyNodeType @@ -205,6 +209,17 @@ class DataSourceResponse(BaseModel): last_prepared_file_count: int | None = Field( None, description="Number of files in the JobPackage from the last prepare" ) + ingested_head_commit: str | None = Field( + None, + description="Commit we have ingested locally (clone head / last 
prepare)", + ) + newest_unpulled_commit: str | None = Field( + None, + description=( + "Newest commit on the tracked branch we do not have yet; " + "null when up to date with branch tip" + ), + ) connection_config: dict[str, str] = Field( default_factory=dict, description="Adapter connection configuration (non-secret)", @@ -239,6 +254,8 @@ def from_domain(cls, ds: DataSource) -> DataSourceResponse: tracked_branch_head_commit=ds.tracked_branch_head_commit, last_prepared_commit=ds.last_prepared_commit, last_prepared_file_count=ds.last_prepared_file_count, + ingested_head_commit=resolve_ingested_head_commit(ds), + newest_unpulled_commit=resolve_newest_unpulled_commit(ds), connection_config=dict(ds.connection_config), created_at=ds.created_at, updated_at=ds.updated_at, @@ -490,6 +507,14 @@ class DataSourceWithSyncResponse(BaseModel): last_prepared_file_count: int | None = Field( None, description="Number of files in the JobPackage from the last prepare" ) + ingested_head_commit: str | None = Field( + None, + description="Commit we have ingested locally (clone head / last prepare)", + ) + newest_unpulled_commit: str | None = Field( + None, + description="Newest commit on branch we do not have yet; null if up to date", + ) connection_config: dict[str, str] = Field( default_factory=dict, description="Adapter connection configuration (non-secret)", @@ -531,6 +556,8 @@ def from_domain_pair( tracked_branch_head_commit=ds.tracked_branch_head_commit, last_prepared_commit=ds.last_prepared_commit, last_prepared_file_count=ds.last_prepared_file_count, + ingested_head_commit=resolve_ingested_head_commit(ds), + newest_unpulled_commit=resolve_newest_unpulled_commit(ds), connection_config=dict(ds.connection_config), created_at=ds.created_at, updated_at=ds.updated_at, diff --git a/src/api/management/presentation/data_sources/routes.py b/src/api/management/presentation/data_sources/routes.py index ea67a40af..d063859ee 100644 --- a/src/api/management/presentation/data_sources/routes.py 
+++ b/src/api/management/presentation/data_sources/routes.py @@ -66,7 +66,7 @@ def _build_operation_count_entry_previews( @router.post( "/data-sources/{ds_id}/commit-refs/refresh", status_code=status.HTTP_200_OK, - summary="Refresh source commit references for a data source", + summary="Check remote branch tip and unpulled commits for a data source", ) async def refresh_commit_references( ds_id: str, @@ -76,7 +76,12 @@ async def refresh_commit_references( GitCommitReferenceService, Depends(get_git_commit_reference_service) ], ) -> DataSourceResponse: - """Refresh tracked/cloned commit references for a Git-backed data source.""" + """Resolve the remote branch tip and whether we have unpulled commits. + + Updates ``tracked_branch_head_commit`` to the current GitHub branch HEAD + (the commit ``git pull`` would fast-forward to). The response includes + ``newest_unpulled_commit`` when that tip is ahead of our ingested head. + """ try: ds = await service.get( user_id=current_user.user_id.value, diff --git a/src/api/tests/unit/management/domain/test_commit_pull_state.py b/src/api/tests/unit/management/domain/test_commit_pull_state.py new file mode 100644 index 000000000..ec499f19a --- /dev/null +++ b/src/api/tests/unit/management/domain/test_commit_pull_state.py @@ -0,0 +1,63 @@ +"""Unit tests for git pull-style commit state helpers.""" + +from __future__ import annotations + +from datetime import UTC, datetime + +from management.domain.aggregates import DataSource +from management.domain.commit_pull_state import ( + has_unpulled_commits, + resolve_ingested_head_commit, + resolve_newest_unpulled_commit, +) +from management.domain.value_objects import DataSourceId, Schedule, ScheduleType +from shared_kernel.datasource_types import DataSourceAdapterType + + +def _ds(**overrides) -> DataSource: + now = datetime.now(UTC) + base = dict( + id=DataSourceId(value="01JTESTCOMMITPULLSTATE000"), + knowledge_graph_id="kg-001", + tenant_id="tenant-001", + name="repo", + 
adapter_type=DataSourceAdapterType.GITHUB, + connection_config={"owner": "o", "repo": "r", "branch": "main"}, + credentials_path=None, + schedule=Schedule(schedule_type=ScheduleType.MANUAL), + last_sync_at=None, + created_at=now, + updated_at=now, + clone_head_commit=None, + last_prepared_commit=None, + tracked_branch_head_commit=None, + ) + base.update(overrides) + return DataSource(**base) + + +class TestCommitPullState: + def test_ingested_head_prefers_clone_over_prepared(self): + ds = _ds(clone_head_commit="clone-sha", last_prepared_commit="prep-sha") + assert resolve_ingested_head_commit(ds) == "clone-sha" + + def test_newest_unpulled_is_remote_tip_when_never_ingested(self): + ds = _ds(tracked_branch_head_commit="remote-tip") + assert resolve_newest_unpulled_commit(ds) == "remote-tip" + assert has_unpulled_commits(ds) is True + + def test_newest_unpulled_none_when_up_to_date(self): + ds = _ds( + clone_head_commit="same-sha", + tracked_branch_head_commit="same-sha", + ) + assert resolve_newest_unpulled_commit(ds) is None + assert has_unpulled_commits(ds) is False + + def test_newest_unpulled_is_branch_tip_when_behind(self): + ds = _ds( + clone_head_commit="old-sha", + tracked_branch_head_commit="new-tip", + ) + assert resolve_newest_unpulled_commit(ds) == "new-tip" + assert has_unpulled_commits(ds) is True diff --git a/src/api/tests/unit/management/infrastructure/test_sync_lifecycle_handler.py b/src/api/tests/unit/management/infrastructure/test_sync_lifecycle_handler.py index ae9a0e67d..dd5ab7f46 100644 --- a/src/api/tests/unit/management/infrastructure/test_sync_lifecycle_handler.py +++ b/src/api/tests/unit/management/infrastructure/test_sync_lifecycle_handler.py @@ -164,6 +164,7 @@ async def test_ingestion_prepared_sets_ingested( assert saved_run.status == "ingested" assert saved_run.completed_at is not None assert ds.last_prepared_commit == "abc123" + assert ds.clone_head_commit == "abc123" assert ds.last_prepared_file_count == 99 
mock_ds_repo.save.assert_awaited_once() diff --git a/src/api/tests/unit/management/test_data_source.py b/src/api/tests/unit/management/test_data_source.py index 4beb96b8a..49c20ae0d 100644 --- a/src/api/tests/unit/management/test_data_source.py +++ b/src/api/tests/unit/management/test_data_source.py @@ -455,6 +455,7 @@ def test_record_ingestion_prepared_sets_commit_and_file_count(self): prepared_file_count=55, ) assert ds.last_prepared_commit == "abc123" + assert ds.clone_head_commit == "abc123" assert ds.last_prepared_file_count == 55 def test_record_ingestion_prepared_preserves_file_count_when_none(self): diff --git a/src/dev-ui/app/pages/knowledge-graphs/[kgId]/data-sources/index.vue b/src/dev-ui/app/pages/knowledge-graphs/[kgId]/data-sources/index.vue index 91086fd26..f28fe0d80 100644 --- a/src/dev-ui/app/pages/knowledge-graphs/[kgId]/data-sources/index.vue +++ b/src/dev-ui/app/pages/knowledge-graphs/[kgId]/data-sources/index.vue @@ -30,12 +30,16 @@ import { type SyncRunStatus, } from '@/utils/kgDataSourcesSync' import { + formatPreparedFileCount, commitStatusClass, commitStatusLabel, - formatPreparedFileCount, + hasUnpulledCommits, isIngestionPreparedAtHead, needsIngestionPrepare, - prepareCommitStatusLabel, + resolveBranchTipCommit, + resolveIngestedHeadCommit, + resolveNewestUnpulledCommit, + unpulledCommitStatusLabel, prepStatusBadgeVariant, resolvePrepStatusLabel, resolveRepoUrl, @@ -106,6 +110,9 @@ interface DataSourceItem { tracked_branch_head_commit?: string | null last_prepared_commit?: string | null last_prepared_file_count?: number | null + ingested_head_commit?: string | null + newest_unpulled_commit?: string | null + clone_head_commit?: string | null sync_runs?: SyncRun[] diff_summary?: DataSourceDiffSummary | null } @@ -350,8 +357,18 @@ async function checkAllCommitRefs() { apiFetch(`/management/data-sources/${ds.id}/commit-refs/refresh`, { method: 'POST' }), ), ) - toast.success('Branch heads updated') await loadDataSources() + const unpulled = 
visibleDataSources.value.filter((ds) => hasUnpulledCommits(ds)) + if (unpulled.length === 0) { + toast.success('Up to date with remote branches') + } else { + toast.success( + `${unpulled.length} source${unpulled.length === 1 ? '' : 's'} have unpulled commits`, + { + description: 'Newest unpulled commit is shown in the table.', + }, + ) + } } catch { toast.error('Failed to check for new commits') } finally { @@ -637,8 +654,9 @@ watch(tenantVersion, async () => { </div> </div> <CardDescription> - Each row is one connected repository. Prepare ingestion context when tracked branch - head moves ahead of the last prepared commit. + Check for new commits resolves the remote branch tip (like after + <span class="font-mono text-xs">git fetch</span>) and shows the newest commit you + have not ingested yet. Prepare pulls that content into a JobPackage. </CardDescription> </CardHeader> <CardContent class="space-y-3"> @@ -651,7 +669,7 @@ watch(tenantVersion, async () => { </div> <div v-else class="overflow-x-auto rounded-md border"> - <table class="w-full min-w-[960px] text-sm"> + <table class="w-full min-w-[1120px] text-sm"> <thead> <tr class="border-b bg-muted/50 text-left"> <th class="px-3 py-2 font-medium">Source</th> @@ -659,7 +677,9 @@ watch(tenantVersion, async () => { <th class="px-3 py-2 font-medium">Status</th> <th class="px-3 py-2 font-medium">Files on branch</th> <th class="px-3 py-2 font-medium">Last extraction baseline</th> - <th class="px-3 py-2 font-medium">Tracked branch head</th> + <th class="px-3 py-2 font-medium">Ingested at</th> + <th class="px-3 py-2 font-medium">Newest unpulled</th> + <th class="px-3 py-2 font-medium">Branch tip</th> <th class="px-3 py-2 font-medium">Actions</th> </tr> </thead> @@ -668,7 +688,7 @@ watch(tenantVersion, async () => { v-for="ds in visibleDataSources" :key="ds.id" class="border-b border-border/60 align-top last:border-0" - :class="needsIngestionPrepare(ds) ? 
'bg-amber-50/40 dark:bg-amber-950/10' : ''" + :class="hasUnpulledCommits(ds) ? 'bg-amber-50/40 dark:bg-amber-950/10' : ''" > <td class="px-3 py-2"> <p class="font-medium leading-tight">{{ ds.name }}</p> @@ -694,33 +714,77 @@ watch(tenantVersion, async () => { {{ formatPreparedFileCount(ds.last_prepared_file_count) }} </td> <td class="px-3 py-2 font-mono text-xs"> - <div :class="commitStatusClass(ds.last_extraction_baseline_commit, ds.tracked_branch_head_commit)"> + <div + :class=" + commitStatusClass( + ds.last_extraction_baseline_commit, + ds.tracked_branch_head_commit, + ) + " + > <span :title="ds.last_extraction_baseline_commit || ''"> {{ shortCommitHash(ds.last_extraction_baseline_commit) }} </span> </div> <div class="mt-0.5 text-[10px]" - :class="commitStatusClass(ds.last_extraction_baseline_commit, ds.tracked_branch_head_commit)" + :class=" + commitStatusClass( + ds.last_extraction_baseline_commit, + ds.tracked_branch_head_commit, + ) + " > - {{ commitStatusLabel(ds.last_extraction_baseline_commit, ds.tracked_branch_head_commit) }} + {{ + commitStatusLabel( + ds.last_extraction_baseline_commit, + ds.tracked_branch_head_commit, + ) + }} + </div> + </td> + <td class="px-3 py-2 font-mono text-xs"> + <span :title="resolveIngestedHeadCommit(ds) || ''"> + {{ shortCommitHash(resolveIngestedHeadCommit(ds)) }} + </span> + <div class="mt-0.5 text-[10px] text-muted-foreground"> + {{ resolveIngestedHeadCommit(ds) ? 'have locally' : 'nothing ingested yet' }} </div> </td> <td class="px-3 py-2 font-mono text-xs"> <div - :class="commitStatusClass(ds.last_prepared_commit, ds.tracked_branch_head_commit)" + :class=" + hasUnpulledCommits(ds) + ? 
'text-amber-600 dark:text-amber-400' + : 'text-green-600 dark:text-green-400' + " > - <span :title="ds.tracked_branch_head_commit || ''"> - {{ shortCommitHash(ds.tracked_branch_head_commit) }} + <span :title="resolveNewestUnpulledCommit(ds) || ''"> + {{ shortCommitHash(resolveNewestUnpulledCommit(ds)) }} </span> </div> <div class="mt-0.5 text-[10px]" - :class="commitStatusClass(ds.last_prepared_commit, ds.tracked_branch_head_commit)" + :class=" + hasUnpulledCommits(ds) + ? 'text-amber-600 dark:text-amber-400' + : 'text-muted-foreground' + " > - {{ prepareCommitStatusLabel(ds.last_prepared_commit, ds.tracked_branch_head_commit) }} + {{ + unpulledCommitStatusLabel( + resolveNewestUnpulledCommit(ds), + resolveBranchTipCommit(ds), + ) + }} </div> </td> + <td class="px-3 py-2 font-mono text-xs text-muted-foreground"> + <span :title="resolveBranchTipCommit(ds) || ''"> + {{ shortCommitHash(resolveBranchTipCommit(ds)) }} + </span> + <div class="mt-0.5 text-[10px] text-muted-foreground">remote tip</div> + </td> <td class="px-3 py-2"> <div class="flex flex-wrap gap-1"> <Button size="sm" variant="ghost" class="h-7 px-2 text-[10px]" @click="openEditConfig(ds)"> @@ -741,15 +805,15 @@ watch(tenantVersion, async () => { <div v-if="ds.diff_summary && ds.diff_summary.total_changed_files > 0" class="mt-2 rounded border p-2 text-[11px]" - :class="needsIngestionPrepare(ds) ? 'border-amber-300 bg-amber-50/50 dark:border-amber-800 dark:bg-amber-950/20' : 'bg-muted/10'" + :class="hasUnpulledCommits(ds) ? 'border-amber-300 bg-amber-50/50 dark:border-amber-800 dark:bg-amber-950/20' : 'bg-muted/10'" > <div class="flex items-center justify-between gap-2"> <span> <span class="font-medium">{{ ds.diff_summary.total_changed_files }}</span> changed files </span> - <Badge :variant="needsIngestionPrepare(ds) ? 'default' : 'secondary'" class="text-[10px]"> - {{ needsIngestionPrepare(ds) ? 'Prepare needed' : 'Up to date' }} + <Badge :variant="hasUnpulledCommits(ds) ? 
'default' : 'secondary'" class="text-[10px]"> + {{ hasUnpulledCommits(ds) ? 'Unpulled commits' : 'Up to date' }} </Badge> </div> <Button diff --git a/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue b/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue index 8f175b9c2..3b3fd268d 100644 --- a/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue +++ b/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue @@ -1,11 +1,41 @@ <script setup lang="ts"> import { computed, onMounted, ref, watch } from 'vue' import { toast } from 'vue-sonner' -import { ArrowLeft, CheckCircle2, Coins, DollarSign, Loader2, PlayCircle, ShieldAlert } from 'lucide-vue-next' +import { + ArrowLeft, + ArrowRight, + Box, + CheckCircle2, + ChevronLeft, + Coins, + Database, + DollarSign, + FileText, + GitBranch, + Link2, + Loader2, + Lock, + MessageSquare, + PlayCircle, + ScrollText, + ShieldAlert, + Trash2, + Wrench, +} from 'lucide-vue-next' import { Button } from '@/components/ui/button' import { Badge } from '@/components/ui/badge' import { Card, CardContent, CardHeader, CardTitle, CardDescription } from '@/components/ui/card' import { Separator } from '@/components/ui/separator' +import { + AlertDialog, + AlertDialogAction, + AlertDialogCancel, + AlertDialogContent, + AlertDialogDescription, + AlertDialogFooter, + AlertDialogHeader, + AlertDialogTitle, +} from '@/components/ui/alert-dialog' import SharedConversationPanel from '@/components/extraction/SharedConversationPanel.vue' import { GRAPH_MANAGEMENT_INPUT_PLACEHOLDERS, @@ -23,13 +53,21 @@ import { } from '@/utils/kgGraphManagement' import { buildManageStepUrl, - buildSuggestedNextStep, - buildWorkspaceStepCards, parseManageStepQuery, - resolveStepDestination, stepStatusTintClass, - type WorkspaceStepId, } from '@/utils/kgManageWorkspace' +import { + buildWorkspaceHubNextStep, + buildWorkspaceHubTiles, + resolveWorkspaceHubPhaseBadge, + workspaceHubDescription, + workspaceHubStepBadgeClass, + workspaceHubTileClasses, + type 
WorkspaceHubOverview, + type WorkspaceHubSourceRow, +} from '@/utils/kgManageWorkspaceHub' +import { isIngestionPreparedAtHead, resolvePrepStatusLabel, resolveRepoUrl } from '@/utils/kgDataSourcesCommits' +import { latestSyncRun } from '@/utils/kgDataSourcesSync' import { appendLocalChatMessage, buildTransitionRestrictionReason, @@ -82,8 +120,13 @@ interface KnowledgeGraphIdentity { interface DataSourceRef { id: string name: string + connection_config?: Record<string, string> last_extraction_baseline_commit?: string | null tracked_branch_head_commit?: string | null + clone_head_commit?: string | null + last_prepared_commit?: string | null + ingested_head_commit?: string | null + newest_unpulled_commit?: string | null } interface MutationLogRunView extends MutationLogRunRecord { @@ -133,7 +176,13 @@ const graphApi = useGraphApi() const kgId = computed(() => String(route.params.kgId ?? '')) const kgIdentity = ref<KnowledgeGraphIdentity | null>(null) const dataSourceCount = ref(0) +const preparedSourceCount = ref(0) const maintenanceReadyCount = ref(0) +const overviewSourceRows = ref<WorkspaceHubSourceRow[]>([]) +const entityTypeLabels = ref<string[]>([]) +const relationshipTypeLabels = ref<string[]>([]) +const deleteKgDialogOpen = ref(false) +const deletingKg = ref(false) const loading = ref(false) const workspaceLoadError = ref<string | null>(null) const workspaceForbidden = ref(false) @@ -187,8 +236,24 @@ const workspaceOverviewInput = computed(() => ({ workspaceStatus: statusProjection.value, })) -const workspaceStepCards = computed(() => buildWorkspaceStepCards(workspaceOverviewInput.value)) -const suggestedNextStep = computed(() => buildSuggestedNextStep(workspaceOverviewInput.value)) +const workspaceHubOverview = computed((): WorkspaceHubOverview => ({ + ...workspaceOverviewInput.value, + preparedSourceCount: preparedSourceCount.value, + entityTypeLabels: entityTypeLabels.value, + relationshipTypeLabels: relationshipTypeLabels.value, +})) + +const 
workspaceHubTiles = computed(() => buildWorkspaceHubTiles(workspaceHubOverview.value)) +const workspaceHubNextStep = computed(() => buildWorkspaceHubNextStep(workspaceHubOverview.value)) +const workspaceHubPhaseBadge = computed(() => resolveWorkspaceHubPhaseBadge(workspaceHubOverview.value)) +const workspaceHubDescriptionText = computed(() => workspaceHubDescription(workspaceHubOverview.value)) + +const workspaceHubTileIcons = { + 'data-sources': GitBranch, + 'graph-management': MessageSquare, + 'mutation-logs': ScrollText, + maintain: Wrench, +} as const const graphHeaderTitle = computed(() => kgIdentity.value?.name ?? 'Knowledge Graph Manage Workspace', @@ -382,9 +447,78 @@ async function loadOverviewMetrics() { if (!ds.last_extraction_baseline_commit || !ds.tracked_branch_head_commit) return false return ds.last_extraction_baseline_commit !== ds.tracked_branch_head_commit }).length + + let prepared = 0 + const rows: WorkspaceHubSourceRow[] = [] + for (const ds of dataSources) { + let status = 'not prepared' + let statusVariant: WorkspaceHubSourceRow['statusVariant'] = 'secondary' + try { + const runs = await apiFetch<Array<{ status: string }>>( + `/management/data-sources/${ds.id}/sync-runs`, + ) + const latest = latestSyncRun(runs) + if (latest) { + status = resolvePrepStatusLabel(latest.status).toLowerCase() + if (latest.status === 'ingested' || latest.status === 'completed') { + statusVariant = 'success' + } + } + } catch { + // keep default status + } + if (isIngestionPreparedAtHead(ds)) { + prepared += 1 + if (status === 'not prepared') { + status = 'prepared' + statusVariant = 'success' + } + } + rows.push({ + id: ds.id, + name: ds.name, + url: resolveRepoUrl(ds.connection_config), + status, + statusVariant, + }) + } + preparedSourceCount.value = prepared + overviewSourceRows.value = rows + + try { + const ontology = await apiFetch<{ + node_types?: Array<{ label: string }> + edge_types?: Array<{ label: string }> + 
}>(`/management/knowledge-graphs/${kgId.value}/ontology`) + entityTypeLabels.value = (ontology.node_types ?? []).map((t) => t.label) + relationshipTypeLabels.value = (ontology.edge_types ?? []).map((t) => t.label) + } catch { + entityTypeLabels.value = [] + relationshipTypeLabels.value = [] + } } catch { dataSourceCount.value = 0 + preparedSourceCount.value = 0 maintenanceReadyCount.value = 0 + overviewSourceRows.value = [] + entityTypeLabels.value = [] + relationshipTypeLabels.value = [] + } +} + +async function handleDeleteKnowledgeGraph() { + deletingKg.value = true + try { + await apiFetch(`/management/knowledge-graphs/${kgId.value}`, { method: 'DELETE' }) + toast.success(`Knowledge graph "${kgIdentity.value?.name ?? kgId.value}" deleted`) + deleteKgDialogOpen.value = false + await navigateTo('/knowledge-graphs') + } catch (err) { + toast.error('Failed to delete knowledge graph', { + description: extractErrorMessage(err), + }) + } finally { + deletingKg.value = false } } @@ -486,12 +620,6 @@ async function applyInlineMutations() { } } -function openWorkspaceStep(stepId: WorkspaceStepId) { - navigateTo(resolveStepDestination(kgId.value, stepId, { - dataSourceCount: dataSourceCount.value, - })) -} - function returnToWorkspaceOverview() { navigateTo(buildManageStepUrl(kgId.value)) } @@ -696,10 +824,6 @@ function onRailKeydown(event: KeyboardEvent, itemId: GraphManagementRailItemId) handleActivatableKeydown(event, () => selectRailItem(itemId)) } -function onStepActionKeydown(event: KeyboardEvent, stepId: WorkspaceStepId) { - handleActivatableKeydown(event, () => openWorkspaceStep(stepId)) -} - function onModeSwitchKeydown(event: KeyboardEvent, mode: GraphManagementMode) { handleActivatableKeydown(event, () => setGraphManagementMode(mode)) } @@ -826,7 +950,11 @@ watch(tenantVersion, () => { statusProjection.value = null extractionSession.value = null dataSourceCount.value = 0 + preparedSourceCount.value = 0 maintenanceReadyCount.value = 0 + overviewSourceRows.value 
= [] + entityTypeLabels.value = [] + relationshipTypeLabels.value = [] workspaceLoadError.value = null workspaceForbidden.value = false workspaceForbiddenReason.value = null @@ -876,35 +1004,39 @@ watch(selectedOpsDataSourceId, () => { <template> <div class="space-y-6"> - <div class="flex items-center justify-between"> - <div class="space-y-1"> - <div class="flex items-center gap-2"> - <h1 class="text-2xl font-semibold tracking-tight">{{ graphHeaderTitle }}</h1> - <Badge v-if="!showOverview" variant="secondary">{{ stepBadgeLabel }}</Badge> - </div> - <p class="text-sm text-muted-foreground"> - <template v-if="showOverview"> - Project workspace for knowledge graph {{ kgId }}. - </template> - <template v-else-if="activeStep === 'graph-management'"> - Conversation-first graph management with shared session and mode-specific workspace panels. - </template> - <template v-else> - Knowledge-graph scoped mutation run visibility and run metrics. - </template> - </p> - </div> - <Button - variant="outline" - size="sm" - @click="showOverview ? navigateTo('/knowledge-graphs') : returnToWorkspaceOverview()" + <template v-if="showOverview"> + <NuxtLink + to="/knowledge-graphs" + class="inline-flex items-center text-sm text-muted-foreground hover:text-foreground" > - <ArrowLeft class="mr-1.5 size-3.5" /> - {{ showOverview ? 'Back to Knowledge Graphs' : 'Back to workspace overview' }} - </Button> - </div> + <ChevronLeft class="mr-1 size-4" /> + Back to Knowledge Graphs + </NuxtLink> + </template> - <Separator /> + <template v-else> + <div class="flex items-center justify-between"> + <div class="space-y-1"> + <div class="flex items-center gap-2"> + <h1 class="text-2xl font-semibold tracking-tight">{{ graphHeaderTitle }}</h1> + <Badge variant="secondary">{{ stepBadgeLabel }}</Badge> + </div> + <p class="text-sm text-muted-foreground"> + <template v-if="activeStep === 'graph-management'"> + Conversation-first graph management with shared session and mode-specific workspace panels. 
+ </template> + <template v-else> + Knowledge-graph scoped mutation run visibility and run metrics. + </template> + </p> + </div> + <Button variant="outline" size="sm" @click="returnToWorkspaceOverview()"> + <ArrowLeft class="mr-1.5 size-3.5" /> + Back to workspace overview + </Button> + </div> + <Separator /> + </template> <div v-if="!hasTenant" class="rounded-lg border border-dashed p-6 text-sm text-muted-foreground"> Select a tenant to manage this workspace. @@ -912,10 +1044,10 @@ watch(selectedOpsDataSourceId, () => { <div v-else-if="workspaceOverviewState.phase === 'loading'" - class="flex items-center gap-2 text-sm text-muted-foreground" + class="flex items-center justify-center gap-2 py-12 text-sm text-muted-foreground" role="status" > - <Loader2 class="size-4 animate-spin" /> + <Loader2 class="size-8 animate-spin" /> {{ workspaceOverviewState.message }} </div> @@ -942,50 +1074,240 @@ watch(selectedOpsDataSourceId, () => { <template v-else-if="statusProjection"> <section v-if="showOverview" class="space-y-6"> - <div> - <h2 class="text-lg font-semibold tracking-tight">Project workspace</h2> - <p class="text-sm text-muted-foreground"> - Choose a step to continue work on this knowledge graph without re-selecting context. 
- </p> + <div class="flex items-start justify-between gap-4"> + <div class="flex min-w-0 items-center gap-3"> + <Database class="size-8 shrink-0 text-primary" /> + <div class="min-w-0"> + <h2 class="text-2xl font-bold tracking-tight">{{ graphHeaderTitle }}</h2> + <p class="truncate font-mono text-sm text-muted-foreground">{{ kgId }}</p> + <p + v-if="kgIdentity?.description" + class="mt-0.5 text-sm text-muted-foreground" + > + {{ kgIdentity.description }} + </p> + </div> + </div> + <div class="flex shrink-0 items-center gap-2"> + <Button variant="destructive" size="sm" class="gap-1.5" @click="deleteKgDialogOpen = true"> + <Trash2 class="size-4" /> + Delete + </Button> + <Badge :variant="workspaceHubPhaseBadge.variant" class="text-sm"> + {{ workspaceHubPhaseBadge.label }} + </Badge> + </div> </div> - <Card class="border-primary/30 bg-primary/5"> + <Separator /> + + <Card class="border-border"> <CardHeader class="pb-3"> - <CardTitle class="text-base">Suggested next step</CardTitle> - <CardDescription>{{ suggestedNextStep.description }}</CardDescription> + <CardTitle class="text-base">Project workspace</CardTitle> + <CardDescription>{{ workspaceHubDescriptionText }}</CardDescription> + </CardHeader> + <CardContent class="space-y-4"> + <div + class="flex flex-col gap-3 rounded-lg border p-4 sm:flex-row sm:items-center sm:justify-between" + :class=" + workspaceHubNextStep.primaryPhase + ? 'border-primary/25 bg-primary/5' + : 'border-border bg-muted/40' + " + > + <div class="min-w-0 space-y-1"> + <p + class="text-xs font-semibold uppercase tracking-wide" + :class="workspaceHubNextStep.primaryPhase ? 'text-primary' : 'text-muted-foreground'" + > + {{ workspaceHubNextStep.primaryPhase ? 
'Next step' : 'Suggested next step' }} + </p> + <p class="text-sm font-medium leading-snug">{{ workspaceHubNextStep.title }}</p> + <p class="text-sm leading-snug text-muted-foreground">{{ workspaceHubNextStep.description }}</p> + </div> + <Button + as-child + :variant="workspaceHubNextStep.primaryPhase ? 'default' : 'secondary'" + class="w-full shrink-0 sm:w-auto" + > + <NuxtLink :to="workspaceHubNextStep.to" class="inline-flex items-center justify-center gap-2"> + {{ workspaceHubNextStep.label }} + <ArrowRight class="size-4" /> + </NuxtLink> + </Button> + </div> + + <div class="grid gap-2 sm:grid-cols-2 lg:grid-cols-4"> + <template v-for="item in workspaceHubTiles" :key="item.key"> + <NuxtLink + v-if="item.enabled" + :to="item.to" + class="flex flex-col gap-2 rounded-lg border p-4 text-left transition-colors hover:border-primary/50 focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring" + :class="[ + workspaceHubTileClasses(item), + item.tone === 'success' + ? 'hover:bg-green-500/10 dark:hover:bg-green-950/30' + : 'hover:bg-muted/60', + ]" + > + <div class="flex items-start justify-between gap-2"> + <div class="flex min-w-0 flex-1 items-center gap-2"> + <component + :is="workspaceHubTileIcons[item.key]" + class="size-4 shrink-0" + :class=" + item.tone === 'success' + ? 'text-green-600 dark:text-green-400' + : 'text-primary' + " + /> + <span class="text-sm font-semibold leading-tight">{{ item.title }}</span> + </div> + <div :class="workspaceHubStepBadgeClass(item)"> + <CheckCircle2 v-if="item.done" class="size-4" /> + <span v-else class="text-xs font-bold leading-none">{{ item.step }}</span> + </div> + </div> + <p class="text-xs leading-snug text-muted-foreground">{{ item.subtitle }}</p> + <span + class="text-xs font-medium" + :class=" + item.tone === 'success' + ? 
'text-green-700 dark:text-green-400' + : 'text-primary' + " + > + {{ item.linkLabel }} + </span> + </NuxtLink> + <div + v-else + class="flex flex-col gap-2 rounded-lg border border-dashed border-rose-200/80 bg-rose-500/[0.04] p-4 text-left text-muted-foreground dark:border-rose-900/40 dark:bg-rose-950/20" + :title="item.lockedReason || 'Locked'" + > + <div class="flex items-start justify-between gap-2"> + <div class="flex min-w-0 flex-1 items-center gap-2"> + <Lock class="size-4 shrink-0 text-rose-700/70 dark:text-rose-400/80" /> + <span class="text-sm font-semibold leading-tight text-foreground/80">{{ item.title }}</span> + </div> + <div :class="workspaceHubStepBadgeClass(item)"> + <span class="text-xs font-bold leading-none">{{ item.step }}</span> + </div> + </div> + <p class="text-xs leading-snug">{{ item.subtitle }}</p> + <p class="text-xs text-rose-800/90 dark:text-rose-300/90">{{ item.lockedReason }}</p> + </div> + </template> + </div> + </CardContent> + </Card> + + <div class="grid gap-4 md:grid-cols-4"> + <Card> + <CardContent class="flex items-center gap-3 p-4"> + <div class="rounded-md bg-muted p-2"> + <GitBranch class="size-4 text-muted-foreground" /> + </div> + <div> + <div class="text-2xl font-bold">{{ dataSourceCount }}</div> + <p class="text-xs text-muted-foreground">Data Sources</p> + </div> + </CardContent> + </Card> + <Card> + <CardContent class="flex items-center gap-3 p-4"> + <div class="rounded-md bg-muted p-2"> + <Box class="size-4 text-muted-foreground" /> + </div> + <div> + <div class="text-2xl font-bold">{{ entityTypeLabels.length }}</div> + <p class="text-xs text-muted-foreground">Entity Types</p> + </div> + </CardContent> + </Card> + <Card> + <CardContent class="flex items-center gap-3 p-4"> + <div class="rounded-md bg-muted p-2"> + <Link2 class="size-4 text-muted-foreground" /> + </div> + <div> + <div class="text-2xl font-bold">{{ relationshipTypeLabels.length }}</div> + <p class="text-xs text-muted-foreground">Relationship Types</p> + 
</div> + </CardContent> + </Card> + <Card> + <CardContent class="flex items-center gap-3 p-4"> + <div class="rounded-md bg-muted p-2"> + <FileText class="size-4 text-muted-foreground" /> + </div> + <div> + <div class="text-2xl font-bold">{{ mutationLogRuns.length }}</div> + <p class="text-xs text-muted-foreground">Mutation Runs</p> + </div> + </CardContent> + </Card> + </div> + + <Card> + <CardHeader> + <CardTitle class="text-base">Data Sources</CardTitle> + <CardDescription>Configured repositories for this knowledge graph</CardDescription> </CardHeader> <CardContent> - <Button @click="openWorkspaceStep(suggestedNextStep.stepId)"> - {{ suggestedNextStep.actionLabel }} {{ suggestedNextStep.title }} - </Button> + <div v-if="overviewSourceRows.length === 0" class="text-sm text-muted-foreground"> + No data sources configured yet. + </div> + <div v-else class="space-y-3"> + <div + v-for="source in overviewSourceRows" + :key="source.id" + class="flex items-center justify-between rounded-lg border p-3" + > + <div class="flex min-w-0 items-center gap-3"> + <GitBranch class="size-4 shrink-0 text-muted-foreground" /> + <div class="min-w-0"> + <p class="font-medium">{{ source.name }}</p> + <p class="truncate font-mono text-xs text-muted-foreground">{{ source.url }}</p> + </div> + </div> + <Badge :variant="source.statusVariant">{{ source.status }}</Badge> + </div> + </div> </CardContent> </Card> - <div class="grid gap-4 md:grid-cols-2 xl:grid-cols-4"> - <!-- Step cards: Data Sources, Graph Management, MutationLogs, Maintain --> - <Card - v-for="card in workspaceStepCards" - :key="card.id" - class="flex flex-col" - :class="stepStatusTintClass(card.status)" - > - <CardHeader class="pb-3"> - <div class="flex items-center justify-between gap-2"> - <CardTitle class="text-base">{{ card.title }}</CardTitle> - <Badge variant="outline">{{ card.status }}</Badge> + <div class="grid gap-4 md:grid-cols-2"> + <Card> + <CardHeader> + <CardTitle class="text-base">Entity Types</CardTitle> + 
<CardDescription>Node types in the knowledge graph ontology</CardDescription> + </CardHeader> + <CardContent> + <div v-if="entityTypeLabels.length === 0" class="text-sm text-muted-foreground"> + No entity types defined yet. + </div> + <div v-else class="flex flex-wrap gap-2"> + <Badge v-for="label in entityTypeLabels" :key="label" variant="outline"> + {{ label }} + </Badge> </div> - <CardDescription>{{ card.statusDetail }}</CardDescription> + </CardContent> + </Card> + <Card> + <CardHeader> + <CardTitle class="text-base">Relationship Types</CardTitle> + <CardDescription>Edge types connecting entities</CardDescription> </CardHeader> - <CardContent class="mt-auto"> - <Button - class="w-full" - variant="outline" - tabindex="0" - @click="openWorkspaceStep(card.id)" - @keydown="onStepActionKeydown($event, card.id)" - > - {{ card.actionLabel }} - </Button> + <CardContent> + <div v-if="relationshipTypeLabels.length === 0" class="text-sm text-muted-foreground"> + No relationship types defined yet. + </div> + <div v-else class="flex flex-wrap gap-2"> + <Badge v-for="label in relationshipTypeLabels" :key="label" variant="outline"> + {{ label }} + </Badge> + </div> </CardContent> </Card> </div> @@ -1638,5 +1960,25 @@ watch(selectedOpsDataSourceId, () => { </div> </section> </template> + + <AlertDialog v-model:open="deleteKgDialogOpen"> + <AlertDialogContent> + <AlertDialogHeader> + <AlertDialogTitle>Delete this knowledge graph?</AlertDialogTitle> + <AlertDialogDescription> + This permanently deletes + <span class="font-medium text-foreground">{{ kgIdentity?.name ?? kgId }}</span> + and its configuration. Data sources and sync history for this graph will be removed. 
+ </AlertDialogDescription> + </AlertDialogHeader> + <AlertDialogFooter> + <AlertDialogCancel :disabled="deletingKg">Cancel</AlertDialogCancel> + <AlertDialogAction :disabled="deletingKg" @click="handleDeleteKnowledgeGraph"> + <Loader2 v-if="deletingKg" class="mr-2 size-4 animate-spin" /> + Delete + </AlertDialogAction> + </AlertDialogFooter> + </AlertDialogContent> + </AlertDialog> </div> </template> diff --git a/src/dev-ui/app/tests/kg-data-sources-phase1.test.ts b/src/dev-ui/app/tests/kg-data-sources-phase1.test.ts index 3f229ae86..35768bbf0 100644 --- a/src/dev-ui/app/tests/kg-data-sources-phase1.test.ts +++ b/src/dev-ui/app/tests/kg-data-sources-phase1.test.ts @@ -2,14 +2,15 @@ import { describe, it, expect } from 'vitest' import { readFileSync } from 'fs' import { resolve } from 'path' import { - commitStatusLabel, + hasUnpulledCommits, isIngestionPreparedAtHead, needsIngestionPrepare, prepStatusBadgeVariant, - prepareCommitStatusLabel, + resolveNewestUnpulledCommit, resolvePrepStatusLabel, resolveRepoUrl, shortCommitHash, + unpulledCommitStatusLabel, } from '@/utils/kgDataSourcesCommits' const phase1Vue = readFileSync( @@ -56,6 +57,14 @@ describe('KG data sources phase1 layout', () => { expect(phase1Vue).toContain('Files on branch') expect(phase1Vue).toContain('formatPreparedFileCount') }) + + it('shows unpulled commit columns', () => { + expect(phase1Vue).toContain('Newest unpulled') + expect(phase1Vue).toContain('Last extraction baseline') + expect(phase1Vue).toContain('Ingested at') + expect(phase1Vue).toContain('Branch tip') + expect(phase1Vue).toContain('resolveNewestUnpulledCommit') + }) }) describe('KG wizard parallel ingestion prep', () => { @@ -83,9 +92,15 @@ describe('kgDataSourcesCommits helpers', () => { it('maps sync statuses to prep labels', () => { expect(resolvePrepStatusLabel('ingested')).toBe('Prepared') expect(prepStatusBadgeVariant('ingested')).toBe('success') - expect(commitStatusLabel('abc', 'abc')).toBe('matches branch head') - 
expect(prepareCommitStatusLabel('abc', 'abc')).toBe('prepared at branch head') + expect( + resolveNewestUnpulledCommit({ + tracked_branch_head_commit: 'remote', + clone_head_commit: 'local', + }), + ).toBe('remote') + expect(unpulledCommitStatusLabel(null, 'remote')).toBe('up to date with branch') expect(needsIngestionPrepare({ tracked_branch_head_commit: 'abc', last_prepared_commit: null })).toBe(true) - expect(isIngestionPreparedAtHead({ tracked_branch_head_commit: 'abc', last_prepared_commit: 'abc' })).toBe(true) + expect(hasUnpulledCommits({ tracked_branch_head_commit: 'abc', clone_head_commit: 'abc' })).toBe(false) + expect(isIngestionPreparedAtHead({ tracked_branch_head_commit: 'abc', clone_head_commit: 'abc' })).toBe(true) }) }) diff --git a/src/dev-ui/app/tests/kg-manage-workspace-hub.test.ts b/src/dev-ui/app/tests/kg-manage-workspace-hub.test.ts new file mode 100644 index 000000000..858bafad6 --- /dev/null +++ b/src/dev-ui/app/tests/kg-manage-workspace-hub.test.ts @@ -0,0 +1,89 @@ +import { describe, expect, it } from 'vitest' +import { + buildWorkspaceHubNextStep, + buildWorkspaceHubTiles, + resolveWorkspaceHubPhaseBadge, + workspaceHubDescription, + workspaceHubStepBadgeClass, + workspaceHubTileClasses, +} from '../utils/kgManageWorkspaceHub' + +const baseStatus = { + workspace_mode: 'schema_bootstrap' as const, + transition_eligible: false, + readiness: { + has_minimum_entity_types: false, + has_minimum_relationship_types: false, + prepopulated_types_ready: false, + blocking_reasons: ['Missing entity types'], + }, +} + +const baseInput = { + kgId: 'kg-1', + dataSourceCount: 0, + preparedSourceCount: 0, + maintenanceReadyCount: 0, + mutationLogRunCount: 0, + entityTypeLabels: [] as string[], + relationshipTypeLabels: [] as string[], + workspaceStatus: baseStatus, +} + +describe('kgManageWorkspaceHub', () => { + it('returns four numbered hub tiles in workspace order', () => { + const tiles = buildWorkspaceHubTiles(baseInput) + 
expect(tiles).toHaveLength(4) + expect(tiles.map((tile) => tile.step)).toEqual([1, 2, 3, 4]) + expect(tiles.map((tile) => tile.key)).toEqual([ + 'data-sources', + 'graph-management', + 'mutation-logs', + 'maintain', + ]) + }) + + it('locks mutation logs and maintain when prerequisites are missing', () => { + const tiles = buildWorkspaceHubTiles(baseInput) + expect(tiles.find((tile) => tile.key === 'mutation-logs')?.enabled).toBe(false) + expect(tiles.find((tile) => tile.key === 'maintain')?.enabled).toBe(false) + }) + + it('marks sources phase complete when all sources are prepared', () => { + const tiles = buildWorkspaceHubTiles({ + ...baseInput, + dataSourceCount: 2, + preparedSourceCount: 2, + }) + const sourcesTile = tiles.find((tile) => tile.key === 'data-sources') + expect(sourcesTile?.done).toBe(true) + expect(sourcesTile?.tone).toBe('success') + expect(resolveWorkspaceHubPhaseBadge({ + ...baseInput, + dataSourceCount: 2, + preparedSourceCount: 2, + }).label).toBe('Design') + }) + + it('builds a primary next-step CTA while sources phase is incomplete', () => { + const next = buildWorkspaceHubNextStep(baseInput) + expect(next.primaryPhase).toBe(true) + expect(next.title).toBe('Data Sources') + expect(next.label).toContain('Open') + }) + + it('maps tile tones to k-extract style surface classes', () => { + expect(workspaceHubTileClasses({ enabled: true, highlight: false, tone: 'success' })).toContain('green') + expect(workspaceHubTileClasses({ enabled: true, highlight: true, tone: 'primary' })).toContain('primary') + expect(workspaceHubStepBadgeClass({ enabled: true, done: true, tone: 'success' })).toContain('green-500') + }) + + it('describes workspace guidance by phase', () => { + expect(workspaceHubDescription(baseInput)).toContain('Data sources') + expect(workspaceHubDescription({ + ...baseInput, + dataSourceCount: 1, + preparedSourceCount: 1, + })).toContain('Design') + }) +}) diff --git a/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts 
b/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts index a7e3b1e24..77b270354 100644 --- a/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts +++ b/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts @@ -42,6 +42,10 @@ const sharedConversationPanelVue = readFileSync( resolve(__dirname, '../components/extraction/SharedConversationPanel.vue'), 'utf-8', ) +const manageWorkspaceHubTs = readFileSync( + resolve(__dirname, '../utils/kgManageWorkspaceHub.ts'), + 'utf-8', +) const baseWorkspaceStatus = { workspace_mode: 'schema_bootstrap' as const, @@ -184,13 +188,18 @@ describe('KG-MANAGE-001 - manage entry navigation', () => { }) }) -describe('KG-MANAGE-002 - workspace step card set', () => { - it('renders Project workspace section with exactly four step cards', () => { +describe('KG-MANAGE-002 - workspace hub tile set', () => { + it('renders Project workspace section with hub tiles and stats', () => { expect(manageWorkspaceVue).toContain('Project workspace') - expect(manageWorkspaceVue).toContain('workspaceStepCards') - for (const stepId of WORKSPACE_STEP_ORDER) { - expect(manageWorkspaceVue).toContain(WORKSPACE_STEP_TITLES[stepId]) - } + expect(manageWorkspaceVue).toContain('workspaceHubTiles') + expect(manageWorkspaceVue).toContain('workspaceHubTileClasses') + expect(manageWorkspaceVue).toContain('Entity Types') + expect(manageWorkspaceVue).toContain('Relationship Types') + expect(manageWorkspaceVue).toContain('Mutation Runs') + expect(manageWorkspaceHubTs).toContain('Data sources') + expect(manageWorkspaceHubTs).toContain('Design') + expect(manageWorkspaceHubTs).toContain('Mutation logs') + expect(manageWorkspaceHubTs).toContain('Maintain') }) it('buildWorkspaceStepCards returns the canonical four-card set', () => { @@ -212,10 +221,10 @@ describe('KG-MANAGE-002 - workspace step card set', () => { }) describe('KG-MANAGE-003 - suggested next step callout', () => { - it('renders Suggested next step callout above the card grid', () => { + 
it('renders next-step callout in the workspace hub card', () => { expect(manageWorkspaceVue).toContain('Suggested next step') - expect(manageWorkspaceVue).toContain('suggestedNextStep') - expect(manageWorkspaceVue).toContain('openWorkspaceStep') + expect(manageWorkspaceVue).toContain('workspaceHubNextStep') + expect(manageWorkspaceVue).toContain('Next step') }) it('prioritizes data sources when no sources are connected', () => { @@ -254,15 +263,16 @@ describe('KG-MANAGE-003 - suggested next step callout', () => { }) }) -describe('KG-MANAGE-004 - step card status semantics', () => { - it('renders status label, tint, detail text, and primary action per card', () => { - expect(manageWorkspaceVue).toContain('stepStatusTintClass') - expect(manageWorkspaceVue).toContain('card.status') - expect(manageWorkspaceVue).toContain('card.statusDetail') - expect(manageWorkspaceVue).toContain('card.actionLabel') +describe('KG-MANAGE-004 - workspace hub tile semantics', () => { + it('renders hub tile classes, badges, subtitles, and link labels', () => { + expect(manageWorkspaceVue).toContain('workspaceHubTileClasses') + expect(manageWorkspaceVue).toContain('workspaceHubStepBadgeClass') + expect(manageWorkspaceVue).toContain('item.subtitle') + expect(manageWorkspaceVue).toContain('item.linkLabel') + expect(manageWorkspaceVue).toContain('item.lockedReason') }) - it('maps each status label to a tint class', () => { + it('maps each status label to a tint class in graph-management rail', () => { expect(stepStatusTintClass('ready')).toContain('emerald') expect(stepStatusTintClass('in_progress')).toContain('blue') expect(stepStatusTintClass('needs_attention')).toContain('amber') @@ -540,10 +550,10 @@ describe('KG-MANAGE-017 - chat input keyboard contract', () => { }) describe('KG-MANAGE-018 - keyboard operable step and rail actions', () => { - it('supports keyboard activation for step card primary actions', () => { - expect(manageWorkspaceVue).toContain('onStepActionKeydown') - 
expect(manageWorkspaceVue).toContain('handleActivatableKeydown') - expect(manageWorkspaceVue).toContain('@keydown="onStepActionKeydown($event, card.id)"') + it('uses native links for workspace hub tiles', () => { + expect(manageWorkspaceVue).toContain('workspaceHubTiles') + expect(manageWorkspaceVue).toContain('<NuxtLink') + expect(manageWorkspaceVue).toContain('focus-visible:ring-2 focus-visible:ring-ring') }) it('supports keyboard activation for graph management rail selection', () => { diff --git a/src/dev-ui/app/utils/kgDataSourcesCommits.ts b/src/dev-ui/app/utils/kgDataSourcesCommits.ts index 939b55f2e..a5ea69801 100644 --- a/src/dev-ui/app/utils/kgDataSourcesCommits.ts +++ b/src/dev-ui/app/utils/kgDataSourcesCommits.ts @@ -23,31 +23,65 @@ export function commitStatusLabel( return current === remote ? 'matches branch head' : 'new commits on branch' } -export function prepareCommitStatusLabel( - prepared: string | null | undefined, - tracked: string | null | undefined, -): string { - if (!tracked) return 'branch head unknown' - if (!prepared) return 'not prepared yet' - return prepared === tracked ? 'prepared at branch head' : 'new commits to prepare' +/** Commit we have ingested (local HEAD after pull/prepare). */ +export function resolveIngestedHeadCommit(ds: { + clone_head_commit?: string | null + last_prepared_commit?: string | null + ingested_head_commit?: string | null +}): string | null { + if (ds.ingested_head_commit) return ds.ingested_head_commit + return ds.clone_head_commit ?? ds.last_prepared_commit ?? null } -export function needsIngestionPrepare(ds: { - last_prepared_commit?: string | null +/** Remote branch tip from last check (what git pull would reach). */ +export function resolveBranchTipCommit(ds: { tracked_branch_head_commit?: string | null -}): boolean { - const tracked = ds.tracked_branch_head_commit - if (!tracked) return false - return ds.last_prepared_commit !== tracked +}): string | null { + return ds.tracked_branch_head_commit ?? 
null } -export function isIngestionPreparedAtHead(ds: { - last_prepared_commit?: string | null +/** + * Newest commit on the branch we do not have yet. + * When never ingested, the whole branch tip is unpulled. + */ +export function resolveNewestUnpulledCommit(ds: { + newest_unpulled_commit?: string | null tracked_branch_head_commit?: string | null -}): boolean { - const tracked = ds.tracked_branch_head_commit - const prepared = ds.last_prepared_commit - return !!tracked && !!prepared && prepared === tracked + clone_head_commit?: string | null + last_prepared_commit?: string | null + ingested_head_commit?: string | null +}): string | null { + if (ds.newest_unpulled_commit !== undefined) { + return ds.newest_unpulled_commit + } + const tip = resolveBranchTipCommit(ds) + if (!tip) return null + const ingested = resolveIngestedHeadCommit(ds) + if (!ingested) return tip + return ingested === tip ? null : tip +} + +export function hasUnpulledCommits(ds: Parameters<typeof resolveNewestUnpulledCommit>[0]): boolean { + return resolveNewestUnpulledCommit(ds) !== null +} + +export function unpulledCommitStatusLabel( + unpulled: string | null | undefined, + branchTip: string | null | undefined, +): string { + if (!branchTip) return 'check branch to see remote tip' + if (!unpulled) return 'up to date with branch' + return 'new commit on branch (not ingested yet)' +} + +export function needsIngestionPrepare(ds: Parameters<typeof hasUnpulledCommits>[0]): boolean { + return hasUnpulledCommits(ds) +} + +export function isIngestionPreparedAtHead(ds: Parameters<typeof hasUnpulledCommits>[0]): boolean { + const tip = resolveBranchTipCommit(ds) + const ingested = resolveIngestedHeadCommit(ds) + return !!tip && !!ingested && ingested === tip } export function formatPreparedFileCount(count: number | null | undefined): string { diff --git a/src/dev-ui/app/utils/kgManageWorkspaceHub.ts b/src/dev-ui/app/utils/kgManageWorkspaceHub.ts new file mode 100644 index 000000000..3566df4ed --- 
/dev/null +++ b/src/dev-ui/app/utils/kgManageWorkspaceHub.ts @@ -0,0 +1,267 @@ +import { cn } from '@/lib/utils' +import { + buildManageStepUrl, + buildSuggestedNextStep, + buildWorkspaceStepCards, + resolveStepDestination, + type SuggestedNextStepView, + type WorkspaceOverviewInputs, + type WorkspaceStepId, +} from '@/utils/kgManageWorkspace' + +export type WorkspaceHubTone = 'success' | 'warning' | 'primary' | 'muted' + +export interface WorkspaceHubTile { + step: number + key: WorkspaceStepId + title: string + subtitle: string + to: string + enabled: boolean + lockedReason: string | null + highlight: boolean + tone: WorkspaceHubTone + linkLabel: string + done: boolean +} + +export interface WorkspaceHubPhaseBadge { + label: string + variant: 'default' | 'secondary' | 'success' | 'warning' +} + +export interface WorkspaceHubOverview extends WorkspaceOverviewInputs { + preparedSourceCount: number + entityTypeLabels: string[] + relationshipTypeLabels: string[] +} + +export interface WorkspaceHubSourceRow { + id: string + name: string + url: string + status: string + statusVariant: 'success' | 'secondary' | 'outline' +} + +function sourcesPhaseComplete(input: WorkspaceHubOverview): boolean { + return input.dataSourceCount > 0 && input.preparedSourceCount === input.dataSourceCount +} + +function designPhaseComplete(input: WorkspaceHubOverview): boolean { + return ( + input.workspaceStatus?.workspace_mode === 'extraction_operations' + || input.workspaceStatus?.transition_eligible === true + ) +} + +export function resolveWorkspaceHubPhaseBadge(input: WorkspaceHubOverview): WorkspaceHubPhaseBadge { + if (designPhaseComplete(input)) { + return { label: 'Operations', variant: 'success' } + } + if (sourcesPhaseComplete(input)) { + return { label: 'Design', variant: 'warning' } + } + return { label: 'Data sources', variant: 'secondary' } +} + +export function resolveSuggestedWorkspaceKey(input: WorkspaceHubOverview): WorkspaceStepId { + if (!sourcesPhaseComplete(input)) 
return 'data-sources' + if (!designPhaseComplete(input)) return 'graph-management' + if (input.maintenanceReadyCount > 0) return 'maintain' + if (input.mutationLogRunCount === 0) return 'graph-management' + return 'mutation-logs' +} + +export function buildWorkspaceHubTiles(input: WorkspaceHubOverview): WorkspaceHubTile[] { + const cards = buildWorkspaceStepCards(input) + const cardById = Object.fromEntries(cards.map((c) => [c.id, c])) as Record< + WorkspaceStepId, + (typeof cards)[number] + > + const highlightKey = resolveSuggestedWorkspaceKey(input) + const sourcesDone = sourcesPhaseComplete(input) + const designDone = designPhaseComplete(input) + + const dsCard = cardById['data-sources'] + const gmCard = cardById['graph-management'] + const mlCard = cardById['mutation-logs'] + const maintainCard = cardById.maintain + + const toneFor = ( + step: number, + done: boolean, + enabled: boolean, + cardStatus: (typeof cards)[number]['status'], + ): WorkspaceHubTone => { + if (done) return 'success' + if (!enabled) return 'muted' + if (cardStatus === 'needs_attention') return 'warning' + if (highlightKey === (['data-sources', 'graph-management', 'mutation-logs', 'maintain'] as const)[step - 1]) { + return 'primary' + } + return 'muted' + } + + const linkLabelFor = (action: (typeof cards)[number]['actionLabel'], done: boolean) => + action === 'Revisit' || done ? 'Revisit →' : action === 'Run' ? 'Run →' : 'Open →' + + return [ + { + step: 1, + key: 'data-sources', + title: 'Data sources', + subtitle: sourcesDone + ? `${input.dataSourceCount} source${input.dataSourceCount === 1 ? '' : 's'} · ingestion ready` + : input.dataSourceCount > 0 + ? 
`${input.preparedSourceCount}/${input.dataSourceCount} prepared · finish ingestion` + : 'Connect repositories and prepare ingestion context', + to: resolveStepDestination(input.kgId, 'data-sources', { + dataSourceCount: input.dataSourceCount, + }), + enabled: true, + lockedReason: null, + highlight: highlightKey === 'data-sources', + tone: toneFor(1, sourcesDone, true, dsCard.status), + linkLabel: linkLabelFor(dsCard.actionLabel, sourcesDone), + done: sourcesDone, + }, + { + step: 2, + key: 'graph-management', + title: 'Design', + subtitle: designDone + ? 'Schema validated · extraction operations available' + : sourcesDone + ? 'Design assistant, schema bootstrap, and validation' + : 'Open anytime; prepare data sources to clear later gates', + to: resolveStepDestination(input.kgId, 'graph-management'), + enabled: true, + lockedReason: null, + highlight: highlightKey === 'graph-management', + tone: toneFor(2, designDone, true, gmCard.status), + linkLabel: linkLabelFor(gmCard.actionLabel, designDone), + done: designDone, + }, + { + step: 3, + key: 'mutation-logs', + title: 'Mutation logs', + subtitle: input.mutationLogRunCount > 0 + ? `${input.mutationLogRunCount} run${input.mutationLogRunCount === 1 ? '' : 's'} recorded` + : 'Review extraction and apply runs', + to: resolveStepDestination(input.kgId, 'mutation-logs'), + enabled: input.dataSourceCount > 0, + lockedReason: input.dataSourceCount > 0 ? null : 'Connect a data source before reviewing runs.', + highlight: highlightKey === 'mutation-logs', + tone: toneFor(3, input.mutationLogRunCount > 0, input.dataSourceCount > 0, mlCard.status), + linkLabel: linkLabelFor(mlCard.actionLabel, input.mutationLogRunCount > 0), + done: input.mutationLogRunCount > 0, + }, + { + step: 4, + key: 'maintain', + title: 'Maintain', + subtitle: input.maintenanceReadyCount > 0 + ? `${input.maintenanceReadyCount} source${input.maintenanceReadyCount === 1 ? 
'' : 's'} need maintenance` + : 'Incremental graph updates from new commits', + to: resolveStepDestination(input.kgId, 'maintain'), + enabled: designDone, + lockedReason: designDone ? null : 'Complete design validation before maintenance.', + highlight: highlightKey === 'maintain', + tone: toneFor(4, maintainCard.status === 'ready' && input.maintenanceReadyCount === 0, designDone, maintainCard.status), + linkLabel: linkLabelFor(maintainCard.actionLabel, maintainCard.status === 'ready' && input.maintenanceReadyCount === 0), + done: maintainCard.status === 'ready' && input.maintenanceReadyCount === 0 && input.dataSourceCount > 0, + }, + ] +} + +export function buildWorkspaceHubNextStep(input: WorkspaceHubOverview): { + to: string + title: string + description: string + label: string + primaryPhase: boolean +} { + const next = buildSuggestedNextStep(input) + const actionWord = + next.actionLabel === 'Run' + ? 'Run' + : next.actionLabel === 'Revisit' + ? 'Revisit' + : 'Open' + return { + to: resolveStepDestination(input.kgId, next.stepId, { + dataSourceCount: input.dataSourceCount, + }), + title: next.title, + description: next.description, + label: `${actionWord} ${next.title}`, + primaryPhase: !sourcesPhaseComplete(input), + } +} + +export function workspaceHubTileClasses(item: { + enabled: boolean + highlight: boolean + tone: WorkspaceHubTone +}): string { + if (!item.enabled) return '' + const { tone, highlight } = item + if (tone === 'success') { + return cn( + 'border-green-500/35 bg-green-500/5 dark:border-green-500/25 dark:bg-green-950/20', + highlight && 'ring-1 ring-green-500/30', + ) + } + if (tone === 'warning') { + return cn( + 'border-amber-500/40 bg-amber-500/5 dark:border-amber-500/30 dark:bg-amber-950/25', + highlight && 'ring-1 ring-amber-500/25', + ) + } + if (tone === 'primary') { + return cn( + 'border-primary/45 bg-primary/10 ring-1 ring-primary/20', + highlight && 'ring-2 ring-primary/35', + ) + } + return cn( + 'border-border bg-card', + 
highlight && 'border-primary/50 bg-primary/10 ring-1 ring-primary/20', + ) +} + +export function workspaceHubStepBadgeClass(item: { + enabled: boolean + done: boolean + tone: WorkspaceHubTone +}): string { + if (!item.enabled) { + return 'flex size-7 shrink-0 items-center justify-center rounded-full bg-muted text-xs font-bold text-muted-foreground' + } + if (item.done) { + return 'flex size-7 shrink-0 items-center justify-center rounded-full bg-green-500 text-white' + } + if (item.tone === 'warning') { + return 'flex size-7 shrink-0 items-center justify-center rounded-full bg-amber-600 text-white dark:bg-amber-500' + } + if (item.tone === 'primary') { + return 'flex size-7 shrink-0 items-center justify-center rounded-full bg-primary text-xs font-bold text-primary-foreground' + } + return 'flex size-7 shrink-0 items-center justify-center rounded-full bg-muted text-xs font-bold text-muted-foreground' +} + +export function workspaceHubDescription(input: WorkspaceHubOverview): string { + if (!sourcesPhaseComplete(input)) { + return 'Finish ingestion under Data sources, then continue through Design. Green tiles mark completed gates; the highlighted tile is your current focus.' + } + if (!designPhaseComplete(input)) { + return 'Use Design for the assistant and schema bootstrap. Green tiles use Revisit; the highlighted tile is your suggested next step.' + } + return 'Continue with mutation logs or maintenance, or Revisit any completed step below.' 
+} + +export function buildManageOverviewUrl(kgId: string): string { + return buildManageStepUrl(kgId) +} From 81e059526c170d53052743708b8883fe626d8af2 Mon Sep 17 00:00:00 2001 From: Austin Redenbaugh <aredenba@redhat.com> Date: Thu, 28 May 2026 22:37:27 -0400 Subject: [PATCH 060/153] docs(extraction): sticky session chat specs (#738) (#743) * feat(ui): align graph management step with k-extract phase2 layout Rework the design chat, schema/session panels, and mode switcher with locked extraction modes until the workspace transitions to extraction operations. Co-authored-by: Cursor <cursoragent@cursor.com> * fix(ui): rename graph management chat title to Graph Management Assistant Co-authored-by: Cursor <cursoragent@cursor.com> * docs(extraction): specify sticky session chat turns and runtime Document Graph Management chat as NDJSON streaming turns inside sticky Claude Agent SDK containers with JobPackage gating and UI mode skills. Closes #738 Co-authored-by: Cursor <cursoragent@cursor.com> --------- Co-authored-by: Cursor <cursoragent@cursor.com> --- specs/extraction/agent-sessions.spec.md | 14 + specs/extraction/chat-turns.spec.md | 81 ++++ specs/extraction/operations.spec.md | 19 + .../extraction/sticky-session-runtime.spec.md | 50 ++ specs/index.spec.md | 2 + specs/nfr/workload-execution.spec.md | 14 + .../extraction/SharedConversationPanel.vue | 351 ++++++++++---- .../pages/knowledge-graphs/[kgId]/manage.vue | 451 ++++++++++-------- .../kg-graph-management-artifacts.test.ts | 40 ++ .../tests/kg-graph-management-modes.test.ts | 47 ++ .../knowledge-graph-manage-workspace.test.ts | 47 +- src/dev-ui/app/utils/kgGraphManagement.ts | 34 ++ .../app/utils/kgGraphManagementArtifacts.ts | 55 +++ 13 files changed, 900 insertions(+), 305 deletions(-) create mode 100644 specs/extraction/chat-turns.spec.md create mode 100644 specs/extraction/sticky-session-runtime.spec.md create mode 100644 src/dev-ui/app/tests/kg-graph-management-artifacts.test.ts create mode 100644 
src/dev-ui/app/tests/kg-graph-management-modes.test.ts create mode 100644 src/dev-ui/app/utils/kgGraphManagementArtifacts.ts diff --git a/specs/extraction/agent-sessions.spec.md b/specs/extraction/agent-sessions.spec.md index 020b1f0f7..4f2e44a7b 100644 --- a/specs/extraction/agent-sessions.spec.md +++ b/specs/extraction/agent-sessions.spec.md @@ -27,6 +27,20 @@ The system SHALL keep sessions active until explicit reset. - WHEN the user sends follow-up messages over time - THEN prior session context remains available for continued conversation +#### Scenario: Chat turn persistence +- GIVEN a completed graph-management chat turn +- WHEN the assistant reply is emitted +- THEN user and assistant messages are persisted on the session +- AND sticky runtime metadata is updated on the session runtime context + +### Requirement: Sticky Runtime Association +The system SHALL associate active sessions with sticky container runtime leases. + +#### Scenario: Runtime metadata on session +- GIVEN a chat turn starts or reuses a sticky container +- WHEN the turn is accepted +- THEN session runtime context records sticky container identity and status + ### Requirement: Clear Chat Reset The system SHALL provide an explicit "Clear chat" action that resets runtime context. diff --git a/specs/extraction/chat-turns.spec.md b/specs/extraction/chat-turns.spec.md new file mode 100644 index 000000000..f5679f9f9 --- /dev/null +++ b/specs/extraction/chat-turns.spec.md @@ -0,0 +1,81 @@ +# Chat Turns + +## Purpose +Graph Management chat turns orchestrate conversational extraction agent workloads inside sticky session containers. Each turn persists user and assistant messages, streams transparent activity to the UI, and gates execution until ingestion context (JobPackage) is available when required by the active graph-management mode.
+ +## Requirements + +### Requirement: Sticky Session Container Execution +The system SHALL execute graph-management chat turns in a sticky session container assigned to the active extraction agent session. + +#### Scenario: Reuse sticky runtime across turns +- GIVEN an active extraction agent session with a running sticky container +- WHEN the user sends a follow-up chat message +- THEN the same sticky container lease is reused until clear-chat, timeout, or reset + +#### Scenario: Start sticky runtime on first turn +- GIVEN an active session without a sticky container lease +- WHEN the user sends the first chat message +- THEN the system starts a sticky session container for that session scope +- AND records container identity in session runtime context + +### Requirement: JobPackage Context in Sticky Runtime +The system SHALL load ingestion context from JobPackage archives into the sticky session container when JobPackage access is required. + +#### Scenario: JobPackage required for extraction jobs mode +- GIVEN graph-management UI mode `Extraction Jobs` +- AND at least one data source exists for the knowledge graph +- WHEN JobPackage context is not yet prepared for all tracked sources +- THEN the chat turn enters a wait state instead of invoking the agent +- AND the UI receives wait-phase activity explaining that ingestion context is pending + +#### Scenario: JobPackage ready +- GIVEN graph-management UI mode `Extraction Jobs` +- AND prepared ingestion context exists for the knowledge graph +- WHEN the user sends a chat message +- THEN JobPackage material is available to the sticky container agent runtime +- AND the agent turn proceeds normally + +#### Scenario: Schema design without JobPackage gate +- GIVEN graph-management UI mode `Initial Schema Design` +- WHEN the user sends a chat message +- THEN JobPackage readiness is not required to start the agent turn +- AND schema-bootstrap skills remain primary framing + +### Requirement: Mode-Aware Skill Framing 
+The system SHALL resolve agent skills using workspace session mode and graph-management UI mode. + +#### Scenario: Three UI mode skill overlays +- GIVEN graph-management UI modes `Initial Schema Design`, `Extraction Jobs`, and `One-off Mutations` +- WHEN a chat turn starts +- THEN skill framing reflects the selected UI mode +- AND global templates plus knowledge-graph overrides still apply underneath + +### Requirement: Streaming Chat Turn Contract +The system SHALL expose chat turns over an NDJSON streaming HTTP endpoint. + +#### Scenario: Thinking transparency +- GIVEN an in-progress chat turn +- WHEN the agent performs preparatory work +- THEN the stream emits `thinking` events with recent activity lines for UI display + +#### Scenario: Wait transparency +- GIVEN JobPackage context is required but unavailable +- WHEN the user sends a chat message +- THEN the stream emits a `wait` event with phase `awaiting_job_package` +- AND completes with an assistant explanation of the wait condition + +#### Scenario: Successful completion +- GIVEN an agent turn completes successfully +- WHEN the stream finishes +- THEN a terminal `done` event includes the assistant reply +- AND user and assistant messages are persisted on the session + +### Requirement: Clear Chat Resets Runtime +The system SHALL reset sticky session runtime when clear-chat is invoked. 
+ +#### Scenario: Clear chat terminates sticky container +- GIVEN an active session with sticky runtime state +- WHEN the user clicks Clear chat +- THEN the sticky container is reset +- AND a new clean session is started for the same scope diff --git a/specs/extraction/operations.spec.md b/specs/extraction/operations.spec.md index 40110e1ee..adb760f35 100644 --- a/specs/extraction/operations.spec.md +++ b/specs/extraction/operations.spec.md @@ -20,6 +20,25 @@ The system SHALL provide different default skill sets for bootstrap and extracti - THEN the default skill set is extraction-job-setup and minor-direct-edit oriented - AND schema edit skills remain available but are not the primary framing +### Requirement: Graph Management UI Mode Overlays +The system SHALL apply graph-management UI mode overlays on top of workspace session mode skills. + +#### Scenario: Initial schema design overlay +- GIVEN graph-management UI mode `Initial Schema Design` +- WHEN a chat turn resolves skills +- THEN schema bootstrap and validation guidance is primary + +#### Scenario: Extraction jobs overlay +- GIVEN graph-management UI mode `Extraction Jobs` +- WHEN a chat turn resolves skills +- THEN extraction job setup and sync-run guidance is primary +- AND JobPackage readiness is required before agent execution + +#### Scenario: One-off mutations overlay +- GIVEN graph-management UI mode `One-off Mutations` +- WHEN a chat turn resolves skills +- THEN scoped JSONL mutation authoring guidance is primary + ### Requirement: Skill Resolution Model The system SHALL resolve agent skills using global templates with knowledge-graph overrides. 
diff --git a/specs/extraction/sticky-session-runtime.spec.md b/specs/extraction/sticky-session-runtime.spec.md new file mode 100644 index 000000000..b287c1948 --- /dev/null +++ b/specs/extraction/sticky-session-runtime.spec.md @@ -0,0 +1,50 @@ +# Sticky Session Runtime + +## Purpose +Sticky session runtimes host long-lived Claude Agent SDK workloads for Graph Management chat. Each active extraction agent session receives an isolated container with mounted skills, scoped credentials, and optional JobPackage materialization for repository access. + +## Requirements + +### Requirement: Isolated Sticky Container per Session +The system SHALL run each active graph-management chat session in an isolated container. + +#### Scenario: Session-scoped isolation +- GIVEN two users with active sessions on the same knowledge graph +- WHEN both send chat messages +- THEN each session uses a distinct sticky container lease +- AND container labels include session, user, knowledge graph, and mode identifiers + +### Requirement: Claude Agent SDK Runtime +The system SHALL host Claude Agent SDK agent instances inside sticky session containers. + +#### Scenario: Agent runtime image +- GIVEN a sticky session container starts +- WHEN the container initializes +- THEN it runs an agent runtime process capable of Claude Agent SDK execution +- AND is distinct from ephemeral JobPackage worker containers used for sync extraction + +### Requirement: Skills Directory Mount +The system SHALL mount the platform skills directory into sticky session containers. + +#### Scenario: Skills available at runtime +- GIVEN a sticky session container starts +- WHEN the agent runtime initializes +- THEN SKILL.md resources from the platform skills directory are readable inside the container + +### Requirement: JobPackage Materialization +The system SHALL materialize JobPackage archives into sticky session containers when ingestion context is required. 
+ +#### Scenario: Repository files available +- GIVEN JobPackage context is ready for the knowledge graph +- WHEN a sticky container starts or refreshes ingestion context +- THEN manifest, changeset, content, and reconstructed repository files are available under the session work directory +- AND the agent can inspect data-source content without leaving the container + +### Requirement: Scoped Runtime Credentials +The system SHALL inject short-lived credentials into sticky session containers using least-privilege tenant and knowledge-graph scope. + +#### Scenario: Credential injection at start +- GIVEN a sticky session container is started +- WHEN runtime credentials are issued +- THEN credentials are injected as environment variables or runtime files +- AND credentials are never persisted in mutation logs or session message history diff --git a/specs/index.spec.md b/specs/index.spec.md index 5cce73fa4..cee23c82a 100644 --- a/specs/index.spec.md +++ b/specs/index.spec.md @@ -65,6 +65,8 @@ AI-assisted schema and extraction workflows that emit MutationLogs for Graph app |------|-------| | [Operations](extraction/operations.spec.md) | Mode-specific agent operations and mutation-log production | | [Agent Sessions](extraction/agent-sessions.spec.md) | Session lifecycle, reset behavior, and session metrics | +| [Chat Turns](extraction/chat-turns.spec.md) | Graph-management chat streaming, wait states, and turn persistence | +| [Sticky Session Runtime](extraction/sticky-session-runtime.spec.md) | Isolated sticky containers, JobPackage context, Claude Agent SDK runtime | ### [Shared Kernel](shared-kernel/) — Cross-Cutting Contracts Capabilities shared across bounded contexts. diff --git a/specs/nfr/workload-execution.spec.md b/specs/nfr/workload-execution.spec.md index ce2149997..ed11899c5 100644 --- a/specs/nfr/workload-execution.spec.md +++ b/specs/nfr/workload-execution.spec.md @@ -57,3 +57,17 @@ The system SHALL provide required runtime context in workload containers. 
- THEN ingestion context resources and repository files needed for processing are available - AND the skills directory is available to the agent runtime +#### Scenario: Sticky session Claude agent runtime +- GIVEN a sticky session container for graph-management chat +- WHEN the container starts +- THEN it hosts a Claude Agent SDK agent instance isolated from the API process +- AND JobPackage material may be mounted when ingestion context is required for the active graph-management mode + +### Requirement: Graph Management UI Mode Skills +The system SHALL expose graph-management UI mode skill overlays in addition to workspace session mode skills. + +#### Scenario: UI mode overlays +- GIVEN graph-management modes `Initial Schema Design`, `Extraction Jobs`, and `One-off Mutations` +- WHEN skill instructions are resolved for a chat turn +- THEN UI mode overlays adjust assistant framing while preserving workspace session mode guardrails + diff --git a/src/dev-ui/app/components/extraction/SharedConversationPanel.vue b/src/dev-ui/app/components/extraction/SharedConversationPanel.vue index bbe2edcf5..2fb7482ab 100644 --- a/src/dev-ui/app/components/extraction/SharedConversationPanel.vue +++ b/src/dev-ui/app/components/extraction/SharedConversationPanel.vue @@ -1,9 +1,8 @@ <script setup lang="ts"> -import { computed, nextTick, ref, watch } from 'vue' -import { Loader2, RefreshCw, SendHorizontal } from 'lucide-vue-next' +import { computed, nextTick, onMounted, ref, watch } from 'vue' +import { Bot, Loader2, RefreshCw, RotateCcw, Send, Sparkles, User } from 'lucide-vue-next' import { Button } from '@/components/ui/button' import { Card, CardContent, CardHeader, CardTitle, CardDescription } from '@/components/ui/card' -import { Textarea } from '@/components/ui/textarea' import { AlertDialog, AlertDialogAction, @@ -14,7 +13,6 @@ import { AlertDialogHeader, AlertDialogTitle, } from '@/components/ui/alert-dialog' -import { handleChatInputKeydown } from '@/utils/kgManageState' 
interface ConversationEntry { role?: string @@ -41,6 +39,9 @@ const props = withDefaults(defineProps<{ inputDisabledReason?: string | null forbidden?: boolean forbiddenReason?: string | null + title?: string + description?: string + footerHint?: string }>(), { loading: false, clearing: false, @@ -53,6 +54,11 @@ const props = withDefaults(defineProps<{ inputDisabledReason: null, forbidden: false, forbiddenReason: null, + title: 'Graph Management Assistant', + description: + 'Design and refine schema readiness, validation, and extraction operations for this knowledge graph. Use the assistant below to drive workspace changes.', + footerHint: + 'Use Schema & artifacts and Session pointers below to inspect workspace state; send notes or questions here.', }) const emit = defineEmits<{ @@ -63,33 +69,80 @@ const emit = defineEmits<{ }>() const clearConfirmOpen = ref(false) -const timelineRef = ref<HTMLElement | null>(null) +const chatScrollRef = ref<HTMLElement | null>(null) +const textareaRef = ref<HTMLTextAreaElement | null>(null) +const composerInputId = 'graph-management-chat-input' const messageHistory = computed(() => props.session?.message_history ?? []) -const activityTimeline = computed(() => props.activityLines) - -const combinedTimelineLength = computed( - () => messageHistory.value.length + activityTimeline.value.length, -) const chatInputDisabled = computed( () => props.loading || props.clearing || props.sending || props.inputDisabled || props.forbidden, ) -const chatInputHelp = computed(() => { - if (props.forbidden) { - return props.forbiddenReason ?? 'Chat is unavailable because you lack permission for this action.' - } - if (props.inputDisabledReason) return props.inputDisabledReason - return 'Press Enter to send. Shift+Enter adds a new line.' 
+const thinkingDisplaySlots = computed(() => { + const src = props.activityLines.filter(Boolean) + if (src.length === 0) return [''] + return src.slice(-3) }) -watch(combinedTimelineLength, async () => { - await nextTick() - if (timelineRef.value) { - timelineRef.value.scrollTop = timelineRef.value.scrollHeight +function isUserRole(role: string | undefined): boolean { + return role === 'user' || role === 'human' +} + +function messageText(entry: ConversationEntry): string { + return entry.content ?? entry.message ?? '(empty)' +} + +function scrollToBottom() { + const el = chatScrollRef.value + if (el) { + el.scrollTop = el.scrollHeight } -}) +} + +function adjustTextareaHeight() { + const el = textareaRef.value + if (!el) return + const lh = parseFloat(getComputedStyle(el).lineHeight) + const line = Number.isFinite(lh) && lh > 0 ? lh : 21 + const minH = Math.round(line * 2.5) + const maxH = Math.round(line * 14) + el.style.height = '0' + const scrollH = el.scrollHeight + const h = Math.min(Math.max(scrollH, minH), maxH) + el.style.height = `${h}px` + el.style.overflowY = scrollH > maxH ? 
'auto' : 'hidden' +} + +function handleComposerEnter(event: KeyboardEvent) { + if (event.shiftKey) return + if (chatInputDisabled.value || !props.draftMessage.trim()) return + event.preventDefault() + sendDraftMessage() +} + +function renderAssistantHtml(text: string): string { + let s = text.replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;').replace(/"/g, '&quot;') /* escape first: the result is bound with v-html, so only the markup added below may reach the DOM */ + s = s.replace(/\*\*([^*]+)\*\*/g, '<strong class="font-semibold text-foreground">$1</strong>') + s = s.replace( + /`([^`]+)`/g, + '<code class="rounded bg-muted px-1 py-0.5 text-xs font-mono text-foreground">$1</code>', + ) + s = s.replace( + /^&gt; (.+)$/gm, + '<p class="my-2 border-l-2 border-amber-500/60 pl-3 text-sm text-muted-foreground italic">$1</p>', + ) + s = s.replace( + /\[([^\]]+)\]\(((?:https?:\/\/|mailto:|\/|#)[^)\s]*)\)/g, + '<a class="text-primary font-medium underline underline-offset-2 hover:text-primary/90" href="$2">$1</a>', + ) + s = s.replace(/^## (.+)$/gm, '<h3 class="text-base font-semibold mt-3 mb-1 text-foreground">$1</h3>') + s = s.replace(/^### (.+)$/gm, '<h4 class="text-sm font-semibold mt-2 text-foreground">$1</h4>') + s = s.replace(/^---$/gm, '<hr class="my-3 border-border" />') + s = s.replace(/\n\n+/g, '<br /><br />') + s = s.replace(/\n/g, '<br />') + return s +} function confirmClearChat() { clearConfirmOpen.value = false @@ -101,109 +154,227 @@ function sendDraftMessage() { if (!trimmed || chatInputDisabled.value) return emit('sendMessage', trimmed) emit('update:draftMessage', '') + void nextTick(() => adjustTextareaHeight()) } -function onChatInputKeydown(event: KeyboardEvent) { - handleChatInputKeydown(event, sendDraftMessage) -} +watch( + () => [messageHistory.value.length, props.activityLines.length, props.sending], + async () => { + await nextTick() + scrollToBottom() + }, +) + +watch( + () => props.draftMessage, + () => { + void nextTick(() => adjustTextareaHeight()) + }, +) + +watch( + () => props.loading, + (busy) => { + if (!busy) void nextTick(() => adjustTextareaHeight()) + }, +) + +onMounted(() => { + void
nextTick(() => adjustTextareaHeight()) +}) </script> <template> - <Card> - <CardHeader> - <div class="flex flex-wrap items-start justify-between gap-2"> - <div> - <CardTitle class="text-base">Conversation</CardTitle> - <CardDescription> - Shared conversation feed for {{ modeLabel }} with server-side session resume. + <Card + id="graph-management-design-assistant" + class="overflow-hidden border-2 border-primary/25 shadow-md scroll-mt-6" + > + <CardHeader class="border-b bg-muted/30 pb-4"> + <div class="flex flex-wrap items-start gap-3"> + <div + class="flex size-10 shrink-0 items-center justify-center rounded-full border border-primary/30 bg-primary/10 text-primary" + > + <Sparkles class="size-5" aria-hidden="true" /> + </div> + <div class="min-w-0 flex-1 space-y-1"> + <CardTitle class="text-lg leading-tight">{{ title }}</CardTitle> + <CardDescription class="text-sm leading-relaxed"> + {{ description }} </CardDescription> + <p class="text-xs text-muted-foreground"> + Mode: + <span class="font-medium text-foreground">{{ modeLabel }}</span> + · Session: + <span class="font-medium text-foreground">{{ sessionStatusLabel }}</span> + </p> + </div> + <div class="flex shrink-0 flex-wrap gap-2"> + <Button + type="button" + size="sm" + variant="outline" + class="gap-1.5" + :disabled="loading" + @click="emit('refresh')" + > + <RefreshCw class="size-4" /> + Resume session + </Button> + <Button + type="button" + size="sm" + variant="outline" + class="gap-1.5" + :disabled="clearing || loading || forbidden" + @click="clearConfirmOpen = true" + > + <Loader2 v-if="clearing" class="size-4 animate-spin" /> + <RotateCcw v-else class="size-4" /> + Clear chat + </Button> </div> - <p class="text-xs text-muted-foreground"> - Session: <span class="font-medium text-foreground">{{ sessionStatusLabel }}</span> - </p> </div> </CardHeader> - <CardContent class="space-y-3"> + + <CardContent class="p-0"> <div v-if="forbidden" - class="rounded border border-destructive/40 bg-destructive/5 px-3 
py-2 text-xs text-destructive" + class="border-b border-destructive/40 bg-destructive/5 px-4 py-3 text-xs text-destructive sm:px-6" role="alert" > {{ forbiddenReason ?? 'You do not have permission to use graph management chat for this knowledge graph.' }} </div> - <div class="flex items-center justify-between"> - <p class="text-xs text-muted-foreground">No local cache: conversation state is server-side only.</p> - <Button size="sm" variant="ghost" class="h-7 px-2 text-[11px]" :disabled="loading" @click="emit('refresh')"> - <RefreshCw class="mr-1 size-3.5" /> - Resume session - </Button> - </div> - - <div v-if="loading" class="flex items-center gap-2 text-xs text-muted-foreground"> - <Loader2 class="size-3.5 animate-spin" /> - Loading active conversation session... - </div> <div - v-else - ref="timelineRef" - class="space-y-2 max-h-56 overflow-auto rounded border p-2" + ref="chatScrollRef" + class="min-h-[14rem] max-h-[min(32rem,60vh)] space-y-4 overflow-y-auto bg-muted/10 px-4 py-4 sm:px-6" > <div - v-for="(entry, idx) in messageHistory" - :key="`msg-${idx}-${entry.role ?? 'unknown'}`" - class="rounded px-2 py-1 text-xs" - :class="entry.role === 'assistant' ? 'bg-muted' : 'bg-primary/10'" + v-if="loading" + class="flex flex-col items-center justify-center gap-3 py-12 text-muted-foreground" + aria-busy="true" + aria-live="polite" > - <p class="mb-0.5 font-medium">{{ entry.role ?? 'system' }}</p> - <p>{{ entry.content ?? entry.message ?? '(empty)' }}</p> + <Loader2 class="size-8 shrink-0 animate-spin" /> + <p class="text-center text-sm text-foreground/80">Loading conversation session…</p> </div> + <template v-else> + <div + v-for="(entry, idx) in messageHistory" + :key="`msg-${idx}-${entry.role ?? 'unknown'}`" + class="flex gap-3" + :class="isUserRole(entry.role) ? 'flex-row-reverse' : ''" + > + <div + class="flex size-9 shrink-0 items-center justify-center rounded-full border bg-card" + :class=" + isUserRole(entry.role) + ? 
'border-primary/35 bg-primary/12 text-primary' + : 'border-slate-300/60 bg-slate-100/80 text-slate-600 dark:border-slate-700/60 dark:bg-slate-900/70 dark:text-slate-300' + " + > + <User v-if="isUserRole(entry.role)" class="size-4 text-primary" /> + <Bot v-else class="size-4 text-muted-foreground" /> + </div> + <div + class="min-w-0 max-w-[min(100%,42rem)] rounded-2xl border px-4 py-3 text-sm leading-relaxed shadow-sm" + :class=" + isUserRole(entry.role) + ? 'border-primary/25 bg-primary/[0.07] text-foreground shadow-primary/5' + : 'border-slate-300/65 bg-slate-50/95 text-foreground shadow-slate-300/20 dark:border-slate-700/70 dark:bg-slate-900/65 dark:shadow-black/20' + " + > + <p v-if="isUserRole(entry.role)" class="whitespace-pre-wrap break-words"> + {{ messageText(entry) }} + </p> + <div + v-else + class="chat-md space-y-1 break-words [&_a]:break-all [&_code]:break-all" + v-html="renderAssistantHtml(messageText(entry))" + /> + </div> + </div> - <div - v-for="(line, idx) in activityTimeline" - :key="`activity-${idx}`" - class="rounded border border-dashed px-2 py-1 text-xs text-muted-foreground" - > - {{ line }} - </div> + <div + v-if="sending" + class="flex gap-3 text-muted-foreground" + aria-live="polite" + aria-busy="true" + > + <div class="flex size-9 shrink-0 items-center justify-center rounded-full border bg-card"> + <Bot class="size-4" /> + </div> + <div + class="min-w-0 max-w-[min(100%,42rem)] flex-1 overflow-hidden rounded-2xl border border-dashed border-primary/25 bg-gradient-to-b from-slate-50/90 via-card to-card px-4 py-3 text-sm shadow-sm dark:from-slate-900/65" + > + <div class="mb-2 flex items-center gap-2 text-foreground"> + <Loader2 class="size-4 shrink-0 animate-spin text-primary" aria-hidden="true" /> + <span class="font-medium tracking-tight">Thinking...</span> + </div> + <ol class="m-0 list-none space-y-2 border-l-2 border-primary/25 pl-3"> + <li + v-for="(line, lineIdx) in thinkingDisplaySlots" + :key="`${lineIdx}-${line || 'empty'}`" + 
class="flex gap-2 text-xs leading-snug" + > + <span + class="w-4 shrink-0 select-none pt-0.5 text-center font-mono text-xs text-primary/45" + aria-hidden="true" + > + – + </span> + <span + class="min-w-0 flex-1 break-words font-mono text-[13px]" + :class="line ? 'text-foreground/90' : 'text-muted-foreground/35'" + > + {{ line || '—' }} + </span> + </li> + </ol> + </div> + </div> - <p - v-if="messageHistory.length === 0 && activityTimeline.length === 0" - class="text-xs text-muted-foreground" - > - No messages yet. Send a prompt or use validate/transition actions to drive session activity. - </p> + <p + v-if="messageHistory.length === 0 && !sending" + class="py-8 text-center text-sm text-muted-foreground" + > + No messages yet. Send a prompt or use validate/transition actions to drive session activity. + </p> + </template> </div> - <div class="space-y-2"> - <div class="flex items-start gap-2"> - <Textarea - :model-value="draftMessage" + <div class="border-t bg-muted/20 p-4 sm:p-6"> + <label class="sr-only" :for="composerInputId">Message to graph management assistant</label> + <div class="flex flex-col gap-3 sm:flex-row sm:items-end"> + <textarea + :id="composerInputId" + ref="textareaRef" + :value="draftMessage" + rows="1" :disabled="chatInputDisabled" :placeholder="inputPlaceholder" - class="min-h-20" - aria-label="Graph management chat input" - @update:model-value="(value) => emit('update:draftMessage', value)" - @keydown="onChatInputKeydown" + class="w-full flex-1 resize-none rounded-md border border-input bg-background px-3 py-2 text-sm leading-relaxed shadow-sm placeholder:text-muted-foreground focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring disabled:opacity-50" + @input="emit('update:draftMessage', ($event.target as HTMLTextAreaElement).value)" + @keydown.enter="handleComposerEnter" /> <Button - variant="default" - class="shrink-0" + type="button" + class="h-10 min-h-10 w-full shrink-0 sm:w-auto sm:px-6" :disabled="chatInputDisabled || 
!draftMessage.trim()" - :title="chatInputHelp" + :title="inputDisabledReason ?? undefined" @click="sendDraftMessage" > - <Loader2 v-if="sending" class="size-3.5 animate-spin" /> - <SendHorizontal v-else class="size-3.5" /> - </Button> - </div> - <div class="flex flex-wrap items-center justify-between gap-2"> - <p class="text-[11px] text-muted-foreground">{{ chatInputHelp }}</p> - <Button variant="outline" :disabled="clearing || loading || forbidden" @click="clearConfirmOpen = true"> - <Loader2 v-if="clearing" class="mr-1.5 size-3.5 animate-spin" /> - Clear chat + <Loader2 v-if="sending" class="size-4 animate-spin" /> + <template v-else> + <Send class="size-4 sm:mr-2" /> + <span class="hidden sm:inline">Send</span> + </template> </Button> </div> + <p class="mt-2 text-xs text-muted-foreground"> + {{ footerHint }} + <span class="text-muted-foreground/90"> · Enter to send, Shift+Enter for a new line.</span> + </p> </div> </CardContent> </Card> diff --git a/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue b/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue index 3b3fd268d..2f9e65609 100644 --- a/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue +++ b/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue @@ -16,6 +16,7 @@ import { Loader2, Lock, MessageSquare, + PencilRuler, PlayCircle, ScrollText, ShieldAlert, @@ -44,17 +45,25 @@ import { buildGraphManagementRailItems, buildGraphManagementStepUrl, filterRailItemsForMode, + graphManagementModeLockReason, + isGraphManagementModeUnlocked, parseGraphManagementModeQuery, - resolveDefaultGraphManagementMode, - resolveRailSelectionForMode, + resolveEffectiveGraphManagementMode, resolveSharedSessionMode, type GraphManagementMode, + type GraphManagementModeGateInput, type GraphManagementRailItemId, } from '@/utils/kgGraphManagement' +import { + filterSchemaRailItems, + graphManagementArtifactHint, + graphManagementArtifactRowClass, + graphManagementRailItemDone, + resolveSchemaRailSelection, +} from 
'@/utils/kgGraphManagementArtifacts' import { buildManageStepUrl, parseManageStepQuery, - stepStatusTintClass, } from '@/utils/kgManageWorkspace' import { buildWorkspaceHubNextStep, @@ -311,10 +320,27 @@ const visibleRailItems = computed(() => filterRailItemsForMode(graphManagementRailItems.value, graphManagementMode.value), ) -const selectedRailItem = computed(() => - visibleRailItems.value.find((item) => item.id === selectedRailItemId.value) ?? null, +const schemaRailItems = computed(() => filterSchemaRailItems(visibleRailItems.value)) + +const selectedSchemaRailItem = computed(() => + schemaRailItems.value.find((item) => item.id === selectedRailItemId.value) ?? null, ) +const graphManagementModeGate = computed((): GraphManagementModeGateInput => ({ + workspaceMode: statusProjection.value?.workspace_mode ?? 'schema_bootstrap', + transitionEligible: statusProjection.value?.transition_eligible === true, +})) + +const graphManagementChatDescription = computed(() => { + if (graphManagementMode.value === 'extraction-jobs') { + return 'Coordinate extraction job setup, sync runs, and maintenance for this knowledge graph. Use the assistant below to drive operational changes.' + } + if (graphManagementMode.value === 'one-off-mutations') { + return 'Author and apply one-off graph mutations scoped to this knowledge graph. Use the assistant below for mutation guidance and workspace context.' + } + return 'Design and refine schema readiness, validation, and bootstrap transition for this knowledge graph. Use the assistant below to prepare workspace artifacts.' 
+}) + const canTransition = computed(() => statusProjection.value?.workspace_mode === 'schema_bootstrap' && statusProjection.value?.transition_eligible === true, @@ -795,11 +821,15 @@ async function loadSessionHistory() { function syncGraphManagementState() { if (activeStep.value !== 'graph-management') return const fromQuery = parseGraphManagementModeQuery(route.query.gm_mode) - graphManagementMode.value = fromQuery - ?? resolveDefaultGraphManagementMode( - statusProjection.value?.workspace_mode ?? 'schema_bootstrap', - ) - selectedRailItemId.value = resolveRailSelectionForMode( + const effectiveMode = resolveEffectiveGraphManagementMode( + fromQuery, + graphManagementModeGate.value, + ) + graphManagementMode.value = effectiveMode + if (fromQuery && fromQuery !== effectiveMode) { + navigateTo(buildGraphManagementStepUrl(kgId.value, effectiveMode), { replace: true }) + } + selectedRailItemId.value = resolveSchemaRailSelection( selectedRailItemId.value, graphManagementMode.value, graphManagementRailItems.value, @@ -807,8 +837,13 @@ function syncGraphManagementState() { } function setGraphManagementMode(mode: GraphManagementMode) { + if (!isGraphManagementModeUnlocked(mode, graphManagementModeGate.value)) { + const reason = graphManagementModeLockReason(mode, graphManagementModeGate.value) + toast.message('Mode locked', { description: reason ?? 'Finish schema design first.' 
}) + return + } graphManagementMode.value = mode - selectedRailItemId.value = resolveRailSelectionForMode( + selectedRailItemId.value = resolveSchemaRailSelection( selectedRailItemId.value, mode, graphManagementRailItems.value, @@ -816,12 +851,12 @@ function setGraphManagementMode(mode: GraphManagementMode) { navigateTo(buildGraphManagementStepUrl(kgId.value, mode), { replace: true }) } -function selectRailItem(itemId: GraphManagementRailItemId) { +function selectSchemaRailItem(itemId: GraphManagementRailItemId) { selectedRailItemId.value = itemId } -function onRailKeydown(event: KeyboardEvent, itemId: GraphManagementRailItemId) { - handleActivatableKeydown(event, () => selectRailItem(itemId)) +function onSchemaRailKeydown(event: KeyboardEvent, itemId: GraphManagementRailItemId) { + handleActivatableKeydown(event, () => selectSchemaRailItem(itemId)) } function onModeSwitchKeydown(event: KeyboardEvent, mode: GraphManagementMode) { @@ -1515,56 +1550,76 @@ watch(selectedOpsDataSourceId, () => { </Button> </div> - <Card class="graph-management-controls"> - <CardHeader class="pb-3"> - <CardTitle class="text-base">Graph Management</CardTitle> - <CardDescription> - Shared chat session with mode-specific assistant framing and workspace panels. - </CardDescription> - </CardHeader> - <CardContent class="space-y-3"> - <div - class="flex flex-wrap gap-2" - role="tablist" - aria-label="Graph management modes" - > - <Button - v-for="mode in GRAPH_MANAGEMENT_MODE_ORDER" - :key="mode" - size="sm" - role="tab" - :aria-selected="graphManagementMode === mode" - tabindex="0" - :variant="graphManagementMode === mode ? 
'default' : 'outline'" - @click="setGraphManagementMode(mode)" - @keydown="onModeSwitchKeydown($event, mode)" + <Card class="graph-management-controls overflow-hidden"> + <CardHeader class="space-y-4 pb-4"> + <div class="flex flex-wrap items-start gap-3"> + <div + class="flex size-10 shrink-0 items-center justify-center rounded-lg border border-primary/30 bg-primary/10 text-primary" > - {{ GRAPH_MANAGEMENT_MODE_LABELS[mode] }} - </Button> + <PencilRuler class="size-5 shrink-0" aria-hidden="true" /> + </div> + <div class="min-w-0 flex-1 space-y-1"> + <CardTitle class="text-xl leading-tight">Graph Management</CardTitle> + <CardDescription> + Shared chat session with mode-specific assistant framing and workspace panels. + </CardDescription> + </div> </div> - <div class="flex flex-wrap items-center gap-2"> - <Badge variant="outline">{{ sessionStatusLabel }}</Badge> - <Button - variant="outline" - size="sm" - :disabled="validating || transitioning || workspaceForbidden" - :title="workspaceForbiddenReason ?? undefined" - @click="validateWorkspace" + + <div class="space-y-2"> + <p class="text-sm font-medium text-muted-foreground">Mode:</p> + <div + class="grid gap-2 sm:grid-cols-3" + role="tablist" + aria-label="Graph management modes" > - <Loader2 v-if="validating" class="mr-1.5 size-3.5 animate-spin" /> - <CheckCircle2 v-else class="mr-1.5 size-3.5" /> - Validate - </Button> - <Badge :variant="canTransition ? 'default' : 'secondary'"> - {{ canTransition ? 'Transition eligible' : 'Transition blocked' }} - </Badge> + <template v-for="mode in GRAPH_MANAGEMENT_MODE_ORDER" :key="mode"> + <Button + v-if="isGraphManagementModeUnlocked(mode, graphManagementModeGate)" + size="sm" + variant="outline" + class="h-auto min-h-9 justify-center border py-2 shadow-none transition-colors" + :class=" + graphManagementMode === mode + ? 
'border-primary/70 bg-muted/50 font-medium text-foreground ring-1 ring-primary/25' + : 'border-border bg-card text-muted-foreground hover:border-muted-foreground/30 hover:bg-muted/40 hover:text-foreground' + " + role="tab" + :aria-selected="graphManagementMode === mode" + tabindex="0" + @click="setGraphManagementMode(mode)" + @keydown="onModeSwitchKeydown($event, mode)" + > + {{ GRAPH_MANAGEMENT_MODE_LABELS[mode] }} + </Button> + <div + v-else + class="flex flex-col gap-1.5 rounded-lg border border-dashed border-rose-200/80 bg-rose-500/[0.04] px-3 py-2.5 text-left text-muted-foreground dark:border-rose-900/40 dark:bg-rose-950/20" + role="tab" + :aria-selected="false" + :aria-disabled="true" + :title="graphManagementModeLockReason(mode, graphManagementModeGate) ?? undefined" + > + <div class="flex items-center gap-2"> + <Lock class="size-3.5 shrink-0 text-rose-700/80 dark:text-rose-400/90" /> + <span class="text-sm font-medium leading-tight text-foreground/80"> + {{ GRAPH_MANAGEMENT_MODE_LABELS[mode] }} + </span> + </div> + <p class="text-[11px] leading-snug text-rose-800/90 dark:text-rose-300/90"> + {{ graphManagementModeLockReason(mode, graphManagementModeGate) }} + </p> + </div> + </template> + </div> </div> - </CardContent> + </CardHeader> </Card> <SharedConversationPanel v-model:draft-message="draftMessage" :mode-label="graphManagementModeLabel" + :description="graphManagementChatDescription" :input-placeholder="graphManagementInputPlaceholder" :session-status-label="sessionStatusLabel" :session="extractionSession" @@ -1581,52 +1636,51 @@ watch(selectedOpsDataSourceId, () => { @send-message="sendChatMessage" /> - <div class="grid gap-4 xl:grid-cols-[280px_1fr]"> - <div - class="graph-management-rail rounded border" - role="listbox" - aria-label="Graph management status and artifacts" - > - <div class="border-b px-3 py-2"> - <p class="text-xs font-medium text-muted-foreground">Status & artifacts</p> - </div> - <div class="space-y-1.5 p-2"> - <button - 
v-for="item in visibleRailItems" - :key="item.id" - type="button" - role="option" - :aria-selected="selectedRailItemId === item.id" - tabindex="0" - class="w-full rounded border px-2 py-2 text-left text-xs transition-colors focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring" - :class="[ - stepStatusTintClass(item.status), - selectedRailItemId === item.id ? 'border-primary ring-1 ring-primary/30' : 'hover:bg-muted/40', - ]" - @click="selectRailItem(item.id)" - @keydown="onRailKeydown($event, item.id)" - > - <div class="flex items-center justify-between gap-2"> - <p class="font-medium">{{ item.label }}</p> - <Badge variant="outline" class="text-[10px]">{{ item.status }}</Badge> - </div> - <p class="mt-1 text-muted-foreground">{{ item.detailHint }}</p> - <p class="mt-1 text-[10px] text-muted-foreground">Updated {{ item.lastUpdated }}</p> - </button> - </div> - </div> - - <Card class="graph-management-detail"> - <CardHeader class="pb-3"> - <CardTitle class="text-base"> - {{ selectedRailItem?.label ?? 'Workspace detail' }} - </CardTitle> - <CardDescription> - Mode: - <span class="font-medium text-foreground">{{ graphManagementModeLabel }}</span> + <div class="graph-management-artifacts grid gap-6 lg:grid-cols-2 lg:items-start"> + <Card id="graph-management-schema-artifacts" class="graph-management-schema-panel scroll-mt-6"> + <CardHeader class="pb-2"> + <CardTitle class="text-sm font-semibold">Schema & artifacts</CardTitle> + <CardDescription class="text-xs"> + Workspace signals for + <span class="font-medium text-foreground">{{ graphManagementModeLabel }}</span>. + <template v-if="schemaRailItems.length > 1"> + Select an artifact to inspect its detail below. 
+ </template> </CardDescription> </CardHeader> - <CardContent class="space-y-4 text-sm"> + <CardContent class="space-y-4 p-3 pt-0 text-sm"> + <div v-if="schemaRailItems.length > 1" class="space-y-1.5"> + <button + v-for="item in schemaRailItems" + :key="item.id" + type="button" + :class="graphManagementArtifactRowClass( + selectedRailItemId === item.id, + graphManagementRailItemDone(item.status), + )" + @click="selectSchemaRailItem(item.id)" + @keydown="onSchemaRailKeydown($event, item.id)" + > + <span class="font-medium leading-tight">{{ item.label }}</span> + <span class="text-xs text-muted-foreground">{{ graphManagementArtifactHint(item) }}</span> + </button> + </div> + <p + v-else-if="schemaRailItems.length === 0" + class="rounded-lg border border-dashed p-3 text-xs text-muted-foreground" + > + No schema artifacts for this mode. + </p> + + <div class="graph-management-detail space-y-4 border-t pt-4"> + <div> + <p class="text-sm font-semibold"> + {{ selectedSchemaRailItem?.label ?? 'Schema & artifacts' }} + </p> + <p class="text-xs text-muted-foreground"> + Mode: {{ graphManagementModeLabel }} + </p> + </div> <template v-if="selectedRailItemId === 'schema-readiness'"> <div class="rounded border p-3"> <p class="mb-2 text-xs font-medium uppercase tracking-wider text-muted-foreground"> @@ -1720,99 +1774,7 @@ watch(selectedOpsDataSourceId, () => { </div> </template> - <template v-else-if="selectedRailItemId === 'session-pointers'"> - <div class="grid gap-2 md:grid-cols-3 text-xs"> - <div class="rounded border px-3 py-2"> - <p class="text-muted-foreground">Active schema bootstrap session</p> - <p class="mt-1 break-all font-mono"> - {{ statusProjection.session_pointers.active_schema_bootstrap_session_id ?? 'None' }} - </p> - </div> - <div class="rounded border px-3 py-2"> - <p class="text-muted-foreground">Active extraction operations session</p> - <p class="mt-1 break-all font-mono"> - {{ statusProjection.session_pointers.active_extraction_operations_session_id ?? 
'None' }} - </p> - </div> - <div class="rounded border px-3 py-2"> - <p class="text-muted-foreground">Most recent completed session</p> - <p class="mt-1 break-all font-mono"> - {{ statusProjection.session_pointers.most_recent_completed_session_id ?? 'None' }} - </p> - </div> - </div> - <div class="space-y-3 border-t pt-3"> - <div class="flex items-center justify-between"> - <p class="text-xs font-medium uppercase tracking-wider text-muted-foreground"> - Session History - </p> - <Button - size="sm" - variant="ghost" - class="h-6 px-2 text-[10px]" - :disabled="sessionHistoryLoading" - @click="loadSessionHistory" - > - Refresh - </Button> - </div> - <div - v-if="sessionHistoryLoading" - class="flex items-center gap-2 text-xs text-muted-foreground" - > - <Loader2 class="size-3.5 animate-spin" /> - Loading session history... - </div> - <div - v-else-if="sessionHistory.length === 0" - class="rounded border border-dashed px-3 py-4 text-xs text-muted-foreground" - > - No archived or active sessions found for this scope yet. - </div> - <div v-else class="space-y-2"> - <div - v-for="entry in sessionHistory" - :key="entry.id" - class="rounded border px-3 py-2 text-xs" - > - <div class="flex flex-wrap items-center justify-between gap-2"> - <p class="font-mono break-all">{{ entry.id }}</p> - <Badge :variant="entry.is_active ? 'default' : 'secondary'"> - {{ entry.is_active ? 
'Active' : 'Archived' }} - </Badge> - </div> - <p class="mt-1 text-muted-foreground"> - Updated {{ new Date(entry.updated_at).toLocaleString() }} - <span v-if="entry.archived_at"> - · Archived {{ new Date(entry.archived_at).toLocaleString() }} - </span> - </p> - <p class="mt-1 text-muted-foreground"> - {{ entry.message_count }} message(s) - · {{ entry.run_metrics.length }} linked run(s) - </p> - <div - v-if="entry.run_metrics.length > 0" - class="mt-2 space-y-1.5 rounded border bg-muted/20 p-2" - > - <div - v-for="metric in entry.run_metrics" - :key="metric.sync_run_id" - class="flex flex-wrap items-center justify-between gap-2" - > - <span class="font-mono">{{ metric.mutation_log_id ?? metric.sync_run_id }}</span> - <span class="text-muted-foreground"> - {{ metric.token_usage_total ?? 0 }} tokens · - ${{ (metric.cost_total_usd ?? 0).toFixed(2) }} - </span> - </div> - </div> - </div> - </div> - </div> - </template> - - <template v-else-if="graphManagementMode === 'extraction-jobs'"> + <template v-else-if="selectedRailItemId === 'extraction-jobs-setup'"> <p class="text-muted-foreground"> Trigger extraction jobs, inspect run history, and view run logs without leaving this workspace. </p> @@ -1924,7 +1886,7 @@ watch(selectedOpsDataSourceId, () => { </div> </template> - <template v-else-if="graphManagementMode === 'one-off-mutations'"> + <template v-else-if="selectedRailItemId === 'mutation-authoring'"> <p class="text-muted-foreground"> Author and apply one-off JSONL mutations directly in this workspace. </p> @@ -1952,11 +1914,112 @@ watch(selectedOpsDataSourceId, () => { <template v-else> <p class="text-xs text-muted-foreground"> - Select a status or artifact item to inspect mode-specific workspace content. + Select a schema artifact to inspect mode-specific workspace content. 
</p> </template> + </div> </CardContent> </Card> + + <Card id="graph-management-session-pointers" class="graph-management-session-pointers scroll-mt-6 lg:sticky lg:top-4 lg:self-start"> + <CardHeader class="pb-3"> + <CardTitle class="text-base">Session pointers</CardTitle> + <CardDescription> + Active bootstrap and extraction sessions, plus archived history for this knowledge graph. + </CardDescription> + </CardHeader> + <CardContent class="space-y-4 text-sm"> + <div class="grid gap-2 md:grid-cols-3 text-xs"> + <div class="rounded-lg border px-3 py-2"> + <p class="text-muted-foreground">Active schema bootstrap session</p> + <p class="mt-1 break-all font-mono"> + {{ statusProjection.session_pointers.active_schema_bootstrap_session_id ?? 'None' }} + </p> + </div> + <div class="rounded-lg border px-3 py-2"> + <p class="text-muted-foreground">Active extraction operations session</p> + <p class="mt-1 break-all font-mono"> + {{ statusProjection.session_pointers.active_extraction_operations_session_id ?? 'None' }} + </p> + </div> + <div class="rounded-lg border px-3 py-2"> + <p class="text-muted-foreground">Most recent completed session</p> + <p class="mt-1 break-all font-mono"> + {{ statusProjection.session_pointers.most_recent_completed_session_id ?? 'None' }} + </p> + </div> + </div> + <div class="space-y-3 border-t pt-3"> + <div class="flex items-center justify-between"> + <p class="text-xs font-medium uppercase tracking-wider text-muted-foreground"> + Session History + </p> + <Button + size="sm" + variant="ghost" + class="h-6 px-2 text-[10px]" + :disabled="sessionHistoryLoading" + @click="loadSessionHistory" + > + Refresh + </Button> + </div> + <div + v-if="sessionHistoryLoading" + class="flex items-center gap-2 text-xs text-muted-foreground" + > + <Loader2 class="size-3.5 animate-spin" /> + Loading session history... 
+ </div> + <div + v-else-if="sessionHistory.length === 0" + class="rounded-lg border border-dashed px-3 py-4 text-xs text-muted-foreground" + > + No archived or active sessions found for this scope yet. + </div> + <div v-else class="space-y-2"> + <div + v-for="entry in sessionHistory" + :key="entry.id" + class="rounded-lg border px-3 py-2 text-xs" + > + <div class="flex flex-wrap items-center justify-between gap-2"> + <p class="font-mono break-all">{{ entry.id }}</p> + <Badge :variant="entry.is_active ? 'default' : 'secondary'"> + {{ entry.is_active ? 'Active' : 'Archived' }} + </Badge> + </div> + <p class="mt-1 text-muted-foreground"> + Updated {{ new Date(entry.updated_at).toLocaleString() }} + <span v-if="entry.archived_at"> + · Archived {{ new Date(entry.archived_at).toLocaleString() }} + </span> + </p> + <p class="mt-1 text-muted-foreground"> + {{ entry.message_count }} message(s) + · {{ entry.run_metrics.length }} linked run(s) + </p> + <div + v-if="entry.run_metrics.length > 0" + class="mt-2 space-y-1.5 rounded-lg border bg-muted/20 p-2" + > + <div + v-for="metric in entry.run_metrics" + :key="metric.sync_run_id" + class="flex flex-wrap items-center justify-between gap-2" + > + <span class="font-mono">{{ metric.mutation_log_id ?? metric.sync_run_id }}</span> + <span class="text-muted-foreground"> + {{ metric.token_usage_total ?? 0 }} tokens · + ${{ (metric.cost_total_usd ?? 
0).toFixed(2) }} + </span> + </div> + </div> + </div> + </div> + </div> + </CardContent> + </Card> </div> </section> </template> diff --git a/src/dev-ui/app/tests/kg-graph-management-artifacts.test.ts b/src/dev-ui/app/tests/kg-graph-management-artifacts.test.ts new file mode 100644 index 000000000..c250bd2bb --- /dev/null +++ b/src/dev-ui/app/tests/kg-graph-management-artifacts.test.ts @@ -0,0 +1,40 @@ +import { describe, expect, it } from 'vitest' +import { + filterSchemaRailItems, + graphManagementArtifactRowClass, + graphManagementRailItemDone, + resolveSchemaRailSelection, +} from '../utils/kgGraphManagementArtifacts' +import { buildGraphManagementRailItems } from '../utils/kgGraphManagement' + +describe('kgGraphManagementArtifacts', () => { + const items = buildGraphManagementRailItems({ + workspaceMode: 'schema_bootstrap', + transitionEligible: false, + blockingReasonCount: 1, + prepopulatedGapCount: 0, + sessionUpdatedAt: '2026-01-01', + hasActiveSession: true, + }) + + it('excludes session pointers from schema artifact navigation', () => { + const schemaItems = filterSchemaRailItems(items) + expect(schemaItems.map((item) => item.id)).not.toContain('session-pointers') + expect(schemaItems.length).toBeGreaterThan(0) + }) + + it('resolves schema selection for the active mode', () => { + expect( + resolveSchemaRailSelection(null, 'initial-schema-design', items), + ).toBe('schema-readiness') + expect( + resolveSchemaRailSelection('session-pointers', 'extraction-jobs', items), + ).toBe('extraction-jobs-setup') + }) + + it('maps ready status to done artifact rows', () => { + expect(graphManagementRailItemDone('ready')).toBe(true) + expect(graphManagementArtifactRowClass(true, true)).toContain('ring-primary') + expect(graphManagementArtifactRowClass(false, true)).toContain('green') + }) +}) diff --git a/src/dev-ui/app/tests/kg-graph-management-modes.test.ts b/src/dev-ui/app/tests/kg-graph-management-modes.test.ts new file mode 100644 index 000000000..511e851c7 --- 
/dev/null +++ b/src/dev-ui/app/tests/kg-graph-management-modes.test.ts @@ -0,0 +1,47 @@ +import { describe, expect, it } from 'vitest' +import { + graphManagementModeLockReason, + isGraphManagementModeUnlocked, + resolveEffectiveGraphManagementMode, +} from '../utils/kgGraphManagement' + +describe('graph management mode gates', () => { + const bootstrap = { + workspaceMode: 'schema_bootstrap' as const, + transitionEligible: false, + } + + const validatedBootstrap = { + workspaceMode: 'schema_bootstrap' as const, + transitionEligible: true, + } + + const operations = { + workspaceMode: 'extraction_operations' as const, + transitionEligible: true, + } + + it('always unlocks initial schema design', () => { + expect(isGraphManagementModeUnlocked('initial-schema-design', bootstrap)).toBe(true) + }) + + it('locks extraction modes until extraction operations', () => { + expect(isGraphManagementModeUnlocked('extraction-jobs', bootstrap)).toBe(false) + expect(isGraphManagementModeUnlocked('one-off-mutations', validatedBootstrap)).toBe(false) + expect(isGraphManagementModeUnlocked('extraction-jobs', operations)).toBe(true) + expect(isGraphManagementModeUnlocked('one-off-mutations', operations)).toBe(true) + }) + + it('returns contextual lock reasons', () => { + expect(graphManagementModeLockReason('extraction-jobs', bootstrap)).toContain('validation') + expect(graphManagementModeLockReason('one-off-mutations', validatedBootstrap)).toContain( + 'Extraction/Mutations', + ) + expect(graphManagementModeLockReason('extraction-jobs', operations)).toBeNull() + }) + + it('coerces locked query modes back to initial schema design', () => { + expect(resolveEffectiveGraphManagementMode('extraction-jobs', bootstrap)).toBe('initial-schema-design') + expect(resolveEffectiveGraphManagementMode('extraction-jobs', operations)).toBe('extraction-jobs') + }) +}) diff --git a/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts 
b/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts index 77b270354..2b0d67374 100644 --- a/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts +++ b/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts @@ -354,9 +354,10 @@ describe('KG-MANAGE-015 - graph-scoped maintain step and round trip', () => { }) describe('Shared conversation panel - extraction UX contract', () => { - it('renders resume-session action and explicit server-side persistence note', () => { + it('renders phase-2 style conversational intelligence header and resume action', () => { + expect(sharedConversationPanelVue).toContain('Graph Management Assistant') expect(sharedConversationPanelVue).toContain('Resume session') - expect(sharedConversationPanelVue).toContain('No local cache: conversation state is server-side only.') + expect(sharedConversationPanelVue).toContain('Sparkles') }) it('renders clear-chat confirmation dialog before emitting clear action', () => { @@ -365,15 +366,18 @@ describe('Shared conversation panel - extraction UX contract', () => { expect(sharedConversationPanelVue).toContain("emit('clearChat')") }) - it('renders activity/thinking timeline lines and auto-scrolls timeline updates', () => { - expect(sharedConversationPanelVue).toContain('activityTimeline') - expect(sharedConversationPanelVue).toContain('timelineRef') - expect(sharedConversationPanelVue).toContain('scrollTop = timelineRef.value.scrollHeight') + it('renders bubble chat, thinking state, and auto-scroll', () => { + expect(sharedConversationPanelVue).toContain('thinkingDisplaySlots') + expect(sharedConversationPanelVue).toContain('chatScrollRef') + expect(sharedConversationPanelVue).toContain('renderAssistantHtml') + expect(sharedConversationPanelVue).toContain('scrollToBottom') + expect(sharedConversationPanelVue).toContain('el.scrollTop = el.scrollHeight') }) it('accepts mode-aware input placeholder and session status props', () => { 
expect(sharedConversationPanelVue).toContain('inputPlaceholder') expect(sharedConversationPanelVue).toContain('sessionStatusLabel') + expect(sharedConversationPanelVue).toContain('footerHint') }) }) @@ -399,6 +403,8 @@ describe('KG-MANAGE-007 - graph management modes', () => { } expect(manageWorkspaceVue).toContain('graphManagementMode') expect(manageWorkspaceVue).toContain('parseGraphManagementModeQuery') + expect(manageWorkspaceVue).toContain('isGraphManagementModeUnlocked') + expect(manageWorkspaceVue).toContain('graphManagementModeLockReason') }) it('defaults mode from workspace lifecycle state', () => { @@ -413,11 +419,12 @@ describe('KG-MANAGE-007 - graph management modes', () => { }) describe('KG-MANAGE-008 - hybrid lower panel shared rail', () => { - it('renders persistent status and artifact rail with keyboard selection', () => { - expect(manageWorkspaceVue).toContain('graph-management-rail') - expect(manageWorkspaceVue).toContain('buildGraphManagementRailItems') - expect(manageWorkspaceVue).toContain('role="listbox"') - expect(manageWorkspaceVue).toContain('@keydown') + it('renders side-by-side schema artifacts and session pointers panels', () => { + expect(manageWorkspaceVue).toContain('graph-management-artifacts') + expect(manageWorkspaceVue).toContain('Schema & artifacts') + expect(manageWorkspaceVue).toContain('graph-management-session-pointers') + expect(manageWorkspaceVue).toContain('graphManagementArtifactRowClass') + expect(manageWorkspaceVue).toContain('schemaRailItems') }) it('builds rail items with status and last-updated metadata', () => { @@ -442,8 +449,8 @@ describe('KG-MANAGE-009 - hybrid lower panel mode-specific detail', () => { expect(manageWorkspaceVue).toContain('graph-management-detail') expect(manageWorkspaceVue).toContain('selectedRailItemId') expect(manageWorkspaceVue).toContain("selectedRailItemId === 'schema-readiness'") - expect(manageWorkspaceVue).toContain("graphManagementMode === 'extraction-jobs'") - 
expect(manageWorkspaceVue).toContain("graphManagementMode === 'one-off-mutations'") + expect(manageWorkspaceVue).toContain("selectedRailItemId === 'extraction-jobs-setup'") + expect(manageWorkspaceVue).toContain("selectedRailItemId === 'mutation-authoring'") }) it('filters rail items to the active mode', () => { @@ -541,9 +548,9 @@ describe('KG-MANAGE-016 - graph management top controls', () => { describe('KG-MANAGE-017 - chat input keyboard contract', () => { it('wires Enter-to-send and Shift+Enter newline handling in shared conversation panel', () => { - expect(sharedConversationPanelVue).toContain('handleChatInputKeydown') - expect(sharedConversationPanelVue).toContain('@keydown="onChatInputKeydown"') - expect(sharedConversationPanelVue).toContain('Shift+Enter adds a new line') + expect(sharedConversationPanelVue).toContain('handleComposerEnter') + expect(sharedConversationPanelVue).toContain('@keydown.enter="handleComposerEnter"') + expect(sharedConversationPanelVue).toContain('Shift+Enter for a new line') expect(sharedConversationPanelVue).toContain("emit('sendMessage'") expect(manageWorkspaceVue).toContain('@send-message="sendChatMessage"') }) @@ -556,11 +563,9 @@ describe('KG-MANAGE-018 - keyboard operable step and rail actions', () => { expect(manageWorkspaceVue).toContain('focus-visible:ring-2 focus-visible:ring-ring') }) - it('supports keyboard activation for graph management rail selection', () => { - expect(manageWorkspaceVue).toContain('onRailKeydown') - expect(manageWorkspaceVue).toContain('role="listbox"') - expect(manageWorkspaceVue).toContain('tabindex="0"') - expect(manageWorkspaceVue).toContain('@keydown="onRailKeydown($event, item.id)"') + it('supports keyboard activation for schema artifact navigation', () => { + expect(manageWorkspaceVue).toContain('onSchemaRailKeydown') + expect(manageWorkspaceVue).toContain('@keydown="onSchemaRailKeydown($event, item.id)"') }) it('exposes keyboard-reachable graph management mode switch tabs', () => { diff 
--git a/src/dev-ui/app/utils/kgGraphManagement.ts b/src/dev-ui/app/utils/kgGraphManagement.ts index 203c6ce7b..9ddecfa71 100644 --- a/src/dev-ui/app/utils/kgGraphManagement.ts +++ b/src/dev-ui/app/utils/kgGraphManagement.ts @@ -165,3 +165,37 @@ export function buildGraphManagementStepUrl( ): string { return `/knowledge-graphs/${encodeURIComponent(kgId)}/manage?step=graph-management&gm_mode=${mode}` } + +export interface GraphManagementModeGateInput { + workspaceMode: 'schema_bootstrap' | 'extraction_operations' + transitionEligible: boolean +} + +export function isGraphManagementModeUnlocked( + mode: GraphManagementMode, + input: GraphManagementModeGateInput, +): boolean { + if (mode === 'initial-schema-design') return true + return input.workspaceMode === 'extraction_operations' +} + +export function graphManagementModeLockReason( + mode: GraphManagementMode, + input: GraphManagementModeGateInput, +): string | null { + if (isGraphManagementModeUnlocked(mode, input)) return null + if (input.transitionEligible) { + return 'Schema validated — use Go to Extraction/Mutations in Schema readiness to unlock.' + } + return 'Complete schema design and pass validation to unlock.' 
+} + +export function resolveEffectiveGraphManagementMode( + requested: GraphManagementMode | null, + input: GraphManagementModeGateInput, +): GraphManagementMode { + const fallback = resolveDefaultGraphManagementMode(input.workspaceMode) + if (!requested) return fallback + if (isGraphManagementModeUnlocked(requested, input)) return requested + return 'initial-schema-design' +} diff --git a/src/dev-ui/app/utils/kgGraphManagementArtifacts.ts b/src/dev-ui/app/utils/kgGraphManagementArtifacts.ts new file mode 100644 index 000000000..a93b42ad0 --- /dev/null +++ b/src/dev-ui/app/utils/kgGraphManagementArtifacts.ts @@ -0,0 +1,55 @@ +import { cn } from '@/lib/utils' +import { + filterRailItemsForMode, + type GraphManagementMode, + type GraphManagementRailItem, + type GraphManagementRailItemId, +} from './kgGraphManagement' +import type { StepStatusLabel } from './kgManageWorkspace' + +export function filterSchemaRailItems(items: GraphManagementRailItem[]): GraphManagementRailItem[] { + return items.filter((item) => item.id !== 'session-pointers') +} + +export function resolveSchemaRailSelection( + selectedId: GraphManagementRailItemId | null, + mode: GraphManagementMode, + items: GraphManagementRailItem[], +): GraphManagementRailItemId | null { + const schemaItems = filterSchemaRailItems(filterRailItemsForMode(items, mode)) + if (schemaItems.length === 0) return null + if (selectedId && schemaItems.some((item) => item.id === selectedId)) { + return selectedId + } + return schemaItems[0]?.id ?? null +} + +export function graphManagementRailItemDone(status: StepStatusLabel): boolean { + return status === 'ready' +} + +export function graphManagementArtifactRowClass(selected: boolean, done: boolean): string { + return cn( + 'flex w-full flex-col gap-0.5 rounded-lg border p-2.5 text-left text-sm transition-colors focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring', + done + ? 
'border-green-500/35 bg-green-500/5 dark:border-green-500/25 dark:bg-green-950/15' + : 'border-border bg-card hover:bg-muted/50', + selected && 'ring-2 ring-primary/30', + ) +} + +export function graphManagementArtifactHint(item: GraphManagementRailItem): string { + if (item.id === 'schema-readiness') { + return item.status === 'ready' ? 'Ready to transition' : 'Bootstrap checklist' + } + if (item.id === 'validation-diagnostics') { + return item.status === 'ready' ? 'No blocking issues' : 'Review diagnostics' + } + if (item.id === 'extraction-jobs-setup') { + return item.status === 'ready' ? 'Operations mode' : 'Complete schema first' + } + if (item.id === 'mutation-authoring') { + return item.status === 'ready' ? 'JSONL mutations' : 'Complete schema first' + } + return item.detailHint +} From 95cb9ad47174c7cb95e8792348a4151e3cf218cf Mon Sep 17 00:00:00 2001 From: Austin Redenbaugh <aredenba@redhat.com> Date: Thu, 28 May 2026 22:40:58 -0400 Subject: [PATCH 061/153] feat(extraction): add streaming chat turn service and NDJSON endpoint (#744) Introduce sticky-session-aware chat orchestration with JobPackage gating, UI-mode skill overlays, and a tracer-bullet deterministic agent. Closes #739. Closes #740. 
Co-authored-by: Cursor <cursoragent@cursor.com> --- src/api/extraction/application/__init__.py | 10 +- .../application/agent_session_service.py | 16 ++ .../application/chat_turn_service.py | 158 ++++++++++++++++++ .../application/job_package_gate.py | 53 ++++++ .../application/skill_resolution_service.py | 45 ++++- src/api/extraction/dependencies.py | 32 ++++ src/api/extraction/domain/value_objects.py | 24 +++ .../deterministic_chat_agent.py | 46 +++++ .../ingestion_readiness_reader.py | 36 ++++ src/api/extraction/ports/chat_agent.py | 23 +++ .../extraction/ports/ingestion_readiness.py | 17 ++ src/api/extraction/presentation/models.py | 8 + src/api/extraction/presentation/routes.py | 39 ++++- .../application/test_chat_turn_service.py | 151 +++++++++++++++++ .../application/test_job_package_gate.py | 38 +++++ 15 files changed, 692 insertions(+), 4 deletions(-) create mode 100644 src/api/extraction/application/chat_turn_service.py create mode 100644 src/api/extraction/application/job_package_gate.py create mode 100644 src/api/extraction/infrastructure/deterministic_chat_agent.py create mode 100644 src/api/extraction/infrastructure/ingestion_readiness_reader.py create mode 100644 src/api/extraction/ports/chat_agent.py create mode 100644 src/api/extraction/ports/ingestion_readiness.py create mode 100644 src/api/tests/unit/extraction/application/test_chat_turn_service.py create mode 100644 src/api/tests/unit/extraction/application/test_job_package_gate.py diff --git a/src/api/extraction/application/__init__.py b/src/api/extraction/application/__init__.py index fd5d9c04c..407be92a9 100644 --- a/src/api/extraction/application/__init__.py +++ b/src/api/extraction/application/__init__.py @@ -5,9 +5,15 @@ """ from extraction.application.agent_session_service import ExtractionAgentSessionService +from extraction.application.chat_turn_service import ExtractionChatTurnService from extraction.application.skill_resolution_service import ( ExtractionSkillResolutionService, + 
ResolvedExtractionSkillPack, ) -__all__ = ["ExtractionAgentSessionService", "ExtractionSkillResolutionService"] - +__all__ = [ + "ExtractionAgentSessionService", + "ExtractionChatTurnService", + "ExtractionSkillResolutionService", + "ResolvedExtractionSkillPack", +] diff --git a/src/api/extraction/application/agent_session_service.py b/src/api/extraction/application/agent_session_service.py index fdda14574..d22f47a5d 100644 --- a/src/api/extraction/application/agent_session_service.py +++ b/src/api/extraction/application/agent_session_service.py @@ -17,6 +17,7 @@ IExtractionAgentSessionRepository, IExtractionSessionRunMetricsReader, ) +from extraction.ports.runtime import IStickySessionRuntimeManager @dataclass(frozen=True) @@ -35,10 +36,12 @@ def __init__( repository: IExtractionAgentSessionRepository, skill_resolution_service: ExtractionSkillResolutionService | None = None, run_metrics_reader: IExtractionSessionRunMetricsReader | None = None, + sticky_runtime_manager: IStickySessionRuntimeManager | None = None, ) -> None: self._repository = repository self._skill_resolution_service = skill_resolution_service self._run_metrics_reader = run_metrics_reader + self._sticky_runtime_manager = sticky_runtime_manager @staticmethod def _build_bootstrap_intake_prompt() -> str: @@ -96,6 +99,12 @@ async def get_or_create_active_session( await self._repository.save(session) return session + async def save_session(self, session: ExtractionAgentSession) -> ExtractionAgentSession: + """Persist session mutations after a chat turn.""" + session.updated_at = datetime.now(UTC) + await self._repository.save(session) + return session + async def clear_chat( self, user_id: str, @@ -108,6 +117,13 @@ async def clear_chat( mode=mode, ) if active is not None: + if self._sticky_runtime_manager is not None: + self._sticky_runtime_manager.reset_runtime( + session_id=active.id, + user_id=user_id, + knowledge_graph_id=knowledge_graph_id, + mode=mode.value, + ) active.archive() await 
self._repository.save(active) diff --git a/src/api/extraction/application/chat_turn_service.py b/src/api/extraction/application/chat_turn_service.py new file mode 100644 index 000000000..64081bf9d --- /dev/null +++ b/src/api/extraction/application/chat_turn_service.py @@ -0,0 +1,158 @@ +"""Orchestrates graph-management chat turns with sticky runtime and streaming events.""" + +from __future__ import annotations + +from collections.abc import AsyncIterator +from datetime import UTC, datetime +from typing import Any + +from extraction.application.agent_session_service import ExtractionAgentSessionService +from extraction.application.job_package_gate import resolve_job_package_gate +from extraction.application.skill_resolution_service import ExtractionSkillResolutionService +from extraction.domain.value_objects import ( + ExtractionSessionMode, + GraphManagementUiMode, + SessionJobPackagePhase, +) +from extraction.ports.chat_agent import IExtractionChatAgent +from extraction.ports.ingestion_readiness import IIngestionReadinessReader +from extraction.ports.runtime import IStickySessionRuntimeManager + + +class ExtractionChatTurnService: + """Coordinates sticky runtime, JobPackage gating, and agent execution.""" + + def __init__( + self, + *, + session_service: ExtractionAgentSessionService, + skill_resolution_service: ExtractionSkillResolutionService, + ingestion_readiness_reader: IIngestionReadinessReader, + sticky_runtime_manager: IStickySessionRuntimeManager, + chat_agent: IExtractionChatAgent, + ) -> None: + self._session_service = session_service + self._skill_resolution_service = skill_resolution_service + self._ingestion_readiness_reader = ingestion_readiness_reader + self._sticky_runtime_manager = sticky_runtime_manager + self._chat_agent = chat_agent + + async def stream_chat_turn( + self, + *, + user_id: str, + knowledge_graph_id: str, + mode: ExtractionSessionMode, + ui_mode: GraphManagementUiMode, + message: str, + ) -> AsyncIterator[dict[str, Any]]: + 
trimmed = message.strip() + if not trimmed: + yield { + "type": "done", + "ok": False, + "error": { + "code": "EMPTY_MESSAGE", + "message": "Message must not be empty.", + }, + } + return + + session = await self._session_service.get_or_create_active_session( + user_id=user_id, + knowledge_graph_id=knowledge_graph_id, + mode=mode, + ) + + resolved_skills = await self._skill_resolution_service.resolve_for_graph_management_turn( + knowledge_graph_id=knowledge_graph_id, + mode=mode, + ui_mode=ui_mode, + ) + session.runtime_context["agent_configuration"] = { + "system_prompt": resolved_skills.system_prompt, + "prompt_hierarchy": list(resolved_skills.prompt_hierarchy), + "guardrails": list(resolved_skills.guardrails), + "skills": dict(resolved_skills.skills), + "graph_management_ui_mode": ui_mode.value, + } + + lease = self._sticky_runtime_manager.get_or_start_runtime( + session_id=session.id, + user_id=user_id, + knowledge_graph_id=knowledge_graph_id, + mode=mode.value, + ) + session.runtime_context["sticky_runtime"] = { + "container_id": lease.container_id, + "status": lease.status, + "expires_at": lease.expires_at.isoformat(), + } + + yield { + "type": "thinking", + "recent": [ + "Contacting Graph Management Assistant…", + f"Sticky container {lease.container_id[:8]} active", + ], + } + + readiness = await self._ingestion_readiness_reader.read_for_knowledge_graph( + knowledge_graph_id=knowledge_graph_id, + ) + gate = resolve_job_package_gate(ui_mode=ui_mode, readiness=readiness) + session.runtime_context["job_package"] = { + "phase": gate.phase.value, + "data_source_count": readiness.data_source_count, + "prepared_source_count": readiness.prepared_source_count, + } + + session.message_history.append({"role": "user", "content": trimmed}) + session.updated_at = datetime.now(UTC) + + if gate.phase == SessionJobPackagePhase.AWAITING_PREPARE: + wait_message = gate.wait_message or "Waiting for JobPackage ingestion context." 
+ session.runtime_context["activity_lines"] = [wait_message] + yield { + "type": "wait", + "phase": gate.phase.value, + "message": wait_message, + } + yield { + "type": "thinking", + "recent": ["Waiting for JobPackage ingestion context…", wait_message], + } + assistant_reply = ( + f"**Waiting for ingestion context**\n\n{wait_message}\n\n" + "I'll respond with full repository-aware guidance once JobPackage " + "material is prepared for this knowledge graph." + ) + session.message_history.append({"role": "assistant", "content": assistant_reply}) + session.updated_at = datetime.now(UTC) + await self._session_service.save_session(session) + yield {"type": "done", "ok": True, "reply": assistant_reply, "wait": True} + return + + session.runtime_context["job_package"]["phase"] = SessionJobPackagePhase.READY.value + thinking_lines: list[str] = [] + assistant_reply: str | None = None + async for event in self._chat_agent.stream_turn( + session=session, + user_message=trimmed, + ui_mode=ui_mode, + ): + if event.get("type") == "thinking": + recent = event.get("recent") + if isinstance(recent, list): + thinking_lines = [str(line) for line in recent if str(line).strip()] + session.runtime_context["activity_lines"] = thinking_lines + if event.get("type") == "done": + if event.get("ok") is True and event.get("reply"): + assistant_reply = str(event["reply"]) + yield event + + if assistant_reply: + session.message_history.append({"role": "assistant", "content": assistant_reply}) + session.updated_at = datetime.now(UTC) + session.runtime_context.pop("activity_lines", None) + await self._session_service.save_session(session) diff --git a/src/api/extraction/application/job_package_gate.py b/src/api/extraction/application/job_package_gate.py new file mode 100644 index 000000000..4f88b33ff --- /dev/null +++ b/src/api/extraction/application/job_package_gate.py @@ -0,0 +1,53 @@ +"""Pure helpers for JobPackage readiness gating in chat turns.""" + +from __future__ import annotations + +from 
dataclasses import dataclass + +from extraction.domain.value_objects import ( + GraphManagementUiMode, + IngestionReadinessSnapshot, + SessionJobPackagePhase, +) + + +@dataclass(frozen=True) +class JobPackageGateDecision: + """Resolved JobPackage gate for one chat turn.""" + + phase: SessionJobPackagePhase + wait_message: str | None = None + + +def resolve_job_package_gate( + *, + ui_mode: GraphManagementUiMode, + readiness: IngestionReadinessSnapshot, +) -> JobPackageGateDecision: + """Return whether a chat turn must wait for JobPackage context.""" + if ui_mode in { + GraphManagementUiMode.INITIAL_SCHEMA_DESIGN, + GraphManagementUiMode.ONE_OFF_MUTATIONS, + }: + return JobPackageGateDecision(phase=SessionJobPackagePhase.NOT_REQUIRED) + + if readiness.data_source_count == 0: + return JobPackageGateDecision( + phase=SessionJobPackagePhase.AWAITING_PREPARE, + wait_message=( + "Waiting for a connected data source. Add and prepare data sources " + "under Data sources before extraction job chat can run." + ), + ) + + if readiness.prepared_source_count < readiness.data_source_count: + return JobPackageGateDecision( + phase=SessionJobPackagePhase.AWAITING_PREPARE, + wait_message=( + "Waiting for JobPackage ingestion context. Prepare all data sources " + f"({readiness.prepared_source_count}/{readiness.data_source_count} ready) " + "so the sticky session container can load repository files." 
+ ), + ) + + return JobPackageGateDecision(phase=SessionJobPackagePhase.READY) diff --git a/src/api/extraction/application/skill_resolution_service.py b/src/api/extraction/application/skill_resolution_service.py index 0cf5f137f..11e420157 100644 --- a/src/api/extraction/application/skill_resolution_service.py +++ b/src/api/extraction/application/skill_resolution_service.py @@ -4,7 +4,7 @@ from dataclasses import dataclass -from extraction.domain.value_objects import ExtractionSessionMode +from extraction.domain.value_objects import ExtractionSessionMode, GraphManagementUiMode from extraction.ports.repositories import IExtractionSkillOverrideRepository @@ -87,6 +87,27 @@ class ResolvedExtractionSkillPack: } +_UI_MODE_SKILL_OVERLAYS: dict[GraphManagementUiMode, dict[str, str]] = { + GraphManagementUiMode.INITIAL_SCHEMA_DESIGN: { + "ui_mode_framing": ( + "Focus on schema bootstrap: entity/relationship modeling, intake, and " + "prepopulation guidance before extraction jobs." + ), + }, + GraphManagementUiMode.EXTRACTION_JOBS: { + "ui_mode_framing": ( + "Focus on extraction job setup, JobPackage-aware file targeting, and " + "incremental sync planning." + ), + }, + GraphManagementUiMode.ONE_OFF_MUTATIONS: { + "ui_mode_framing": ( + "Focus on scoped one-off graph mutations with mutation-log auditability." 
+ ), + }, +} + + class ExtractionSkillResolutionService: """Resolve session skills from global templates + KG overrides.""" @@ -123,3 +144,25 @@ async def resolve_for_session( skills=resolved, ) + async def resolve_for_graph_management_turn( + self, + *, + knowledge_graph_id: str, + mode: ExtractionSessionMode, + ui_mode: GraphManagementUiMode, + ) -> ResolvedExtractionSkillPack: + """Resolve base session skills plus graph-management UI mode overlay.""" + base = await self.resolve_for_session( + knowledge_graph_id=knowledge_graph_id, + mode=mode, + ) + overlay = dict(_UI_MODE_SKILL_OVERLAYS.get(ui_mode, {})) + merged_skills = dict(base.skills) + merged_skills.update(overlay) + return ResolvedExtractionSkillPack( + system_prompt=base.system_prompt, + prompt_hierarchy=base.prompt_hierarchy, + guardrails=base.guardrails, + skills=merged_skills, + ) + diff --git a/src/api/extraction/dependencies.py b/src/api/extraction/dependencies.py index a4771b42c..b2aaf1a09 100644 --- a/src/api/extraction/dependencies.py +++ b/src/api/extraction/dependencies.py @@ -8,8 +8,11 @@ from extraction.application import ( ExtractionAgentSessionService, + ExtractionChatTurnService, ExtractionSkillResolutionService, ) +from extraction.infrastructure.deterministic_chat_agent import DeterministicExtractionChatAgent +from extraction.infrastructure.ingestion_readiness_reader import SqlIngestionReadinessReader from extraction.infrastructure.repositories import ( ExtractionAgentSessionRepository, ExtractionSessionRunMetricsReader, @@ -40,6 +43,9 @@ def get_ephemeral_extraction_worker_launcher() -> IEphemeralExtractionWorkerLaun def get_extraction_agent_session_service( session: Annotated[AsyncSession, Depends(get_write_session)], + sticky_runtime_manager: Annotated[ + IStickySessionRuntimeManager, Depends(get_sticky_session_runtime_manager) + ], ) -> ExtractionAgentSessionService: """Get ExtractionAgentSessionService instance.""" skill_resolution_service = ExtractionSkillResolutionService( @@ 
-49,4 +55,30 @@ def get_extraction_agent_session_service( repository=ExtractionAgentSessionRepository(session=session), skill_resolution_service=skill_resolution_service, run_metrics_reader=ExtractionSessionRunMetricsReader(session=session), + sticky_runtime_manager=sticky_runtime_manager, + ) + + +def get_extraction_chat_turn_service( + session: Annotated[AsyncSession, Depends(get_write_session)], + sticky_runtime_manager: Annotated[ + IStickySessionRuntimeManager, Depends(get_sticky_session_runtime_manager) + ], +) -> ExtractionChatTurnService: + """Get ExtractionChatTurnService instance.""" + skill_resolution_service = ExtractionSkillResolutionService( + override_repository=ExtractionSkillOverrideRepository() + ) + session_service = ExtractionAgentSessionService( + repository=ExtractionAgentSessionRepository(session=session), + skill_resolution_service=skill_resolution_service, + run_metrics_reader=ExtractionSessionRunMetricsReader(session=session), + sticky_runtime_manager=sticky_runtime_manager, + ) + return ExtractionChatTurnService( + session_service=session_service, + skill_resolution_service=skill_resolution_service, + ingestion_readiness_reader=SqlIngestionReadinessReader(session=session), + sticky_runtime_manager=sticky_runtime_manager, + chat_agent=DeterministicExtractionChatAgent(), ) diff --git a/src/api/extraction/domain/value_objects.py b/src/api/extraction/domain/value_objects.py index 906c77c22..cf498a8d7 100644 --- a/src/api/extraction/domain/value_objects.py +++ b/src/api/extraction/domain/value_objects.py @@ -21,6 +21,30 @@ class BootstrapIntakePath(StrEnum): GUIDED_CO_DESIGN = "guided_co_design" +class GraphManagementUiMode(StrEnum): + """Graph-management UI mode overlay for chat skill framing.""" + + INITIAL_SCHEMA_DESIGN = "initial-schema-design" + EXTRACTION_JOBS = "extraction-jobs" + ONE_OFF_MUTATIONS = "one-off-mutations" + + +class SessionJobPackagePhase(StrEnum): + """JobPackage readiness phase for sticky session chat turns.""" + + 
NOT_REQUIRED = "not_required" + AWAITING_PREPARE = "awaiting_job_package" + READY = "ready" + + +@dataclass(frozen=True) +class IngestionReadinessSnapshot: + """Read-only ingestion prepare counts for a knowledge graph.""" + + data_source_count: int + prepared_source_count: int + + @dataclass(frozen=True) class ExtractionSessionRunMetric: """Run-level metrics linked to an extraction session.""" diff --git a/src/api/extraction/infrastructure/deterministic_chat_agent.py b/src/api/extraction/infrastructure/deterministic_chat_agent.py new file mode 100644 index 000000000..d1ebbd7eb --- /dev/null +++ b/src/api/extraction/infrastructure/deterministic_chat_agent.py @@ -0,0 +1,46 @@ +"""Deterministic chat agent for tracer-bullet chat turn execution.""" + +from __future__ import annotations + +from collections.abc import AsyncIterator +from typing import Any + +from extraction.domain.entities.agent_session import ExtractionAgentSession +from extraction.domain.value_objects import GraphManagementUiMode + + +class DeterministicExtractionChatAgent: + """Tracer-bullet agent that simulates thinking lines and a structured reply.""" + + async def stream_turn( + self, + *, + session: ExtractionAgentSession, + user_message: str, + ui_mode: GraphManagementUiMode, + ) -> AsyncIterator[dict[str, Any]]: + yield { + "type": "thinking", + "recent": [ + "Starting sticky session Claude agent runtime…", + f"Applying {ui_mode.value} skill overlay", + ], + } + yield { + "type": "thinking", + "recent": [ + "Starting sticky session Claude agent runtime…", + f"Applying {ui_mode.value} skill overlay", + "Reviewing session message history", + ], + } + skills = session.runtime_context.get("agent_configuration", {}).get("skills", {}) + skill_keys = ", ".join(sorted(skills.keys())[:3]) or "default skills" + reply = ( + f"**Graph Management Assistant ({ui_mode.value})**\n\n" + f"I received your message and loaded skills: {skill_keys}.\n\n" + f"> {user_message.strip()}\n\n" + "This is a tracer-bullet 
reply. The sticky container runtime will invoke " + "the Claude Agent SDK with JobPackage context in a follow-up change." + ) + yield {"type": "done", "ok": True, "reply": reply} diff --git a/src/api/extraction/infrastructure/ingestion_readiness_reader.py b/src/api/extraction/infrastructure/ingestion_readiness_reader.py new file mode 100644 index 000000000..a89908379 --- /dev/null +++ b/src/api/extraction/infrastructure/ingestion_readiness_reader.py @@ -0,0 +1,36 @@ +"""SQL reader for ingestion prepare counts without importing Management.""" + +from __future__ import annotations + +from sqlalchemy import text +from sqlalchemy.ext.asyncio import AsyncSession + +from extraction.domain.value_objects import IngestionReadinessSnapshot + + +class SqlIngestionReadinessReader: + """Reads prepared data source counts from the shared data_sources table.""" + + def __init__(self, *, session: AsyncSession) -> None: + self._session = session + + async def read_for_knowledge_graph( + self, *, knowledge_graph_id: str + ) -> IngestionReadinessSnapshot: + result = await self._session.execute( + text( + """ + SELECT + COUNT(*) AS total, + COUNT(*) FILTER (WHERE last_prepared_commit IS NOT NULL) AS prepared + FROM data_sources + WHERE knowledge_graph_id = :knowledge_graph_id + """ + ), + {"knowledge_graph_id": knowledge_graph_id}, + ) + row = result.one() + return IngestionReadinessSnapshot( + data_source_count=int(row.total or 0), + prepared_source_count=int(row.prepared or 0), + ) diff --git a/src/api/extraction/ports/chat_agent.py b/src/api/extraction/ports/chat_agent.py new file mode 100644 index 000000000..5729f4b4e --- /dev/null +++ b/src/api/extraction/ports/chat_agent.py @@ -0,0 +1,23 @@ +"""Port contract for graph-management chat agent execution.""" + +from __future__ import annotations + +from collections.abc import AsyncIterator +from typing import Any, Protocol + +from extraction.domain.entities.agent_session import ExtractionAgentSession +from 
extraction.domain.value_objects import GraphManagementUiMode + + +class IExtractionChatAgent(Protocol): + """Runs one conversational turn inside a sticky session runtime.""" + + def stream_turn( + self, + *, + session: ExtractionAgentSession, + user_message: str, + ui_mode: GraphManagementUiMode, + ) -> AsyncIterator[dict[str, Any]]: + """Yield NDJSON-style event dictionaries ending with a terminal done event.""" + ... diff --git a/src/api/extraction/ports/ingestion_readiness.py b/src/api/extraction/ports/ingestion_readiness.py new file mode 100644 index 000000000..fa75f8cb9 --- /dev/null +++ b/src/api/extraction/ports/ingestion_readiness.py @@ -0,0 +1,17 @@ +"""Port for reading ingestion prepare readiness without importing Management.""" + +from __future__ import annotations + +from typing import Protocol + +from extraction.domain.value_objects import IngestionReadinessSnapshot + + +class IIngestionReadinessReader(Protocol): + """Read-only ingestion prepare counts for JobPackage gating.""" + + async def read_for_knowledge_graph( + self, *, knowledge_graph_id: str + ) -> IngestionReadinessSnapshot: + """Return data source totals and prepared counts for one knowledge graph.""" + ... 
diff --git a/src/api/extraction/presentation/models.py b/src/api/extraction/presentation/models.py index 214deb3d4..e912f19f2 100644 --- a/src/api/extraction/presentation/models.py +++ b/src/api/extraction/presentation/models.py @@ -13,6 +13,7 @@ BootstrapIntakePath, ExtractionSessionMode, ExtractionSessionRunMetric, + GraphManagementUiMode, ) @@ -129,3 +130,10 @@ class BootstrapIntakePathSelectionRequest(BaseModel): default=None, description="Optional user summary of capabilities and schema goals", ) + + +class ExtractionChatTurnRequest(BaseModel): + """Request model for a graph-management chat turn.""" + + message: str = Field(min_length=1) + graph_management_ui_mode: GraphManagementUiMode = GraphManagementUiMode.INITIAL_SCHEMA_DESIGN diff --git a/src/api/extraction/presentation/routes.py b/src/api/extraction/presentation/routes.py index 0db6810af..7ccae2a17 100644 --- a/src/api/extraction/presentation/routes.py +++ b/src/api/extraction/presentation/routes.py @@ -2,15 +2,22 @@ from __future__ import annotations +import json from typing import Annotated from fastapi import APIRouter, Depends, HTTPException, status +from fastapi.responses import StreamingResponse from extraction.application import ExtractionAgentSessionService -from extraction.dependencies import get_extraction_agent_session_service +from extraction.application.chat_turn_service import ExtractionChatTurnService +from extraction.dependencies import ( + get_extraction_agent_session_service, + get_extraction_chat_turn_service, +) from extraction.domain.value_objects import ExtractionSessionMode from extraction.presentation.models import ( BootstrapIntakePathSelectionRequest, + ExtractionChatTurnRequest, ExtractionSessionHistoryItemResponse, ExtractionSessionHistoryResponse, ExtractionSessionListResponse, @@ -155,6 +162,36 @@ async def clear_chat( return ExtractionSessionResponse.from_domain(session) +@router.post( + "/knowledge-graphs/{knowledge_graph_id}/sessions/{mode}/chat", +) +async def 
stream_chat_turn( + knowledge_graph_id: str, + mode: ExtractionSessionMode, + request: ExtractionChatTurnRequest, + current_user: Annotated[CurrentUser, Depends(get_current_user)], + service: Annotated[ExtractionChatTurnService, Depends(get_extraction_chat_turn_service)], + authz: Annotated[AuthorizationProvider, Depends(get_spicedb_client)], +) -> StreamingResponse: + await _assert_kg_edit_permission( + authz=authz, + current_user=current_user, + knowledge_graph_id=knowledge_graph_id, + ) + + async def event_stream(): + async for event in service.stream_chat_turn( + user_id=current_user.user_id.value, + knowledge_graph_id=knowledge_graph_id, + mode=mode, + ui_mode=request.graph_management_ui_mode, + message=request.message, + ): + yield json.dumps(event) + "\n" + + return StreamingResponse(event_stream(), media_type="application/x-ndjson") + + @router.post( "/knowledge-graphs/{knowledge_graph_id}/sessions/schema_bootstrap/active/intake-path", response_model=ExtractionSessionResponse, diff --git a/src/api/tests/unit/extraction/application/test_chat_turn_service.py b/src/api/tests/unit/extraction/application/test_chat_turn_service.py new file mode 100644 index 000000000..a807b62a2 --- /dev/null +++ b/src/api/tests/unit/extraction/application/test_chat_turn_service.py @@ -0,0 +1,151 @@ +"""Unit tests for ExtractionChatTurnService.""" + +from __future__ import annotations + +from dataclasses import replace + +import pytest + +from extraction.application.agent_session_service import ExtractionAgentSessionService +from extraction.application.chat_turn_service import ExtractionChatTurnService +from extraction.application.skill_resolution_service import ExtractionSkillResolutionService +from extraction.domain.entities.agent_session import ExtractionAgentSession +from extraction.domain.value_objects import ( + ExtractionSessionMode, + GraphManagementUiMode, + IngestionReadinessSnapshot, +) +from extraction.infrastructure.deterministic_chat_agent import 
DeterministicExtractionChatAgent +from extraction.infrastructure.workload_runtime import InMemoryStickySessionRuntimeManager + + +class _InMemoryAgentSessionRepository: + def __init__(self) -> None: + self._sessions: dict[str, ExtractionAgentSession] = {} + + async def save(self, session: ExtractionAgentSession) -> None: + self._sessions[session.id] = replace(session) + + async def get_by_id(self, session_id: str) -> ExtractionAgentSession | None: + session = self._sessions.get(session_id) + return replace(session) if session else None + + async def find_active_by_scope( + self, + user_id: str, + knowledge_graph_id: str, + mode: ExtractionSessionMode, + ) -> ExtractionAgentSession | None: + for session in self._sessions.values(): + if ( + session.user_id == user_id + and session.knowledge_graph_id == knowledge_graph_id + and session.mode == mode + and session.archived_at is None + ): + return replace(session) + return None + + async def list_by_scope( + self, + user_id: str, + knowledge_graph_id: str, + mode: ExtractionSessionMode | None = None, + ) -> list[ExtractionAgentSession]: + return [] + + +class _StaticIngestionReadinessReader: + def __init__(self, snapshot: IngestionReadinessSnapshot) -> None: + self._snapshot = snapshot + + async def read_for_knowledge_graph( + self, *, knowledge_graph_id: str + ) -> IngestionReadinessSnapshot: + return self._snapshot + + +class _StaticSkillResolutionService: + async def resolve_for_graph_management_turn(self, **kwargs): + return type( + "_Resolved", + (), + { + "system_prompt": "system", + "prompt_hierarchy": ("platform",), + "guardrails": ("scope",), + "skills": {"ui_mode_framing": "test overlay"}, + }, + )() + + +@pytest.mark.asyncio +async def test_stream_chat_turn_persists_assistant_reply() -> None: + repo = _InMemoryAgentSessionRepository() + sticky = InMemoryStickySessionRuntimeManager() + session_service = ExtractionAgentSessionService(repository=repo) + service = ExtractionChatTurnService( + 
session_service=session_service, + skill_resolution_service=_StaticSkillResolutionService(), + ingestion_readiness_reader=_StaticIngestionReadinessReader( + IngestionReadinessSnapshot(1, 1), + ), + sticky_runtime_manager=sticky, + chat_agent=DeterministicExtractionChatAgent(), + ) + + events = [ + event + async for event in service.stream_chat_turn( + user_id="user-1", + knowledge_graph_id="kg-1", + mode=ExtractionSessionMode.SCHEMA_BOOTSTRAP, + ui_mode=GraphManagementUiMode.INITIAL_SCHEMA_DESIGN, + message="Help me design entity types", + ) + ] + + assert events[-1]["type"] == "done" + assert events[-1]["ok"] is True + active = await repo.find_active_by_scope("user-1", "kg-1", ExtractionSessionMode.SCHEMA_BOOTSTRAP) + assert active is not None + assert active.message_history[-2]["role"] == "user" + assert active.message_history[-1]["role"] == "assistant" + assert active.runtime_context["sticky_runtime"]["container_id"] + + +@pytest.mark.asyncio +async def test_stream_chat_turn_wait_when_job_package_unprepared() -> None: + repo = _InMemoryAgentSessionRepository() + sticky = InMemoryStickySessionRuntimeManager() + session_service = ExtractionAgentSessionService(repository=repo) + service = ExtractionChatTurnService( + session_service=session_service, + skill_resolution_service=_StaticSkillResolutionService(), + ingestion_readiness_reader=_StaticIngestionReadinessReader( + IngestionReadinessSnapshot(2, 0), + ), + sticky_runtime_manager=sticky, + chat_agent=DeterministicExtractionChatAgent(), + ) + + events = [ + event + async for event in service.stream_chat_turn( + user_id="user-1", + knowledge_graph_id="kg-1", + mode=ExtractionSessionMode.EXTRACTION_OPERATIONS, + ui_mode=GraphManagementUiMode.EXTRACTION_JOBS, + message="Run extraction on repo files", + ) + ] + + assert any(event.get("type") == "wait" for event in events) + done = events[-1] + assert done["ok"] is True + assert done.get("wait") is True + active = await repo.find_active_by_scope( + "user-1", "kg-1", 
ExtractionSessionMode.EXTRACTION_OPERATIONS + ) + assert active is not None + assert active.runtime_context["job_package"]["phase"] == "awaiting_job_package" diff --git a/src/api/tests/unit/extraction/application/test_job_package_gate.py b/src/api/tests/unit/extraction/application/test_job_package_gate.py new file mode 100644 index 000000000..96106e496 --- /dev/null +++ b/src/api/tests/unit/extraction/application/test_job_package_gate.py @@ -0,0 +1,38 @@ +"""Unit tests for JobPackage gate resolution.""" + +from __future__ import annotations + +from extraction.application.job_package_gate import ( + IngestionReadinessSnapshot, + resolve_job_package_gate, +) +from extraction.domain.value_objects import ( + GraphManagementUiMode, + SessionJobPackagePhase, +) + + +def test_schema_design_does_not_require_job_package() -> None: + decision = resolve_job_package_gate( + ui_mode=GraphManagementUiMode.INITIAL_SCHEMA_DESIGN, + readiness=IngestionReadinessSnapshot(0, 0), + ) + assert decision.phase == SessionJobPackagePhase.NOT_REQUIRED + + +def test_extraction_jobs_waits_without_prepared_sources() -> None: + decision = resolve_job_package_gate( + ui_mode=GraphManagementUiMode.EXTRACTION_JOBS, + readiness=IngestionReadinessSnapshot(data_source_count=2, prepared_source_count=1), + ) + assert decision.phase == SessionJobPackagePhase.AWAITING_PREPARE + assert decision.wait_message is not None + assert "JobPackage" in decision.wait_message + + +def test_extraction_jobs_ready_when_all_prepared() -> None: + decision = resolve_job_package_gate( + ui_mode=GraphManagementUiMode.EXTRACTION_JOBS, + readiness=IngestionReadinessSnapshot(data_source_count=2, prepared_source_count=2), + ) + assert decision.phase == SessionJobPackagePhase.READY From 8dfb95c6b1ae546b9afc86005605cb72b638d066 Mon Sep 17 00:00:00 2001 From: Austin Redenbaugh <aredenba@redhat.com> Date: Thu, 28 May 2026 22:41:21 -0400 Subject: [PATCH 062/153] feat(ui): wire Graph Management Assistant to streaming chat API (#745) 
Stream NDJSON chat turns with thinking/wait activity lines and reload session history after each turn. Closes #741. Co-authored-by: Cursor <cursoragent@cursor.com> --- .../pages/knowledge-graphs/[kgId]/manage.vue | 82 +++++++++++++++---- .../app/tests/kg-extraction-chat.test.ts | 43 ++++++++++ .../knowledge-graph-manage-workspace.test.ts | 1 + src/dev-ui/app/utils/kgExtractionChat.ts | 82 +++++++++++++++++++ 4 files changed, 190 insertions(+), 18 deletions(-) create mode 100644 src/dev-ui/app/tests/kg-extraction-chat.test.ts create mode 100644 src/dev-ui/app/utils/kgExtractionChat.ts diff --git a/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue b/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue index 2f9e65609..47e6995d8 100644 --- a/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue +++ b/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue @@ -96,8 +96,12 @@ import { type MutationLogEntryPreviewPage, type MutationLogRunRecord, } from '@/utils/kgMutationLogs' +import { streamExtractionChatTurn } from '@/utils/kgExtractionChat' import { useGraphApi } from '@/composables/api/useGraphApi' +const runtimeConfig = useRuntimeConfig() +const { accessToken } = useAuth() + interface WorkspaceReadinessStatus { has_minimum_entity_types: boolean has_minimum_relationship_types: boolean @@ -180,7 +184,7 @@ interface ExtractionSessionHistoryItem { const route = useRoute() const { hasTenant, tenantVersion } = useTenant() const { extractErrorMessage } = useErrorHandler() -const { apiFetch } = useApiClient() +const { apiFetch, currentTenantId } = useApiClient() const graphApi = useGraphApi() const kgId = computed(() => String(route.params.kgId ?? '')) const kgIdentity = ref<KnowledgeGraphIdentity | null>(null) @@ -441,12 +445,7 @@ const nextSteps = computed(() => { return steps }) -const sessionActivityLines = computed(() => { - const context = extractionSession.value?.runtime_context ?? {} - const candidate = context.activity_lines ?? 
context.ndjson_activity_lines ?? context.thinking_lines - if (!Array.isArray(candidate)) return [] - return candidate.filter((line): line is string => typeof line === 'string' && line.trim().length > 0) -}) +const sessionActivityLines = ref<string[]>([]) async function loadKgIdentity() { if (!hasTenant.value || !kgId.value) return @@ -777,6 +776,7 @@ async function loadExtractionSession() { extractionSession.value = await apiFetch<ExtractionSessionResponse>( `/extraction/knowledge-graphs/${kgId.value}/sessions/${sharedSessionMode.value}/active`, ) + syncActivityLinesFromSession() sessionForbidden.value = false sessionForbiddenReason.value = null } catch (err) { @@ -871,7 +871,19 @@ function onMutationRunKeydown(event: KeyboardEvent, runId: string) { handleActivatableKeydown(event, () => selectMutationLogRun(runId)) } -function sendChatMessage(message: string) { +function syncActivityLinesFromSession() { + const context = extractionSession.value?.runtime_context ?? {} + const candidate = context.activity_lines ?? context.ndjson_activity_lines ?? context.thinking_lines + if (Array.isArray(candidate)) { + sessionActivityLines.value = candidate.filter( + (line): line is string => typeof line === 'string' && line.trim().length > 0, + ) + } else { + sessionActivityLines.value = [] + } +} + +async function sendChatMessage(message: string) { if (sessionForbidden.value || !shouldApplyMutationResult(sessionForbidden.value)) { toast.error('Chat unavailable', { description: sessionForbiddenReason.value @@ -880,21 +892,55 @@ function sendChatMessage(message: string) { return } + const trimmed = message.trim() + if (!trimmed || !kgId.value) return + sendingChat.value = true - try { - const nextHistory = appendLocalChatMessage(extractionSession.value, message) - extractionSession.value = { - ...(extractionSession.value ?? 
{ - id: 'local-session', - runtime_context: {}, - updated_at: new Date().toISOString(), - }), - message_history: nextHistory, + sessionActivityLines.value = ['Contacting Graph Management Assistant…'] + draftMessage.value = '' + + const optimisticHistory = appendLocalChatMessage(extractionSession.value, trimmed) + extractionSession.value = { + ...(extractionSession.value ?? { + id: 'pending-session', + runtime_context: {}, updated_at: new Date().toISOString(), + }), + message_history: optimisticHistory, + updated_at: new Date().toISOString(), + } + + try { + for await (const event of streamExtractionChatTurn({ + apiBaseUrl: String(runtimeConfig.public.apiBaseUrl ?? ''), + accessToken: accessToken.value, + tenantId: currentTenantId.value, + kgId: kgId.value, + sessionMode: sharedSessionMode.value, + uiMode: graphManagementMode.value, + message: trimmed, + })) { + if (event.type === 'thinking' && Array.isArray(event.recent)) { + sessionActivityLines.value = event.recent.filter(Boolean) + } + if (event.type === 'wait') { + sessionActivityLines.value = event.message + ? [event.message] + : ['Waiting for JobPackage ingestion context…'] + } + if (event.type === 'done' && event.ok !== true) { + throw new Error(event.error?.message ?? 
'Graph Management Assistant returned an error.') + } } - draftMessage.value = '' + await loadExtractionSession() + } catch (err) { + toast.error('Failed to send message', { + description: extractErrorMessage(err), + }) + await loadExtractionSession() } finally { sendingChat.value = false + syncActivityLinesFromSession() } } diff --git a/src/dev-ui/app/tests/kg-extraction-chat.test.ts b/src/dev-ui/app/tests/kg-extraction-chat.test.ts new file mode 100644 index 000000000..efdfd0a37 --- /dev/null +++ b/src/dev-ui/app/tests/kg-extraction-chat.test.ts @@ -0,0 +1,43 @@ +import { describe, expect, it } from 'vitest' +import { streamExtractionChatTurn } from '../utils/kgExtractionChat' + +describe('kgExtractionChat', () => { + it('targets the extraction chat NDJSON endpoint with UI mode in body', async () => { + const originalFetch = globalThis.fetch + const calls: Array<{ url: string; init?: RequestInit }> = [] + globalThis.fetch = (async (input: RequestInfo | URL, init?: RequestInit) => { + calls.push({ url: String(input), init }) + const body = new ReadableStream({ + start(controller) { + controller.enqueue(new TextEncoder().encode('{"type":"done","ok":true,"reply":"hi"}\n')) + controller.close() + }, + }) + return new Response(body, { status: 200, headers: { 'Content-Type': 'application/x-ndjson' } }) + }) as typeof fetch + + try { + const events = [] + for await (const event of streamExtractionChatTurn({ + apiBaseUrl: 'http://api.test', + accessToken: 'token', + tenantId: 'tenant-1', + kgId: 'kg-1', + sessionMode: 'schema_bootstrap', + uiMode: 'initial-schema-design', + message: 'Hello', + })) { + events.push(event) + } + + expect(events).toEqual([{ type: 'done', ok: true, reply: 'hi' }]) + expect(calls[0]?.url).toContain('/extraction/knowledge-graphs/kg-1/sessions/schema_bootstrap/chat') + expect(JSON.parse(String(calls[0]?.init?.body))).toEqual({ + message: 'Hello', + graph_management_ui_mode: 'initial-schema-design', + }) + } finally { + globalThis.fetch = 
originalFetch + } + }) +}) diff --git a/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts b/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts index 2b0d67374..aa4ef8086 100644 --- a/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts +++ b/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts @@ -552,6 +552,7 @@ describe('KG-MANAGE-017 - chat input keyboard contract', () => { expect(sharedConversationPanelVue).toContain('@keydown.enter="handleComposerEnter"') expect(sharedConversationPanelVue).toContain('Shift+Enter for a new line') expect(sharedConversationPanelVue).toContain("emit('sendMessage'") + expect(manageWorkspaceVue).toContain('streamExtractionChatTurn') expect(manageWorkspaceVue).toContain('@send-message="sendChatMessage"') }) }) diff --git a/src/dev-ui/app/utils/kgExtractionChat.ts b/src/dev-ui/app/utils/kgExtractionChat.ts new file mode 100644 index 000000000..9ed1cd13a --- /dev/null +++ b/src/dev-ui/app/utils/kgExtractionChat.ts @@ -0,0 +1,82 @@ +/** Stream graph-management chat turns over NDJSON. 
*/ + +import type { GraphManagementMode } from '@/utils/kgGraphManagement' + +export interface ExtractionChatStreamEvent { + type: 'thinking' | 'wait' | 'done' + recent?: string[] + phase?: string + message?: string + ok?: boolean + reply?: string | null + wait?: boolean + error?: { code: string; message: string } +} + +export interface StreamExtractionChatOptions { + apiBaseUrl: string + accessToken: string | null + tenantId: string | null + kgId: string + sessionMode: 'schema_bootstrap' | 'extraction_operations' + uiMode: GraphManagementMode + message: string +} + +export async function* streamExtractionChatTurn( + options: StreamExtractionChatOptions, +): AsyncGenerator<ExtractionChatStreamEvent> { + const headers: Record<string, string> = { + 'Content-Type': 'application/json', + Accept: 'application/x-ndjson', + } + if (options.accessToken) { + headers.Authorization = `Bearer ${options.accessToken}` + } + if (options.tenantId) { + headers['X-Tenant-ID'] = options.tenantId + } + + const response = await fetch( + `${options.apiBaseUrl}/extraction/knowledge-graphs/${encodeURIComponent(options.kgId)}/sessions/${options.sessionMode}/chat`, + { + method: 'POST', + headers, + body: JSON.stringify({ + message: options.message, + graph_management_ui_mode: options.uiMode, + }), + }, + ) + + if (!response.ok) { + const body = await response.text().catch(() => '') + throw new Error(body || `${response.status} ${response.statusText}`) + } + + const reader = response.body?.getReader() + if (!reader) { + throw new Error('No response body from Graph Management Assistant') + } + + const decoder = new TextDecoder() + let buffer = '' + + while (true) { + const { done, value } = await reader.read() + if (done) break + buffer += decoder.decode(value, { stream: true }) + const parts = buffer.split('\n') + buffer = parts.pop() ?? 
'' + for (const line of parts) { + const trimmed = line.trim() + if (!trimmed) continue + yield JSON.parse(trimmed) as ExtractionChatStreamEvent + } + } + + const tail = buffer.trim() + if (tail) { + yield JSON.parse(tail) as ExtractionChatStreamEvent + } +} From 687004b2e8bafec2e9e14fb683b7a33e31600d62 Mon Sep 17 00:00:00 2001 From: Austin Redenbaugh <aredenba@redhat.com> Date: Thu, 28 May 2026 23:34:44 -0400 Subject: [PATCH 063/153] feat(extraction): add Claude Agent SDK sticky session runtime (#742) (#746) Ship kartograph-agent-runtime container image with NDJSON turn API, mount skills and JobPackage workspaces, inject chat-scoped workload tokens, and delegate graph-management chat turns to the remote runtime when container backend is enabled. Closes #742. Co-authored-by: Cursor <cursoragent@cursor.com> --- compose.dev.yaml | 12 + src/agent-runtime/Dockerfile | 20 + .../kartograph_agent_runtime/__init__.py | 1 + .../kartograph_agent_runtime/__main__.py | 21 + .../kartograph_agent_runtime/executor.py | 112 ++ .../kartograph_agent_runtime/server.py | 44 + .../kartograph_agent_runtime/settings.py | 23 + .../kartograph_agent_runtime/tools.py | 45 + src/agent-runtime/pyproject.toml | 21 + src/agent-runtime/tests/test_executor.py | 33 + src/agent-runtime/uv.lock | 1446 +++++++++++++++++ .../application/chat_turn_service.py | 52 +- src/api/extraction/dependencies.py | 22 +- .../container_workload_runtime.py | 52 + .../prepared_job_package_reader.py | 37 + .../remote_sticky_container_chat_agent.py | 78 + .../sticky_session_bootstrap_builder.py | 64 + .../sticky_session_workdir_materializer.py | 67 + .../infrastructure/workload_runtime.py | 51 +- .../workload_runtime_factory.py | 30 + .../workload_runtime_settings.py | 11 +- .../extraction/ports/prepared_job_packages.py | 15 + src/api/extraction/ports/runtime.py | 14 + .../ports/sticky_session_bootstrap.py | 22 + src/api/extraction/ports/workload_graph.py | 31 + src/api/extraction/presentation/__init__.py | 3 +- 
src/api/extraction/presentation/routes.py | 1 + .../extraction/presentation/workload_auth.py | 69 + .../presentation/workload_routes.py | 99 ++ .../extraction_workload/dependencies.py | 31 + .../extraction_workload/graph_reader.py | 61 + src/api/main.py | 6 +- .../container_runtime/cli_runtime.py | 4 + .../shared_kernel/container_runtime/ports.py | 2 + .../application/test_chat_turn_service.py | 9 + ...test_sticky_session_container_bootstrap.py | 49 + ...est_sticky_session_workdir_materializer.py | 59 + .../test_workload_credential_issuer.py | 23 + .../container_runtime/test_cli_runtime.py | 28 + 39 files changed, 2731 insertions(+), 37 deletions(-) create mode 100644 src/agent-runtime/Dockerfile create mode 100644 src/agent-runtime/kartograph_agent_runtime/__init__.py create mode 100644 src/agent-runtime/kartograph_agent_runtime/__main__.py create mode 100644 src/agent-runtime/kartograph_agent_runtime/executor.py create mode 100644 src/agent-runtime/kartograph_agent_runtime/server.py create mode 100644 src/agent-runtime/kartograph_agent_runtime/settings.py create mode 100644 src/agent-runtime/kartograph_agent_runtime/tools.py create mode 100644 src/agent-runtime/pyproject.toml create mode 100644 src/agent-runtime/tests/test_executor.py create mode 100644 src/agent-runtime/uv.lock create mode 100644 src/api/extraction/infrastructure/prepared_job_package_reader.py create mode 100644 src/api/extraction/infrastructure/remote_sticky_container_chat_agent.py create mode 100644 src/api/extraction/infrastructure/sticky_session_bootstrap_builder.py create mode 100644 src/api/extraction/infrastructure/sticky_session_workdir_materializer.py create mode 100644 src/api/extraction/ports/prepared_job_packages.py create mode 100644 src/api/extraction/ports/sticky_session_bootstrap.py create mode 100644 src/api/extraction/ports/workload_graph.py create mode 100644 src/api/extraction/presentation/workload_auth.py create mode 100644 src/api/extraction/presentation/workload_routes.py 
create mode 100644 src/api/infrastructure/extraction_workload/dependencies.py create mode 100644 src/api/infrastructure/extraction_workload/graph_reader.py create mode 100644 src/api/tests/unit/extraction/infrastructure/test_sticky_session_container_bootstrap.py create mode 100644 src/api/tests/unit/extraction/infrastructure/test_sticky_session_workdir_materializer.py create mode 100644 src/api/tests/unit/extraction/infrastructure/test_workload_credential_issuer.py diff --git a/compose.dev.yaml b/compose.dev.yaml index ab8e5bf51..51fb575c0 100644 --- a/compose.dev.yaml +++ b/compose.dev.yaml @@ -1,5 +1,12 @@ # Development overrides for compose.yaml services: + agent-runtime: + build: + context: ./src/agent-runtime + dockerfile: Dockerfile + image: kartograph-agent-runtime:dev + profiles: ["build-only"] + api: # Run as root in dev to handle host file permissions (any umask) user: "${UID}:${GID}" @@ -7,6 +14,11 @@ services: UV_CACHE_DIR: /tmp/uv-cache KARTOGRAPH_EXTRACTION_RUNTIME_BACKEND: container KARTOGRAPH_EXTRACTION_RUNTIME_CONTAINER_ENGINE: auto + KARTOGRAPH_EXTRACTION_RUNTIME_CONTAINER_NETWORK: kartograph_kartograph + KARTOGRAPH_EXTRACTION_RUNTIME_STICKY_IMAGE: kartograph-agent-runtime:dev + KARTOGRAPH_EXTRACTION_RUNTIME_API_BASE_URL: http://api:8000 + KARTOGRAPH_EXTRACTION_RUNTIME_JOB_PACKAGE_WORK_DIR: /tmp/kartograph/job_packages + KARTOGRAPH_EXTRACTION_RUNTIME_SKILLS_DIR: /app/skills volumes: # Mount the entire app directory (minus venv) for hot-reload - ./src/api:/app:z diff --git a/src/agent-runtime/Dockerfile b/src/agent-runtime/Dockerfile new file mode 100644 index 000000000..54172815e --- /dev/null +++ b/src/agent-runtime/Dockerfile @@ -0,0 +1,20 @@ +FROM registry.access.redhat.com/ubi9/python-312:latest + +WORKDIR /runtime + +COPY --from=ghcr.io/astral-sh/uv:0.9.18 /uv /uvx /bin/ + +COPY pyproject.toml /runtime/pyproject.toml +COPY kartograph_agent_runtime /runtime/kartograph_agent_runtime + +RUN uv sync --no-dev + +ENV PATH="/runtime/.venv/bin:$PATH" 
\ + PYTHONUNBUFFERED=1 + +EXPOSE 8787 + +HEALTHCHECK --interval=15s --timeout=3s --start-period=10s --retries=5 \ + CMD python -c "import urllib.request; urllib.request.urlopen('http://127.0.0.1:8787/health').read()" || exit 1 + +CMD ["python", "-m", "kartograph_agent_runtime"] diff --git a/src/agent-runtime/kartograph_agent_runtime/__init__.py b/src/agent-runtime/kartograph_agent_runtime/__init__.py new file mode 100644 index 000000000..89889fd90 --- /dev/null +++ b/src/agent-runtime/kartograph_agent_runtime/__init__.py @@ -0,0 +1 @@ +"""Kartograph sticky session Claude Agent SDK runtime.""" diff --git a/src/agent-runtime/kartograph_agent_runtime/__main__.py b/src/agent-runtime/kartograph_agent_runtime/__main__.py new file mode 100644 index 000000000..13ce20acb --- /dev/null +++ b/src/agent-runtime/kartograph_agent_runtime/__main__.py @@ -0,0 +1,21 @@ +"""CLI entrypoint for sticky session agent runtime.""" + +from __future__ import annotations + +import uvicorn + +from kartograph_agent_runtime.settings import AgentRuntimeSettings + + +def main() -> None: + settings = AgentRuntimeSettings() + uvicorn.run( + "kartograph_agent_runtime.server:app", + host=settings.host, + port=settings.port, + log_level="info", + ) + + +if __name__ == "__main__": + main() diff --git a/src/agent-runtime/kartograph_agent_runtime/executor.py b/src/agent-runtime/kartograph_agent_runtime/executor.py new file mode 100644 index 000000000..935cf44e7 --- /dev/null +++ b/src/agent-runtime/kartograph_agent_runtime/executor.py @@ -0,0 +1,112 @@ +"""Turn execution for sticky session chat using Claude Agent SDK or fallback mode.""" + +from __future__ import annotations + +import os +from collections.abc import AsyncIterator +from typing import Any + +from kartograph_agent_runtime.settings import AgentRuntimeSettings +from kartograph_agent_runtime.tools import RuntimeTooling + + +def _build_system_prompt(agent_configuration: dict[str, Any]) -> str: + system_prompt = 
str(agent_configuration.get("system_prompt") or "").strip() + guardrails = agent_configuration.get("guardrails") or [] + skills = agent_configuration.get("skills") or {} + skill_lines = "\n".join(f"- {key}: {value}" for key, value in sorted(skills.items())) + guardrail_lines = "\n".join(f"- {item}" for item in guardrails if str(item).strip()) + sections = [section for section in (system_prompt, guardrail_lines, skill_lines) if section] + return "\n\n".join(sections) or "You are the Graph Management Assistant." + + +async def stream_turn_events( + *, + settings: AgentRuntimeSettings, + message: str, + ui_mode: str, + agent_configuration: dict[str, Any], + message_history: list[dict[str, Any]], +) -> AsyncIterator[dict[str, Any]]: + yield { + "type": "thinking", + "recent": [ + "Starting Claude Agent SDK runtime…", + f"Applying {ui_mode} skill overlay", + f"Workspace mounted at {settings.workspace_dir}", + ], + } + + if settings.anthropic_api_key: + async for event in _stream_with_claude_sdk( + settings=settings, + message=message, + ui_mode=ui_mode, + agent_configuration=agent_configuration, + message_history=message_history, + ): + yield event + return + + tooling = RuntimeTooling(settings=settings) + skill_keys = ", ".join(sorted(agent_configuration.get("skills", {}).keys())[:4]) or "default" + reply = ( + f"**Graph Management Assistant ({ui_mode})**\n\n" + f"I received your message with skills: {skill_keys}.\n\n" + f"> {message.strip()}\n\n" + "Claude Agent SDK is configured for this container. Set `ANTHROPIC_API_KEY` " + "to enable live model execution. Graph and mutation tools are wired via " + f"`{settings.api_base_url}` using the injected workload token." + ) + if message.lower().startswith("search graph:"): + slug = message.split(":", 1)[1].strip() + try: + graph_result = await tooling.search_graph_by_slug(slug=slug) + reply += f"\n\nGraph search returned {graph_result.get('count', 0)} node(s)." 
+ except Exception as exc: # noqa: BLE001 + reply += f"\n\nGraph search failed: {exc}" + yield {"type": "done", "ok": True, "reply": reply} + + +async def _stream_with_claude_sdk( + *, + settings: AgentRuntimeSettings, + message: str, + ui_mode: str, + agent_configuration: dict[str, Any], + message_history: list[dict[str, Any]], +) -> AsyncIterator[dict[str, Any]]: + from claude_agent_sdk import ClaudeAgentOptions, query + + os.environ.setdefault("ANTHROPIC_API_KEY", settings.anthropic_api_key) + system_prompt = _build_system_prompt(agent_configuration) + history_lines = [ + f"{entry.get('role', 'unknown')}: {entry.get('content', '')}" + for entry in message_history[-6:] + if isinstance(entry, dict) + ] + prompt = message + if history_lines: + prompt = "Recent conversation:\n" + "\n".join(history_lines) + f"\n\nUser: {message}" + + yield { + "type": "thinking", + "recent": [ + "Claude Agent SDK query started…", + f"Mode overlay: {ui_mode}", + "Tools: graph read enclave, mutation emitter", + ], + } + + chunks: list[str] = [] + options = ClaudeAgentOptions(system_prompt=system_prompt) + async for sdk_message in query(prompt=prompt, options=options): + text = getattr(sdk_message, "result", None) or getattr(sdk_message, "content", None) + if isinstance(text, str) and text.strip(): + chunks.append(text.strip()) + + reply = chunks[-1] if chunks else ( + "Claude Agent SDK completed without a textual response. " + "Retry with a more specific graph-management request." 
+ ) + yield {"type": "done", "ok": True, "reply": reply} diff --git a/src/agent-runtime/kartograph_agent_runtime/server.py b/src/agent-runtime/kartograph_agent_runtime/server.py new file mode 100644 index 000000000..7df4322f5 --- /dev/null +++ b/src/agent-runtime/kartograph_agent_runtime/server.py @@ -0,0 +1,44 @@ +"""HTTP server for sticky session agent runtime.""" + +from __future__ import annotations + +import json +from collections.abc import AsyncIterator +from typing import Any + +from fastapi import FastAPI +from fastapi.responses import StreamingResponse +from pydantic import BaseModel, Field + +from kartograph_agent_runtime.executor import stream_turn_events +from kartograph_agent_runtime.settings import AgentRuntimeSettings + +app = FastAPI(title="Kartograph Agent Runtime", version="0.1.0") +settings = AgentRuntimeSettings() + + +class TurnRequest(BaseModel): + message: str = Field(min_length=1) + ui_mode: str = Field(default="initial-schema-design") + agent_configuration: dict[str, Any] = Field(default_factory=dict) + message_history: list[dict[str, Any]] = Field(default_factory=list) + + +@app.get("/health") +async def health() -> dict[str, str]: + return {"status": "ok", "session_id": settings.session_id} + + +@app.post("/v1/turn") +async def stream_turn(request: TurnRequest) -> StreamingResponse: + async def event_stream() -> AsyncIterator[str]: + async for event in stream_turn_events( + settings=settings, + message=request.message, + ui_mode=request.ui_mode, + agent_configuration=request.agent_configuration, + message_history=request.message_history, + ): + yield json.dumps(event) + "\n" + + return StreamingResponse(event_stream(), media_type="application/x-ndjson") diff --git a/src/agent-runtime/kartograph_agent_runtime/settings.py b/src/agent-runtime/kartograph_agent_runtime/settings.py new file mode 100644 index 000000000..f37c53ee8 --- /dev/null +++ b/src/agent-runtime/kartograph_agent_runtime/settings.py @@ -0,0 +1,23 @@ +"""Agent runtime 
settings loaded from container environment.""" + +from __future__ import annotations + +from pydantic import Field +from pydantic_settings import BaseSettings, SettingsConfigDict + + +class AgentRuntimeSettings(BaseSettings): + """Runtime configuration for sticky session agent containers.""" + + model_config = SettingsConfigDict(extra="ignore") + + host: str = Field(default="0.0.0.0") + port: int = Field(default=8787) + api_base_url: str = Field(default="http://api:8000", alias="KARTOGRAPH_API_BASE_URL") + workload_token: str = Field(default="", alias="KARTOGRAPH_WORKLOAD_TOKEN") + tenant_id: str = Field(default="", alias="KARTOGRAPH_TENANT_ID") + knowledge_graph_id: str = Field(default="", alias="KARTOGRAPH_KNOWLEDGE_GRAPH_ID") + session_id: str = Field(default="", alias="KARTOGRAPH_SESSION_ID") + skills_dir: str = Field(default="/app/skills", alias="KARTOGRAPH_SKILLS_DIR") + workspace_dir: str = Field(default="/workspace", alias="KARTOGRAPH_WORKSPACE_DIR") + anthropic_api_key: str = Field(default="", alias="ANTHROPIC_API_KEY") diff --git a/src/agent-runtime/kartograph_agent_runtime/tools.py b/src/agent-runtime/kartograph_agent_runtime/tools.py new file mode 100644 index 000000000..1b544fcce --- /dev/null +++ b/src/agent-runtime/kartograph_agent_runtime/tools.py @@ -0,0 +1,45 @@ +"""Tool wiring for graph read enclave and mutation emitters.""" + +from __future__ import annotations + +from dataclasses import dataclass +from typing import Any + +import httpx + +from kartograph_agent_runtime.settings import AgentRuntimeSettings + + +@dataclass(frozen=True) +class RuntimeTooling: + """HTTP-backed tools available to the Claude agent runtime.""" + + settings: AgentRuntimeSettings + + async def search_graph_by_slug( + self, *, slug: str, entity_type: str | None = None + ) -> dict[str, Any]: + headers = {"X-Workload-Token": self.settings.workload_token} + params: dict[str, str] = {"slug": slug} + if entity_type: + params["entity_type"] = entity_type + url = 
f"{self.settings.api_base_url.rstrip('/')}/extraction/workloads/graph/search-by-slug" + async with httpx.AsyncClient(timeout=30.0) as client: + response = await client.get(url, headers=headers, params=params) + response.raise_for_status() + return response.json() + + async def propose_mutation( + self, *, operation: str, summary: str, payload: dict[str, Any] | None = None + ) -> dict[str, Any]: + headers = {"X-Workload-Token": self.settings.workload_token} + url = f"{self.settings.api_base_url.rstrip('/')}/extraction/workloads/mutations/propose" + body = { + "operation": operation, + "summary": summary, + "payload": payload or {}, + } + async with httpx.AsyncClient(timeout=30.0) as client: + response = await client.post(url, headers=headers, json=body) + response.raise_for_status() + return response.json() diff --git a/src/agent-runtime/pyproject.toml b/src/agent-runtime/pyproject.toml new file mode 100644 index 000000000..e1de64bc5 --- /dev/null +++ b/src/agent-runtime/pyproject.toml @@ -0,0 +1,21 @@ +[project] +name = "kartograph-agent-runtime" +version = "0.1.0" +description = "Sticky session Claude Agent SDK runtime for Kartograph graph management chat" +requires-python = ">=3.12" +dependencies = [ + "claude-agent-sdk>=0.2.87", + "fastapi[standard]>=0.123.9", + "httpx>=0.28.1", + "pydantic-settings>=2.12.0", +] + +[dependency-groups] +dev = [ + "pytest>=9.0.1", + "pytest-asyncio>=1.3.0", +] + +[tool.pytest.ini_options] +asyncio_mode = "strict" +pythonpath = ["."] diff --git a/src/agent-runtime/tests/test_executor.py b/src/agent-runtime/tests/test_executor.py new file mode 100644 index 000000000..d754d2599 --- /dev/null +++ b/src/agent-runtime/tests/test_executor.py @@ -0,0 +1,33 @@ +"""Unit tests for agent runtime executor fallback mode.""" + +from __future__ import annotations + +import pytest + +from kartograph_agent_runtime.executor import stream_turn_events +from kartograph_agent_runtime.settings import AgentRuntimeSettings + + +@pytest.mark.asyncio +async 
def test_stream_turn_events_without_api_key_returns_done_reply() -> None: + settings = AgentRuntimeSettings( + KARTOGRAPH_WORKLOAD_TOKEN="token", + KARTOGRAPH_API_BASE_URL="http://api:8000", + ANTHROPIC_API_KEY="", + ) + + events = [ + event + async for event in stream_turn_events( + settings=settings, + message="Design entity types", + ui_mode="initial-schema-design", + agent_configuration={"skills": {"schema_modeling": "guide"}}, + message_history=[], + ) + ] + + assert events[0]["type"] == "thinking" + assert events[-1]["type"] == "done" + assert events[-1]["ok"] is True + assert "Graph Management Assistant" in events[-1]["reply"] diff --git a/src/agent-runtime/uv.lock b/src/agent-runtime/uv.lock new file mode 100644 index 000000000..55db0662d --- /dev/null +++ b/src/agent-runtime/uv.lock @@ -0,0 +1,1446 @@ +version = 1 +revision = 3 +requires-python = ">=3.12" +resolution-markers = [ + "python_full_version >= '3.14'", + "python_full_version == '3.13.*'", + "python_full_version < '3.13'", +] + +[[package]] +name = "annotated-doc" +version = "0.0.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/57/ba/046ceea27344560984e26a590f90bc7f4a75b06701f653222458922b558c/annotated_doc-0.0.4.tar.gz", hash = "sha256:fbcda96e87e9c92ad167c2e53839e57503ecfda18804ea28102353485033faa4", size = 7288, upload-time = "2025-11-10T22:07:42.062Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1e/d3/26bf1008eb3d2daa8ef4cacc7f3bfdc11818d111f7e2d0201bc6e3b49d45/annotated_doc-0.0.4-py3-none-any.whl", hash = "sha256:571ac1dc6991c450b25a9c2d84a3705e2ae7a53467b5d111c24fa8baabbed320", size = 5303, upload-time = "2025-11-10T22:07:40.673Z" }, +] + +[[package]] +name = "annotated-types" +version = "0.7.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ee/67/531ea369ba64dcff5ec9c3402f9f51bf748cec26dde048a2f973a4eea7f5/annotated_types-0.7.0.tar.gz", hash = 
"sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89", size = 16081, upload-time = "2024-05-20T21:33:25.928Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/78/b6/6307fbef88d9b5ee7421e68d78a9f162e0da4900bc5f5793f6d3d0e34fb8/annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53", size = 13643, upload-time = "2024-05-20T21:33:24.1Z" }, +] + +[[package]] +name = "anyio" +version = "4.13.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "idna" }, + { name = "typing-extensions", marker = "python_full_version < '3.13'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/19/14/2c5dd9f512b66549ae92767a9c7b330ae88e1932ca57876909410251fe13/anyio-4.13.0.tar.gz", hash = "sha256:334b70e641fd2221c1505b3890c69882fe4a2df910cba14d97019b90b24439dc", size = 231622, upload-time = "2026-03-24T12:59:09.671Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/da/42/e921fccf5015463e32a3cf6ee7f980a6ed0f395ceeaa45060b61d86486c2/anyio-4.13.0-py3-none-any.whl", hash = "sha256:08b310f9e24a9594186fd75b4f73f4a4152069e3853f1ed8bfbf58369f4ad708", size = 114353, upload-time = "2026-03-24T12:59:08.246Z" }, +] + +[[package]] +name = "attrs" +version = "26.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/9a/8e/82a0fe20a541c03148528be8cac2408564a6c9a0cc7e9171802bc1d26985/attrs-26.1.0.tar.gz", hash = "sha256:d03ceb89cb322a8fd706d4fb91940737b6642aa36998fe130a9bc96c985eff32", size = 952055, upload-time = "2026-03-19T14:22:25.026Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/64/b4/17d4b0b2a2dc85a6df63d1157e028ed19f90d4cd97c36717afef2bc2f395/attrs-26.1.0-py3-none-any.whl", hash = "sha256:c647aa4a12dfbad9333ca4e71fe62ddc36f4e63b2d260a37a8b83d2f043ac309", size = 67548, upload-time = "2026-03-19T14:22:23.645Z" }, +] + +[[package]] +name = "certifi" +version = 
"2026.5.20" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f3/ce/ee2ecad540810a79593028e88299baeae54d346cc7a0d94b6199988b89b1/certifi-2026.5.20.tar.gz", hash = "sha256:69dea482ab64caa7b9f6aba1c6bf48bb6a5448d1c0f1b17ab42ad8c763a5344d", size = 135422, upload-time = "2026-05-20T11:46:50.073Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/59/8c/57e832b7af6d7c5abe66eb3fbe3a3a32f4d11ea23a1aa7131371035be991/certifi-2026.5.20-py3-none-any.whl", hash = "sha256:3c52e209ba0a4ad7aebe60436a4ab349c39e1e602e8c134221e546902ad25897", size = 134134, upload-time = "2026-05-20T11:46:48.578Z" }, +] + +[[package]] +name = "cffi" +version = "2.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pycparser", marker = "implementation_name != 'PyPy'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/eb/56/b1ba7935a17738ae8453301356628e8147c79dbb825bcbc73dc7401f9846/cffi-2.0.0.tar.gz", hash = "sha256:44d1b5909021139fe36001ae048dbdde8214afa20200eda0f64c068cac5d5529", size = 523588, upload-time = "2025-09-08T23:24:04.541Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ea/47/4f61023ea636104d4f16ab488e268b93008c3d0bb76893b1b31db1f96802/cffi-2.0.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:6d02d6655b0e54f54c4ef0b94eb6be0607b70853c45ce98bd278dc7de718be5d", size = 185271, upload-time = "2025-09-08T23:22:44.795Z" }, + { url = "https://files.pythonhosted.org/packages/df/a2/781b623f57358e360d62cdd7a8c681f074a71d445418a776eef0aadb4ab4/cffi-2.0.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8eca2a813c1cb7ad4fb74d368c2ffbbb4789d377ee5bb8df98373c2cc0dee76c", size = 181048, upload-time = "2025-09-08T23:22:45.938Z" }, + { url = "https://files.pythonhosted.org/packages/ff/df/a4f0fbd47331ceeba3d37c2e51e9dfc9722498becbeec2bd8bc856c9538a/cffi-2.0.0-cp312-cp312-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = 
"sha256:21d1152871b019407d8ac3985f6775c079416c282e431a4da6afe7aefd2bccbe", size = 212529, upload-time = "2025-09-08T23:22:47.349Z" }, + { url = "https://files.pythonhosted.org/packages/d5/72/12b5f8d3865bf0f87cf1404d8c374e7487dcf097a1c91c436e72e6badd83/cffi-2.0.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:b21e08af67b8a103c71a250401c78d5e0893beff75e28c53c98f4de42f774062", size = 220097, upload-time = "2025-09-08T23:22:48.677Z" }, + { url = "https://files.pythonhosted.org/packages/c2/95/7a135d52a50dfa7c882ab0ac17e8dc11cec9d55d2c18dda414c051c5e69e/cffi-2.0.0-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:1e3a615586f05fc4065a8b22b8152f0c1b00cdbc60596d187c2a74f9e3036e4e", size = 207983, upload-time = "2025-09-08T23:22:50.06Z" }, + { url = "https://files.pythonhosted.org/packages/3a/c8/15cb9ada8895957ea171c62dc78ff3e99159ee7adb13c0123c001a2546c1/cffi-2.0.0-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:81afed14892743bbe14dacb9e36d9e0e504cd204e0b165062c488942b9718037", size = 206519, upload-time = "2025-09-08T23:22:51.364Z" }, + { url = "https://files.pythonhosted.org/packages/78/2d/7fa73dfa841b5ac06c7b8855cfc18622132e365f5b81d02230333ff26e9e/cffi-2.0.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:3e17ed538242334bf70832644a32a7aae3d83b57567f9fd60a26257e992b79ba", size = 219572, upload-time = "2025-09-08T23:22:52.902Z" }, + { url = "https://files.pythonhosted.org/packages/07/e0/267e57e387b4ca276b90f0434ff88b2c2241ad72b16d31836adddfd6031b/cffi-2.0.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:3925dd22fa2b7699ed2617149842d2e6adde22b262fcbfada50e3d195e4b3a94", size = 222963, upload-time = "2025-09-08T23:22:54.518Z" }, + { url = "https://files.pythonhosted.org/packages/b6/75/1f2747525e06f53efbd878f4d03bac5b859cbc11c633d0fb81432d98a795/cffi-2.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = 
"sha256:2c8f814d84194c9ea681642fd164267891702542f028a15fc97d4674b6206187", size = 221361, upload-time = "2025-09-08T23:22:55.867Z" }, + { url = "https://files.pythonhosted.org/packages/7b/2b/2b6435f76bfeb6bbf055596976da087377ede68df465419d192acf00c437/cffi-2.0.0-cp312-cp312-win32.whl", hash = "sha256:da902562c3e9c550df360bfa53c035b2f241fed6d9aef119048073680ace4a18", size = 172932, upload-time = "2025-09-08T23:22:57.188Z" }, + { url = "https://files.pythonhosted.org/packages/f8/ed/13bd4418627013bec4ed6e54283b1959cf6db888048c7cf4b4c3b5b36002/cffi-2.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:da68248800ad6320861f129cd9c1bf96ca849a2771a59e0344e88681905916f5", size = 183557, upload-time = "2025-09-08T23:22:58.351Z" }, + { url = "https://files.pythonhosted.org/packages/95/31/9f7f93ad2f8eff1dbc1c3656d7ca5bfd8fb52c9d786b4dcf19b2d02217fa/cffi-2.0.0-cp312-cp312-win_arm64.whl", hash = "sha256:4671d9dd5ec934cb9a73e7ee9676f9362aba54f7f34910956b84d727b0d73fb6", size = 177762, upload-time = "2025-09-08T23:22:59.668Z" }, + { url = "https://files.pythonhosted.org/packages/4b/8d/a0a47a0c9e413a658623d014e91e74a50cdd2c423f7ccfd44086ef767f90/cffi-2.0.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:00bdf7acc5f795150faa6957054fbbca2439db2f775ce831222b66f192f03beb", size = 185230, upload-time = "2025-09-08T23:23:00.879Z" }, + { url = "https://files.pythonhosted.org/packages/4a/d2/a6c0296814556c68ee32009d9c2ad4f85f2707cdecfd7727951ec228005d/cffi-2.0.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:45d5e886156860dc35862657e1494b9bae8dfa63bf56796f2fb56e1679fc0bca", size = 181043, upload-time = "2025-09-08T23:23:02.231Z" }, + { url = "https://files.pythonhosted.org/packages/b0/1e/d22cc63332bd59b06481ceaac49d6c507598642e2230f201649058a7e704/cffi-2.0.0-cp313-cp313-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:07b271772c100085dd28b74fa0cd81c8fb1a3ba18b21e03d7c27f3436a10606b", size = 212446, upload-time = "2025-09-08T23:23:03.472Z" }, 
+ { url = "https://files.pythonhosted.org/packages/a9/f5/a2c23eb03b61a0b8747f211eb716446c826ad66818ddc7810cc2cc19b3f2/cffi-2.0.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d48a880098c96020b02d5a1f7d9251308510ce8858940e6fa99ece33f610838b", size = 220101, upload-time = "2025-09-08T23:23:04.792Z" }, + { url = "https://files.pythonhosted.org/packages/f2/7f/e6647792fc5850d634695bc0e6ab4111ae88e89981d35ac269956605feba/cffi-2.0.0-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:f93fd8e5c8c0a4aa1f424d6173f14a892044054871c771f8566e4008eaa359d2", size = 207948, upload-time = "2025-09-08T23:23:06.127Z" }, + { url = "https://files.pythonhosted.org/packages/cb/1e/a5a1bd6f1fb30f22573f76533de12a00bf274abcdc55c8edab639078abb6/cffi-2.0.0-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:dd4f05f54a52fb558f1ba9f528228066954fee3ebe629fc1660d874d040ae5a3", size = 206422, upload-time = "2025-09-08T23:23:07.753Z" }, + { url = "https://files.pythonhosted.org/packages/98/df/0a1755e750013a2081e863e7cd37e0cdd02664372c754e5560099eb7aa44/cffi-2.0.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:c8d3b5532fc71b7a77c09192b4a5a200ea992702734a2e9279a37f2478236f26", size = 219499, upload-time = "2025-09-08T23:23:09.648Z" }, + { url = "https://files.pythonhosted.org/packages/50/e1/a969e687fcf9ea58e6e2a928ad5e2dd88cc12f6f0ab477e9971f2309b57c/cffi-2.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:d9b29c1f0ae438d5ee9acb31cadee00a58c46cc9c0b2f9038c6b0b3470877a8c", size = 222928, upload-time = "2025-09-08T23:23:10.928Z" }, + { url = "https://files.pythonhosted.org/packages/36/54/0362578dd2c9e557a28ac77698ed67323ed5b9775ca9d3fe73fe191bb5d8/cffi-2.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6d50360be4546678fc1b79ffe7a66265e28667840010348dd69a314145807a1b", size = 221302, upload-time = "2025-09-08T23:23:12.42Z" }, + { url = 
"https://files.pythonhosted.org/packages/eb/6d/bf9bda840d5f1dfdbf0feca87fbdb64a918a69bca42cfa0ba7b137c48cb8/cffi-2.0.0-cp313-cp313-win32.whl", hash = "sha256:74a03b9698e198d47562765773b4a8309919089150a0bb17d829ad7b44b60d27", size = 172909, upload-time = "2025-09-08T23:23:14.32Z" }, + { url = "https://files.pythonhosted.org/packages/37/18/6519e1ee6f5a1e579e04b9ddb6f1676c17368a7aba48299c3759bbc3c8b3/cffi-2.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:19f705ada2530c1167abacb171925dd886168931e0a7b78f5bffcae5c6b5be75", size = 183402, upload-time = "2025-09-08T23:23:15.535Z" }, + { url = "https://files.pythonhosted.org/packages/cb/0e/02ceeec9a7d6ee63bb596121c2c8e9b3a9e150936f4fbef6ca1943e6137c/cffi-2.0.0-cp313-cp313-win_arm64.whl", hash = "sha256:256f80b80ca3853f90c21b23ee78cd008713787b1b1e93eae9f3d6a7134abd91", size = 177780, upload-time = "2025-09-08T23:23:16.761Z" }, + { url = "https://files.pythonhosted.org/packages/92/c4/3ce07396253a83250ee98564f8d7e9789fab8e58858f35d07a9a2c78de9f/cffi-2.0.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:fc33c5141b55ed366cfaad382df24fe7dcbc686de5be719b207bb248e3053dc5", size = 185320, upload-time = "2025-09-08T23:23:18.087Z" }, + { url = "https://files.pythonhosted.org/packages/59/dd/27e9fa567a23931c838c6b02d0764611c62290062a6d4e8ff7863daf9730/cffi-2.0.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:c654de545946e0db659b3400168c9ad31b5d29593291482c43e3564effbcee13", size = 181487, upload-time = "2025-09-08T23:23:19.622Z" }, + { url = "https://files.pythonhosted.org/packages/d6/43/0e822876f87ea8a4ef95442c3d766a06a51fc5298823f884ef87aaad168c/cffi-2.0.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:24b6f81f1983e6df8db3adc38562c83f7d4a0c36162885ec7f7b77c7dcbec97b", size = 220049, upload-time = "2025-09-08T23:23:20.853Z" }, + { url = 
"https://files.pythonhosted.org/packages/b4/89/76799151d9c2d2d1ead63c2429da9ea9d7aac304603de0c6e8764e6e8e70/cffi-2.0.0-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:12873ca6cb9b0f0d3a0da705d6086fe911591737a59f28b7936bdfed27c0d47c", size = 207793, upload-time = "2025-09-08T23:23:22.08Z" }, + { url = "https://files.pythonhosted.org/packages/bb/dd/3465b14bb9e24ee24cb88c9e3730f6de63111fffe513492bf8c808a3547e/cffi-2.0.0-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:d9b97165e8aed9272a6bb17c01e3cc5871a594a446ebedc996e2397a1c1ea8ef", size = 206300, upload-time = "2025-09-08T23:23:23.314Z" }, + { url = "https://files.pythonhosted.org/packages/47/d9/d83e293854571c877a92da46fdec39158f8d7e68da75bf73581225d28e90/cffi-2.0.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:afb8db5439b81cf9c9d0c80404b60c3cc9c3add93e114dcae767f1477cb53775", size = 219244, upload-time = "2025-09-08T23:23:24.541Z" }, + { url = "https://files.pythonhosted.org/packages/2b/0f/1f177e3683aead2bb00f7679a16451d302c436b5cbf2505f0ea8146ef59e/cffi-2.0.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:737fe7d37e1a1bffe70bd5754ea763a62a066dc5913ca57e957824b72a85e205", size = 222828, upload-time = "2025-09-08T23:23:26.143Z" }, + { url = "https://files.pythonhosted.org/packages/c6/0f/cafacebd4b040e3119dcb32fed8bdef8dfe94da653155f9d0b9dc660166e/cffi-2.0.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:38100abb9d1b1435bc4cc340bb4489635dc2f0da7456590877030c9b3d40b0c1", size = 220926, upload-time = "2025-09-08T23:23:27.873Z" }, + { url = "https://files.pythonhosted.org/packages/3e/aa/df335faa45b395396fcbc03de2dfcab242cd61a9900e914fe682a59170b1/cffi-2.0.0-cp314-cp314-win32.whl", hash = "sha256:087067fa8953339c723661eda6b54bc98c5625757ea62e95eb4898ad5e776e9f", size = 175328, upload-time = "2025-09-08T23:23:44.61Z" }, + { url = 
"https://files.pythonhosted.org/packages/bb/92/882c2d30831744296ce713f0feb4c1cd30f346ef747b530b5318715cc367/cffi-2.0.0-cp314-cp314-win_amd64.whl", hash = "sha256:203a48d1fb583fc7d78a4c6655692963b860a417c0528492a6bc21f1aaefab25", size = 185650, upload-time = "2025-09-08T23:23:45.848Z" }, + { url = "https://files.pythonhosted.org/packages/9f/2c/98ece204b9d35a7366b5b2c6539c350313ca13932143e79dc133ba757104/cffi-2.0.0-cp314-cp314-win_arm64.whl", hash = "sha256:dbd5c7a25a7cb98f5ca55d258b103a2054f859a46ae11aaf23134f9cc0d356ad", size = 180687, upload-time = "2025-09-08T23:23:47.105Z" }, + { url = "https://files.pythonhosted.org/packages/3e/61/c768e4d548bfa607abcda77423448df8c471f25dbe64fb2ef6d555eae006/cffi-2.0.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:9a67fc9e8eb39039280526379fb3a70023d77caec1852002b4da7e8b270c4dd9", size = 188773, upload-time = "2025-09-08T23:23:29.347Z" }, + { url = "https://files.pythonhosted.org/packages/2c/ea/5f76bce7cf6fcd0ab1a1058b5af899bfbef198bea4d5686da88471ea0336/cffi-2.0.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:7a66c7204d8869299919db4d5069a82f1561581af12b11b3c9f48c584eb8743d", size = 185013, upload-time = "2025-09-08T23:23:30.63Z" }, + { url = "https://files.pythonhosted.org/packages/be/b4/c56878d0d1755cf9caa54ba71e5d049479c52f9e4afc230f06822162ab2f/cffi-2.0.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:7cc09976e8b56f8cebd752f7113ad07752461f48a58cbba644139015ac24954c", size = 221593, upload-time = "2025-09-08T23:23:31.91Z" }, + { url = "https://files.pythonhosted.org/packages/e0/0d/eb704606dfe8033e7128df5e90fee946bbcb64a04fcdaa97321309004000/cffi-2.0.0-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:92b68146a71df78564e4ef48af17551a5ddd142e5190cdf2c5624d0c3ff5b2e8", size = 209354, upload-time = "2025-09-08T23:23:33.214Z" }, + { url = 
"https://files.pythonhosted.org/packages/d8/19/3c435d727b368ca475fb8742ab97c9cb13a0de600ce86f62eab7fa3eea60/cffi-2.0.0-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:b1e74d11748e7e98e2f426ab176d4ed720a64412b6a15054378afdb71e0f37dc", size = 208480, upload-time = "2025-09-08T23:23:34.495Z" }, + { url = "https://files.pythonhosted.org/packages/d0/44/681604464ed9541673e486521497406fadcc15b5217c3e326b061696899a/cffi-2.0.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:28a3a209b96630bca57cce802da70c266eb08c6e97e5afd61a75611ee6c64592", size = 221584, upload-time = "2025-09-08T23:23:36.096Z" }, + { url = "https://files.pythonhosted.org/packages/25/8e/342a504ff018a2825d395d44d63a767dd8ebc927ebda557fecdaca3ac33a/cffi-2.0.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:7553fb2090d71822f02c629afe6042c299edf91ba1bf94951165613553984512", size = 224443, upload-time = "2025-09-08T23:23:37.328Z" }, + { url = "https://files.pythonhosted.org/packages/e1/5e/b666bacbbc60fbf415ba9988324a132c9a7a0448a9a8f125074671c0f2c3/cffi-2.0.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:6c6c373cfc5c83a975506110d17457138c8c63016b563cc9ed6e056a82f13ce4", size = 223437, upload-time = "2025-09-08T23:23:38.945Z" }, + { url = "https://files.pythonhosted.org/packages/a0/1d/ec1a60bd1a10daa292d3cd6bb0b359a81607154fb8165f3ec95fe003b85c/cffi-2.0.0-cp314-cp314t-win32.whl", hash = "sha256:1fc9ea04857caf665289b7a75923f2c6ed559b8298a1b8c49e59f7dd95c8481e", size = 180487, upload-time = "2025-09-08T23:23:40.423Z" }, + { url = "https://files.pythonhosted.org/packages/bf/41/4c1168c74fac325c0c8156f04b6749c8b6a8f405bbf91413ba088359f60d/cffi-2.0.0-cp314-cp314t-win_amd64.whl", hash = "sha256:d68b6cef7827e8641e8ef16f4494edda8b36104d79773a334beaa1e3521430f6", size = 191726, upload-time = "2025-09-08T23:23:41.742Z" }, + { url = 
"https://files.pythonhosted.org/packages/ae/3a/dbeec9d1ee0844c679f6bb5d6ad4e9f198b1224f4e7a32825f47f6192b0c/cffi-2.0.0-cp314-cp314t-win_arm64.whl", hash = "sha256:0a1527a803f0a659de1af2e1fd700213caba79377e27e4693648c2923da066f9", size = 184195, upload-time = "2025-09-08T23:23:43.004Z" }, +] + +[[package]] +name = "claude-agent-sdk" +version = "0.2.87" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, + { name = "mcp" }, + { name = "sniffio" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/26/dc/e2afd59a1dd6484b6500245fa2331a0d8c0b68e6c180bc29d8ce9540f38a/claude_agent_sdk-0.2.87.tar.gz", hash = "sha256:56f02a49a97f7be37e0cd7323494d1c09e52fb0db7ab94f53bba8a230bb4bd0e", size = 252063, upload-time = "2026-05-23T04:19:25Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/6c/4e/b83c4c6ec1e0b63e9d4d58ba9a5abfd9936c55b8ee4c06b88f5e93bdfd70/claude_agent_sdk-0.2.87-py3-none-macosx_11_0_arm64.whl", hash = "sha256:52204a9609dec3aa96032afd48c07d72e05d13311faf614978f17b61326e6e31", size = 63037960, upload-time = "2026-05-23T04:19:29.056Z" }, + { url = "https://files.pythonhosted.org/packages/13/d7/5fb02260c5b95c66e108c35e046d4d66011921251f7896274b6b21594f14/claude_agent_sdk-0.2.87-py3-none-macosx_11_0_x86_64.whl", hash = "sha256:1713e34e50b830ecac54386d39af14e3a2775f833f1ef715eb53566eaa1b6325", size = 65095745, upload-time = "2026-05-23T04:19:32.533Z" }, + { url = "https://files.pythonhosted.org/packages/1d/84/1061f6580bbbc78de629467abf051cdbbabe71b982297b401e3fde65c7e0/claude_agent_sdk-0.2.87-py3-none-manylinux_2_17_aarch64.whl", hash = "sha256:e9e23119d2a02ad1ea1a2707214db98f5baf2c8809577186629843ddfcb8ec18", size = 72725120, upload-time = "2026-05-23T04:19:36.539Z" }, + { url = "https://files.pythonhosted.org/packages/04/50/449f5044d76d9de18cf6a9f4b1c9386a74f41b4e2da5312df245d9dd23ef/claude_agent_sdk-0.2.87-py3-none-manylinux_2_17_x86_64.whl", hash = 
"sha256:5ac525d9ae3481296df5639d005e12ce2b6b0427426991f35da64db30be25c6e", size = 72875504, upload-time = "2026-05-23T04:19:40.839Z" }, + { url = "https://files.pythonhosted.org/packages/80/dd/3f9d7c491d5a98138d293192b31cc9ed792d3552b3a7e276163d7fe2d43a/claude_agent_sdk-0.2.87-py3-none-win_amd64.whl", hash = "sha256:f34973669a1efaeb1543e7b22d7b22feefd8af2fae3adfd39181635077dae432", size = 73514880, upload-time = "2026-05-23T04:19:44.65Z" }, +] + +[[package]] +name = "click" +version = "8.4.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/9b/98/518d8e5081007684232226f475082b30087d0f585e8457db087298259f49/click-8.4.1.tar.gz", hash = "sha256:918b5633eddf6b41c32d4f454bf0de810065c74e3f7dbf8ee5452f8be88d3e96", size = 353007, upload-time = "2026-05-22T04:08:37.769Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c7/0d/67e5b4109ea4a837e80daa87c2c696711955e40449a97e8926672534def2/click-8.4.1-py3-none-any.whl", hash = "sha256:482be17c6991b8c19c5429a1e995d9b0efdbb63172824c41f99965dc0ade8ec2", size = 116639, upload-time = "2026-05-22T04:08:35.26Z" }, +] + +[[package]] +name = "colorama" +version = "0.4.6" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d8/53/6f443c9a4a8358a93a6792e2acffb9d9d5cb0a5cfd8802644b7b1c9a02e4/colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44", size = 27697, upload-time = "2022-10-25T02:36:22.414Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335, upload-time = "2022-10-25T02:36:20.889Z" }, +] + +[[package]] +name = "cryptography" +version = "48.0.0" +source = { 
registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cffi", marker = "platform_python_implementation != 'PyPy'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/9f/a9/db8f313fdcd85d767d4973515e1db101f9c71f95fced83233de224673757/cryptography-48.0.0.tar.gz", hash = "sha256:5c3932f4436d1cccb036cb0eaef46e6e2db91035166f1ad6505c3c9d5a635920", size = 832984, upload-time = "2026-05-04T22:59:38.133Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/df/3d/01f6dd9190170a5a241e0e98c2d04be3664a9e6f5b9b872cde63aff1c3dd/cryptography-48.0.0-cp311-abi3-macosx_10_9_universal2.whl", hash = "sha256:0c558d2cdffd8f4bbb30fc7134c74d2ca9a476f830bb053074498fbc86f41ed6", size = 8001587, upload-time = "2026-05-04T22:57:36.803Z" }, + { url = "https://files.pythonhosted.org/packages/b2/6e/e90527eef33f309beb811cf7c982c3aeffcce8e3edb178baa4ca3ae4a6fa/cryptography-48.0.0-cp311-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:f5333311663ea94f75dd408665686aaf426563556bb5283554a3539177e03b8c", size = 4690433, upload-time = "2026-05-04T22:57:40.373Z" }, + { url = "https://files.pythonhosted.org/packages/90/04/673510ed51ddff56575f306cf1617d80411ee76831ccd3097599140efdfe/cryptography-48.0.0-cp311-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:7995ef305d7165c3f11ae07f2517e5a4f1d5c18da1376a0a9ed496336b69e5f3", size = 4710620, upload-time = "2026-05-04T22:57:42.935Z" }, + { url = "https://files.pythonhosted.org/packages/14/d5/e9c4ef932c8d800490c34d8bd589d64a31d5890e27ec9e9ad532be893294/cryptography-48.0.0-cp311-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:40ba1f85eaa6959837b1d51c9767e230e14612eea4ef110ee8854ada22da1bf5", size = 4696283, upload-time = "2026-05-04T22:57:45.294Z" }, + { url = "https://files.pythonhosted.org/packages/0c/29/174b9dfb60b12d59ecfc6cfa04bc88c21b42a54f01b8aae09bb6e51e4c7f/cryptography-48.0.0-cp311-abi3-manylinux_2_28_ppc64le.whl", hash = 
"sha256:369a6348999f94bbd53435c894377b20ab95f25a9065c283570e70150d8abc3c", size = 5296573, upload-time = "2026-05-04T22:57:47.933Z" }, + { url = "https://files.pythonhosted.org/packages/95/38/0d29a6fd7d0d1373f0c0c88a04ba20e359b257753ac497564cd660fc1d55/cryptography-48.0.0-cp311-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:a0e692c683f4df67815a2d258b324e66f4738bd7a96a218c826dce4f4bd05d8f", size = 4743677, upload-time = "2026-05-04T22:57:50.067Z" }, + { url = "https://files.pythonhosted.org/packages/30/be/eef653013d5c63b6a490529e0316f9ac14a37602965d4903efed1399f32b/cryptography-48.0.0-cp311-abi3-manylinux_2_31_armv7l.whl", hash = "sha256:18349bbc56f4743c8b12dc32e2bccb2cf83ee8b69a3bba74ef8ae857e26b3d25", size = 4330808, upload-time = "2026-05-04T22:57:52.301Z" }, + { url = "https://files.pythonhosted.org/packages/84/9e/500463e87abb7a0a0f9f256ec21123ecde0a7b5541a15e840ea54551fd81/cryptography-48.0.0-cp311-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:7e8eac43dfca5c4cccc6dad9a80504436fca53bb9bc3100a2386d730fbe6b602", size = 4695941, upload-time = "2026-05-04T22:57:54.603Z" }, + { url = "https://files.pythonhosted.org/packages/e3/dc/7303087450c2ec9e7fbb750e17c2abfbc658f23cbd0e54009509b7cc4091/cryptography-48.0.0-cp311-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:9ccdac7d40688ecb5a3b4a604b8a88c8002e3442d6c60aead1db2a89a041560c", size = 5252579, upload-time = "2026-05-04T22:57:57.207Z" }, + { url = "https://files.pythonhosted.org/packages/d0/c0/7101d3b7215edcdc90c45da544961fd8ed2d6448f77577460fa75a8443f7/cryptography-48.0.0-cp311-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:bd72e68b06bb1e96913f97dd4901119bc17f39d4586a5adf2d3e47bc2b9d58b5", size = 4743326, upload-time = "2026-05-04T22:57:59.535Z" }, + { url = "https://files.pythonhosted.org/packages/ac/d8/5b833bad13016f562ab9d063d68199a4bd121d18458e439515601d3357ec/cryptography-48.0.0-cp311-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:59baa2cb386c4f0b9905bd6eb4c2a79a69a128408fd31d32ca4d7102d4156321", size = 
4826672, upload-time = "2026-05-04T22:58:01.996Z" }, + { url = "https://files.pythonhosted.org/packages/98/e1/7074eb8bf3c135558c73fc2bcf0f5633f912e6fb87e868a55c454080ef09/cryptography-48.0.0-cp311-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:9249e3cd978541d665967ac2cb2787fd6a62bddf1e75b3e347a594d7dacf4f74", size = 4972574, upload-time = "2026-05-04T22:58:03.968Z" }, + { url = "https://files.pythonhosted.org/packages/04/70/e5a1b41d325f797f39427aa44ef8baf0be500065ab6d8e10369d850d4a4f/cryptography-48.0.0-cp311-abi3-win32.whl", hash = "sha256:9c459db21422be75e2809370b829a87eb37f74cd785fc4aa9ea1e5f43b47cda4", size = 3294868, upload-time = "2026-05-04T22:58:06.467Z" }, + { url = "https://files.pythonhosted.org/packages/f4/ac/8ac51b4a5fc5932eb7ee5c517ba7dc8cd834f0048962b6b352f00f41ebf9/cryptography-48.0.0-cp311-abi3-win_amd64.whl", hash = "sha256:5b012212e08b8dd5edc78ef54da83dd9892fd9105323b3993eff6bea65dc21d7", size = 3817107, upload-time = "2026-05-04T22:58:08.845Z" }, + { url = "https://files.pythonhosted.org/packages/6b/84/70e3feea9feea87fd7cbe77efb2712ae1e3e6edf10749dc6e95f4e60e455/cryptography-48.0.0-cp314-cp314t-macosx_10_9_universal2.whl", hash = "sha256:3cb07a3ed6431663cd321ea8a000a1314c74211f823e4177fefa2255e057d1ec", size = 7986556, upload-time = "2026-05-04T22:58:11.172Z" }, + { url = "https://files.pythonhosted.org/packages/89/6e/18e07a618bb5442ba10cf4df16e99c071365528aa570dfcb8c02e25a303b/cryptography-48.0.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8c7378637d7d88016fa6791c159f698b3d3eed28ebf844ac36b9dc04a14dae18", size = 4684776, upload-time = "2026-05-04T22:58:13.712Z" }, + { url = "https://files.pythonhosted.org/packages/be/6a/4ea3b4c6c6759794d5ee2103c304a5076dc4b19ae1f9fe47dba439e159e9/cryptography-48.0.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:cc90c0b39b2e3c65ef52c804b72e3c58f8a04ab2a1871272798e5f9572c17d20", size = 4698121, upload-time = "2026-05-04T22:58:16.448Z" }, + { url = 
"https://files.pythonhosted.org/packages/2f/59/6ff6ad6cae03bb887da2a5860b2c9805f8dac969ef01ce563336c49bd1d1/cryptography-48.0.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:76341972e1eff8b4bea859f09c0d3e64b96ce931b084f9b9b7db8ef364c30eff", size = 4690042, upload-time = "2026-05-04T22:58:18.544Z" }, + { url = "https://files.pythonhosted.org/packages/ca/b4/fc334ed8cfd705aca282fe4d8f5ae64a8e0f74932e9feecb344610cf6e4d/cryptography-48.0.0-cp314-cp314t-manylinux_2_28_ppc64le.whl", hash = "sha256:55b7718303bf06a5753dcdccf2f3945cf18ad7bffde41b61226e4db31ab89a9c", size = 5282526, upload-time = "2026-05-04T22:58:20.75Z" }, + { url = "https://files.pythonhosted.org/packages/11/08/9f8c5386cc4cd90d8255c7cdd0f5baf459a08502a09de30dc51f553d38dc/cryptography-48.0.0-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:a64697c641c7b1b2178e573cbc31c7c6684cd56883a478d75143dbb7118036db", size = 4733116, upload-time = "2026-05-04T22:58:23.627Z" }, + { url = "https://files.pythonhosted.org/packages/b8/77/99307d7574045699f8805aa500fa0fb83422d115b5400a064ddd306d7750/cryptography-48.0.0-cp314-cp314t-manylinux_2_31_armv7l.whl", hash = "sha256:561215ea3879cb1cbbf272867e2efda62476f240fb58c64de6b393ae19246741", size = 4316030, upload-time = "2026-05-04T22:58:25.581Z" }, + { url = "https://files.pythonhosted.org/packages/fd/36/a608b98337af3cb2aff4818e406649d30572b7031918b04c87d979495348/cryptography-48.0.0-cp314-cp314t-manylinux_2_34_aarch64.whl", hash = "sha256:ad64688338ed4bc1a6618076ba75fd7194a5f1797ac60b47afe926285adb3166", size = 4689640, upload-time = "2026-05-04T22:58:27.747Z" }, + { url = "https://files.pythonhosted.org/packages/dd/a6/825010a291b4438aecc1f568bc428189fc1175515223632477c07dc0a6df/cryptography-48.0.0-cp314-cp314t-manylinux_2_34_ppc64le.whl", hash = "sha256:906cbf0670286c6e0044156bc7d4af9cbb0ef6db9f73e52c3ec56ba6bdde5336", size = 5237657, upload-time = "2026-05-04T22:58:29.848Z" }, + { url = 
"https://files.pythonhosted.org/packages/b9/09/4e76a09b4caa29aad535ddc806f5d4c5d01885bd978bd984fbc6ca032cae/cryptography-48.0.0-cp314-cp314t-manylinux_2_34_x86_64.whl", hash = "sha256:ea8990436d914540a40ab24b6a77c0969695ed52f4a4874c5137ccf7045a7057", size = 4732362, upload-time = "2026-05-04T22:58:32.009Z" }, + { url = "https://files.pythonhosted.org/packages/18/78/444fa04a77d0cb95f417dda20d450e13c56ba8e5220fc892a1658f44f882/cryptography-48.0.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:c18684a7f0cc9a3cb60328f496b8e3372def7c5d2df39ac267878b05565aaaae", size = 4819580, upload-time = "2026-05-04T22:58:34.254Z" }, + { url = "https://files.pythonhosted.org/packages/38/85/ea67067c70a1fd4be2c63d35eeed82658023021affccc7b17705f8527dd2/cryptography-48.0.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:9be5aafa5736574f8f15f262adc81b2a9869e2cfe9014d52a44633905b40d52c", size = 4963283, upload-time = "2026-05-04T22:58:36.376Z" }, + { url = "https://files.pythonhosted.org/packages/75/54/cc6d0f3deac3e81c7f847e8a189a12b6cdd65059b43dad25d4316abd849a/cryptography-48.0.0-cp314-cp314t-win32.whl", hash = "sha256:c17dfe85494deaeddc5ce251aebd1d60bbe6afc8b62071bb0b469431a000124f", size = 3270954, upload-time = "2026-05-04T22:58:38.791Z" }, + { url = "https://files.pythonhosted.org/packages/49/67/cc947e288c0758a4e5473d1dcb743037ab7785541265a969240b8885441a/cryptography-48.0.0-cp314-cp314t-win_amd64.whl", hash = "sha256:27241b1dc9962e056062a8eef1991d02c3a24569c95975bd2322a8a52c6e5e12", size = 3797313, upload-time = "2026-05-04T22:58:40.746Z" }, + { url = "https://files.pythonhosted.org/packages/f2/63/61d4a4e1c6b6bab6ce1e213cd36a24c415d90e76d78c5eb8577c5541d2e8/cryptography-48.0.0-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:58d00498e8933e4a194f3076aee1b4a97dfec1a6da444535755822fe5d8b0b86", size = 7983482, upload-time = "2026-05-04T22:58:43.769Z" }, + { url = 
"https://files.pythonhosted.org/packages/d5/ac/f5b5995b87770c693e2596559ffafe195b4033a57f14a82268a2842953f3/cryptography-48.0.0-cp39-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:614d0949f4790582d2cc25553abd09dd723025f0c0e7c67376a1d77196743d6e", size = 4683266, upload-time = "2026-05-04T22:58:46.064Z" }, + { url = "https://files.pythonhosted.org/packages/ec/c6/8b14f67e18338fbc4adb76f66c001f5c3610b3e2d1837f268f47a347dbbb/cryptography-48.0.0-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:7ce4bfae76319a532a2dc68f82cc32f5676ee792a983187dac07183690e5c66f", size = 4696228, upload-time = "2026-05-04T22:58:48.22Z" }, + { url = "https://files.pythonhosted.org/packages/ea/73/f808fbae9514bd91b47875b003f13e284c8c6bdfd904b7944e803937eec1/cryptography-48.0.0-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:2eb992bbd4661238c5a397594c83f5b4dc2bc5b848c365c8f991b6780efcc5c7", size = 4689097, upload-time = "2026-05-04T22:58:50.9Z" }, + { url = "https://files.pythonhosted.org/packages/93/01/d86632d7d28db8ae83221995752eeb6639ffb374c2d22955648cf8d52797/cryptography-48.0.0-cp39-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:22a5cb272895dce158b2cacdfdc3debd299019659f42947dbdac6f32d68fe832", size = 5283582, upload-time = "2026-05-04T22:58:53.017Z" }, + { url = "https://files.pythonhosted.org/packages/02/e1/50edc7a50334807cc4791fc4a0ce7468b4a1416d9138eab358bfc9a3d70b/cryptography-48.0.0-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:2b4d59804e8408e2fea7d1fbaf218e5ec984325221db76e6a241a9abd6cdd95c", size = 4730479, upload-time = "2026-05-04T22:58:55.611Z" }, + { url = "https://files.pythonhosted.org/packages/6f/af/99a582b1b1641ff5911ac559beb45097cf79efd4ead4657f578ef1af2d47/cryptography-48.0.0-cp39-abi3-manylinux_2_31_armv7l.whl", hash = "sha256:984a20b0f62a26f48a3396c72e4bc34c66e356d356bf370053066b3b6d54634a", size = 4326481, upload-time = "2026-05-04T22:58:57.607Z" }, + { url = 
"https://files.pythonhosted.org/packages/90/ee/89aa26a06ef0a7d7611788ffd571a7c50e368cc6a4d5eef8b4884e866edb/cryptography-48.0.0-cp39-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:5a5ed8fde7a1d09376ca0b40e68cd59c69fe23b1f9768bd5824f54681626032a", size = 4688713, upload-time = "2026-05-04T22:59:00.077Z" }, + { url = "https://files.pythonhosted.org/packages/70/ba/bcb1b0bb7a33d4c7c0c4d4c7874b4a62ae4f56113a5f4baefa362dfb1f0f/cryptography-48.0.0-cp39-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:8cd666227ef7af430aa5914a9910e0ddd703e75f039cef0825cd0da71b6b711a", size = 5238165, upload-time = "2026-05-04T22:59:02.317Z" }, + { url = "https://files.pythonhosted.org/packages/c9/70/ca4003b1ce5ca3dc3186ada51908c8a9b9ff7d5cab83cc0d43ee14ec144f/cryptography-48.0.0-cp39-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:9071196d81abc88b3516ac8cdfad32e2b66dd4a5393a8e68a961e9161ddc6239", size = 4729947, upload-time = "2026-05-04T22:59:05.255Z" }, + { url = "https://files.pythonhosted.org/packages/44/a0/4ec7cf774207905aef1a8d11c3750d5a1db805eb380ee4e16df317870128/cryptography-48.0.0-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:1e2d54c8be6152856a36f0882ab231e70f8ec7f14e93cf87db8a2ed056bf160c", size = 4822059, upload-time = "2026-05-04T22:59:07.802Z" }, + { url = "https://files.pythonhosted.org/packages/1e/75/a2e55f99c16fcac7b5d6c1eb19ad8e00799854d6be5ca845f9259eae1681/cryptography-48.0.0-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:a5da777e32ffed6f85a7b2b3f7c5cbc88c146bfcd0a1d7baf5fcc6c52ee35dd4", size = 4960575, upload-time = "2026-05-04T22:59:09.851Z" }, + { url = "https://files.pythonhosted.org/packages/b8/23/6e6f32143ab5d8b36ca848a502c4bcd477ae75b9e1677e3530d669062578/cryptography-48.0.0-cp39-abi3-win32.whl", hash = "sha256:77a2ccbbe917f6710e05ba9adaa25fb5075620bf3ea6fb751997875aff4ae4bd", size = 3279117, upload-time = "2026-05-04T22:59:12.019Z" }, + { url = 
"https://files.pythonhosted.org/packages/9d/9a/0fea98a70cf1749d41d738836f6349d97945f7c89433a259a6c2642eefeb/cryptography-48.0.0-cp39-abi3-win_amd64.whl", hash = "sha256:16cd65b9330583e4619939b3a3843eec1e6e789744bb01e7c7e2e62e33c239c8", size = 3792100, upload-time = "2026-05-04T22:59:14.884Z" }, +] + +[[package]] +name = "detect-installer" +version = "0.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/5f/ce/6897d812825e9d4c53e3c7112726e800cc5231b013b2223bf64f653ff362/detect_installer-0.1.0.tar.gz", hash = "sha256:00ad7ba0a36e3cf7d08a40d3643011746dbc112597c7d475cc91c416710ca4e7", size = 3049, upload-time = "2026-02-23T10:40:22.567Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/cc/34/8cc73273414405086c58852916e4031812a6a30fe04c057e37ad99397b7f/detect_installer-0.1.0-py3-none-any.whl", hash = "sha256:034fb20fd665c36e6ba52b8821525ea07fb4f7f938cac459df889fb33801528a", size = 4539, upload-time = "2026-02-23T10:40:23.807Z" }, +] + +[[package]] +name = "dnspython" +version = "2.8.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/8c/8b/57666417c0f90f08bcafa776861060426765fdb422eb10212086fb811d26/dnspython-2.8.0.tar.gz", hash = "sha256:181d3c6996452cb1189c4046c61599b84a5a86e099562ffde77d26984ff26d0f", size = 368251, upload-time = "2025-09-07T18:58:00.022Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ba/5a/18ad964b0086c6e62e2e7500f7edc89e3faa45033c71c1893d34eed2b2de/dnspython-2.8.0-py3-none-any.whl", hash = "sha256:01d9bbc4a2d76bf0db7c1f729812ded6d912bd318d3b1cf81d30c0f845dbf3af", size = 331094, upload-time = "2025-09-07T18:57:58.071Z" }, +] + +[[package]] +name = "email-validator" +version = "2.3.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "dnspython" }, + { name = "idna" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/f5/22/900cb125c76b7aaa450ce02fd727f452243f2e91a61af068b40adba60ea9/email_validator-2.3.0.tar.gz", hash = "sha256:9fc05c37f2f6cf439ff414f8fc46d917929974a82244c20eb10231ba60c54426", size = 51238, upload-time = "2025-08-26T13:09:06.831Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/de/15/545e2b6cf2e3be84bc1ed85613edd75b8aea69807a71c26f4ca6a9258e82/email_validator-2.3.0-py3-none-any.whl", hash = "sha256:80f13f623413e6b197ae73bb10bf4eb0908faf509ad8362c5edeb0be7fd450b4", size = 35604, upload-time = "2025-08-26T13:09:05.858Z" }, +] + +[[package]] +name = "fastapi" +version = "0.136.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "annotated-doc" }, + { name = "pydantic" }, + { name = "starlette" }, + { name = "typing-extensions" }, + { name = "typing-inspection" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/81/2d/ff8d91d7b564d464629a0fd50a4489c97fcb836ac230bf3a7269232a9b1f/fastapi-0.136.3.tar.gz", hash = "sha256:e487fae93ad408e6f47641ee4dfe389864fd7bec92e547ea8498fc13f43e83ab", size = 396410, upload-time = "2026-05-23T18:53:15.192Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e0/82/45359b62a067409bd929ae8a56b8ed13e5a8c8a61194b3c236920999ab83/fastapi-0.136.3-py3-none-any.whl", hash = "sha256:3d2a69bdf04b7e9f3afa292c3bc7a98816bbfafa10bc9b45f3f3700d2f761620", size = 117481, upload-time = "2026-05-23T18:53:16.924Z" }, +] + +[package.optional-dependencies] +standard = [ + { name = "email-validator" }, + { name = "fastapi-cli", extra = ["standard"] }, + { name = "fastar" }, + { name = "httpx" }, + { name = "jinja2" }, + { name = "pydantic-extra-types" }, + { name = "pydantic-settings" }, + { name = "python-multipart" }, + { name = "uvicorn", extra = ["standard"] }, +] + +[[package]] +name = "fastapi-cli" +version = "0.0.24" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "rich-toolkit" }, + { name = "typer" }, + { 
name = "uvicorn", extra = ["standard"] }, +] +sdist = { url = "https://files.pythonhosted.org/packages/6e/58/74797ae9e4610cfa0c6b34c8309096d3b20bb29be3b8b5fbf1004d10fa5f/fastapi_cli-0.0.24.tar.gz", hash = "sha256:1afc9c9e21d7ebc8a3ca5e31790cd8d837742be7e4f8b9236e99cb3451f0de00", size = 19043, upload-time = "2026-02-24T10:45:10.476Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c7/4b/68f9fe268e535d79c76910519530026a4f994ce07189ac0dded45c6af825/fastapi_cli-0.0.24-py3-none-any.whl", hash = "sha256:4a1f78ed798f106b4fee85ca93b85d8fe33c0a3570f775964d37edb80b8f0edc", size = 12304, upload-time = "2026-02-24T10:45:09.552Z" }, +] + +[package.optional-dependencies] +standard = [ + { name = "fastapi-cloud-cli" }, + { name = "uvicorn", extra = ["standard"] }, +] + +[[package]] +name = "fastapi-cloud-cli" +version = "0.18.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "detect-installer" }, + { name = "fastar" }, + { name = "httpx" }, + { name = "pydantic", extra = ["email"] }, + { name = "rich-toolkit" }, + { name = "rignore" }, + { name = "sentry-sdk" }, + { name = "typer" }, + { name = "uvicorn", extra = ["standard"] }, +] +sdist = { url = "https://files.pythonhosted.org/packages/7f/1d/57221a834b0f62dfa510c2b3db6e9b682cfbc280cef41919a8811ce1ff89/fastapi_cloud_cli-0.18.0.tar.gz", hash = "sha256:95f7a79200e3a90a005e068a4d8ede49d4b04accb095ccd4fd47da998fc28c74", size = 53320, upload-time = "2026-05-22T09:53:54.462Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/78/1e/1d54aabf71c003e89e73df92c3dfded311228e68db7cea5db90b3e0ef2b5/fastapi_cloud_cli-0.18.0-py3-none-any.whl", hash = "sha256:1f136fc651b0b6e2f4a9679e23c56e1c3be3405e74469c14ba6e2d5b87fdc113", size = 37087, upload-time = "2026-05-22T09:53:53.001Z" }, +] + +[[package]] +name = "fastar" +version = "0.11.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/03/0f/0aeb3fc50046617702acc0078b277b58367fd62eb727b9ec733ae0e8bbcc/fastar-0.11.0.tar.gz", hash = "sha256:aa7f100f7313c03fdb20f1385927ba95671071ba308ad0c1763fef295e1895ce", size = 70238, upload-time = "2026-04-13T17:11:17.143Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0f/06/a5773706afc8bd496769786590bbc56d2d0ee419a299cc12ea3f5717fcf3/fastar-0.11.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:3c51f1c2cdddbd1420d2897ace7738e36c65e17f6ae84e0bfe763f8d1068bb97", size = 708394, upload-time = "2026-04-13T17:09:57.269Z" }, + { url = "https://files.pythonhosted.org/packages/cc/a6/d5e2a4e48495616440a21eed07558219ca90243ad00b0502586f95bd4833/fastar-0.11.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:0d9d6b052baf5380baea866675dab6ccd04ec2460d12b1c46f10ce3f4ee6a820", size = 628417, upload-time = "2026-04-13T17:09:42.145Z" }, + { url = "https://files.pythonhosted.org/packages/ab/69/9816d69ac8265c9e50456637a487ccfb7a9c566efd9dbcd673df9c2558c2/fastar-0.11.0-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:bd2f05666d4df7e14885b5c38fefd92a785917387513d33d837ff42ec143a22f", size = 863950, upload-time = "2026-04-13T17:09:11.506Z" }, + { url = "https://files.pythonhosted.org/packages/5b/0d/f88daad53aff2e754b6b5ff2a7113f72447a34f6ef17cc23ca99988117b7/fastar-0.11.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c1e6e74aba1ae77ca4aedcaf1697cd413319f4c88a5ccbe5b42c709517c5097e", size = 760737, upload-time = "2026-04-13T17:07:55.958Z" }, + { url = "https://files.pythonhosted.org/packages/2f/a6/82ef4ecd969d50d92ed3ed9dbd8fe77faa24be5e5736f716edc9f4ce8d62/fastar-0.11.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:38ef77fe940bbc9b37a98bd838727f844b11731cd39358a2640ff864fb385086", size = 757603, upload-time = "2026-04-13T17:08:10.623Z" }, + { url = 
"https://files.pythonhosted.org/packages/03/35/50249f0d827251f8ac511495e2eacccebda80a00a0ad73e9615b8113b84f/fastar-0.11.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8955e61b32d6aff82c983217abf80933fd823b0e727586fc72f08043d996fd59", size = 923952, upload-time = "2026-04-13T17:08:25.526Z" }, + { url = "https://files.pythonhosted.org/packages/7b/d8/faee41659e9c379d906d24eaee6d6833ac8cfef0a5df480e5c2a8d3efb33/fastar-0.11.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:483532442cdb08fbff0169510224eae0836f2f672cea6aacb52847d90fefdc46", size = 816574, upload-time = "2026-04-13T17:08:56.076Z" }, + { url = "https://files.pythonhosted.org/packages/22/47/0448ea7992b997dad2bf004bfd98eca74b5858630eae080b50c7b17d9ddc/fastar-0.11.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ef5a6071121e05d8287fc75bccb054bcbac8bb0501200a0c0a8feeace5303ea4", size = 819382, upload-time = "2026-04-13T17:09:26.66Z" }, + { url = "https://files.pythonhosted.org/packages/33/ef/0d63eb43586831b7a6f8b22c4d77125a7c594423af1f4f090fa9541b9b40/fastar-0.11.0-cp312-cp312-manylinux_2_31_riscv64.whl", hash = "sha256:e45e598af5afe8412197d4786efd6cf29be02e7d3d4f6a3461149eae5d7e94f1", size = 885254, upload-time = "2026-04-13T17:08:40.9Z" }, + { url = "https://files.pythonhosted.org/packages/01/25/edd584675d69e49a165052c3ee886df1c5d574f3e7d813c990306387c623/fastar-0.11.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:2e160919b1c47ddb8538e7e8eb4cd527281b40f0bf75110a75993838ef61f286", size = 971239, upload-time = "2026-04-13T17:10:12.997Z" }, + { url = "https://files.pythonhosted.org/packages/a5/37/e8bb24f506ba2b08fbaf36c5800e843bd4d542954e9331f00418e2d23349/fastar-0.11.0-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:4bb4dc0fc8f7a6807febcebce8a2f3626ba4955a9263d81ecc630aad83be84c0", size = 1035185, upload-time = "2026-04-13T17:10:30.207Z" }, + { url = 
"https://files.pythonhosted.org/packages/9a/bf/be753736296338149ee4cb3e92e2b5423d6ba17c7b951d15218fd7e99bbf/fastar-0.11.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:4ec95af56aa173f6e320e1183001bf108ba59beaf13edd1fc8200648db203588", size = 1072191, upload-time = "2026-04-13T17:10:47.072Z" }, + { url = "https://files.pythonhosted.org/packages/d2/cd/a81c1aaafb5a22ce57c98ae22f39c89413ed53e4ee6e1b1444b0bd666a6c/fastar-0.11.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:136cf342735464091c39dc3708168f9fdeb9ebea40b1ead937c61afaf46143d9", size = 1028054, upload-time = "2026-04-13T17:11:04.293Z" }, + { url = "https://files.pythonhosted.org/packages/ec/88/1ce4eed3d70627c95f49ca017f6bbbf2ddcc4b0c601d293259de7689bc20/fastar-0.11.0-cp312-cp312-win32.whl", hash = "sha256:35f23c11b556cc4d3704587faacbc0037f7bdf6c4525cd1d09c70bda4b1c6809", size = 454198, upload-time = "2026-04-13T17:11:45.168Z" }, + { url = "https://files.pythonhosted.org/packages/8f/1d/26ce92f4331cd61a69840db9ca6115829805eec24f285481a854f578e917/fastar-0.11.0-cp312-cp312-win_amd64.whl", hash = "sha256:920bc56c3c0b8a8ca492904941d1883c1c947c858cd93343356c29122a38f44c", size = 486697, upload-time = "2026-04-13T17:11:31.084Z" }, + { url = "https://files.pythonhosted.org/packages/ed/96/e6eda4480559c69b05d466e7b5ea9170e81fef3795a73e059959a3258319/fastar-0.11.0-cp312-cp312-win_arm64.whl", hash = "sha256:395248faf89e8a6bd5dc1fd544c8465113b627cb6d7c8b296796b60ebea33593", size = 462591, upload-time = "2026-04-13T17:11:20.577Z" }, + { url = "https://files.pythonhosted.org/packages/c9/d6/3be260037e86fb694e88d47f583bac3a0188c99cee1a6b257ac26cb6b53c/fastar-0.11.0-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:33f544b08b4541b678e53749b4552a44720d96761fb79c172b005b1089c443ed", size = 707975, upload-time = "2026-04-13T17:09:58.866Z" }, + { url = "https://files.pythonhosted.org/packages/e1/cd/7867aefb1784662554a335f2952c75a50f0c70585ed0d2210d6cc15e5627/fastar-0.11.0-cp313-cp313-macosx_11_0_arm64.whl", hash = 
"sha256:91c1c792447e4a642745f347ff9847c52af39633071c57ee67ed53c157fc3506", size = 628460, upload-time = "2026-04-13T17:09:43.776Z" }, + { url = "https://files.pythonhosted.org/packages/e5/2b/d11d84bdd5e0e377771b955755771e3460b290da5809cb78c1b735ee2228/fastar-0.11.0-cp313-cp313-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:881247e6b6eaea59fc6569f9b61447aa6b9fc2ee864e048b4643d69c52745805", size = 863054, upload-time = "2026-04-13T17:09:13.048Z" }, + { url = "https://files.pythonhosted.org/packages/25/39/d3f428b318fa940b1b6e785b8d54fc895dfb5d5b945ef8d5442ffa904fb2/fastar-0.11.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:863b7929845c9fec92ef6c8d59579cf46af5136655e5342f8df5cebe46cab06c", size = 760247, upload-time = "2026-04-13T17:07:57.396Z" }, + { url = "https://files.pythonhosted.org/packages/9e/04/03949aee82aabb8ede06ac5a4a5579ffaf98a8fe59ce958494508ff15513/fastar-0.11.0-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:96b4a57df12bf3211662627a3ea29d62ecb314a2434a0d0843f9fc23e47536e5", size = 756512, upload-time = "2026-04-13T17:08:12.415Z" }, + { url = "https://files.pythonhosted.org/packages/3f/0c/2ca1ae0a3828ca51047962d932b80daca2522db73e8cb9d040cb6ebe28d5/fastar-0.11.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ceef1c2c4df7b7b8ebd3f5d718bbf457b9bbdf25ce0bd07870211ec4fbd9aff4", size = 922183, upload-time = "2026-04-13T17:08:27.187Z" }, + { url = "https://files.pythonhosted.org/packages/65/68/7fe808b1f73a68e686f25434f538c6dc10ef4dfb3db0ace22cd861744bf8/fastar-0.11.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b8e545918441910a779659d4759ad0eef349e935fbdb4668a666d3681567eb05", size = 816394, upload-time = "2026-04-13T17:08:57.657Z" }, + { url = "https://files.pythonhosted.org/packages/1f/17/07d086080f8a83b8d7966955e29bcdbd6a060f5bd949dc9d5abd3658cead/fastar-0.11.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash 
= "sha256:28095bb8f821e85fc2764e1a55f03e5e2876dee2abe7cd0ee9420d929905d643", size = 818983, upload-time = "2026-04-13T17:09:28.46Z" }, + { url = "https://files.pythonhosted.org/packages/fb/e2/2c4edf0910af2e814ff6d65b77a91196d472ca8a9fb2033bd983f6856caa/fastar-0.11.0-cp313-cp313-manylinux_2_31_riscv64.whl", hash = "sha256:0fafb95ecbe70f666a5e9b35dd63974ccdc9bb3d99ccdbd4014a823ec3e659b5", size = 884689, upload-time = "2026-04-13T17:08:42.763Z" }, + { url = "https://files.pythonhosted.org/packages/fa/ba/04fdcbd6558e60de4ced3b55230fac47675d181252582b2fcec3c74608e5/fastar-0.11.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:af48fed039b94016629dcdad1c95c90c486326dd068de2b0a4df419ee09b6821", size = 970677, upload-time = "2026-04-13T17:10:15.124Z" }, + { url = "https://files.pythonhosted.org/packages/df/b3/2b860a9658550167dbd5824c85e88d0b4b912bf493e42a6322544d6e483d/fastar-0.11.0-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:74cd96163f39b8638ab4e8d49708ca887959672a22871d8170d01f067319533b", size = 1034026, upload-time = "2026-04-13T17:10:32.318Z" }, + { url = "https://files.pythonhosted.org/packages/b7/9b/fa42ea1188b144bac4b1b60753dfd449974a4d5eda132029ee7711569f94/fastar-0.11.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:4e8b993cb5613bab495ed482810bedc0986633fcb9a3b55c37ec88e0d6714f6a", size = 1071147, upload-time = "2026-04-13T17:10:48.833Z" }, + { url = "https://files.pythonhosted.org/packages/95/c8/d2e501556dca9f1fbc9246111a31792fb49ad908fa4927f34938a97a3604/fastar-0.11.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:dfe39d91fc28e37e06162d94afe01050220edb7df554acb5b702b5503e564816", size = 1028377, upload-time = "2026-04-13T17:11:06.374Z" }, + { url = "https://files.pythonhosted.org/packages/db/33/5f11f23eca0a569cd052507bc45dda2e5468697f8665728d25be44120f7d/fastar-0.11.0-cp313-cp313-win32.whl", hash = "sha256:c5f63d4d99ff4bfb37c659982ec413358bdee747005348756cc50a04d412d989", size = 454089, upload-time = "2026-04-13T17:11:46.821Z" }, + { 
url = "https://files.pythonhosted.org/packages/da/2f/35ff03c939cba7a255a9132367873fec6c355fd06a7f84fedcbaf4c8129f/fastar-0.11.0-cp313-cp313-win_amd64.whl", hash = "sha256:8690ed1928d31ded3ada308e1086525fb3871f5fa81e1b69601a3f7774004583", size = 486312, upload-time = "2026-04-13T17:11:32.86Z" }, + { url = "https://files.pythonhosted.org/packages/ef/71/ee9246cbfcbfd4144558f35e7e9a306ffe0a7564730a5188c45f21d2dab8/fastar-0.11.0-cp313-cp313-win_arm64.whl", hash = "sha256:d977ded9d98a0719a305e0a4d5ee811f1d3e856d853a50acb8ae833c3cd6d5d2", size = 461975, upload-time = "2026-04-13T17:11:22.589Z" }, + { url = "https://files.pythonhosted.org/packages/7a/cd/3644c48ecac456f928c12d47ec3bed36c36555b17c3859856f1ff860265d/fastar-0.11.0-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:71375bd6f03c2a43eb47bd949ea38ff45434917f9cdac79675c5b9f60de4fa73", size = 707860, upload-time = "2026-04-13T17:10:00.371Z" }, + { url = "https://files.pythonhosted.org/packages/69/ca/dee04476ae3626b2b040a60ad84628f77e1ffd8444232f2426b0ca1e0d7e/fastar-0.11.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:eddfd9cab16e19ae247fe44bf992cb403ccfe27d3931d6de29a4695d95ad386c", size = 628216, upload-time = "2026-04-13T17:09:45.355Z" }, + { url = "https://files.pythonhosted.org/packages/dc/5e/9395c7353d079cb4f5be0f7982ce0dc9f2e7dec5fd175eef466729d6023a/fastar-0.11.0-cp314-cp314-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:7c371f1d4386c699018bb64eb2fa785feacf32785559049d2bb72fe4af023f53", size = 864378, upload-time = "2026-04-13T17:09:14.611Z" }, + { url = "https://files.pythonhosted.org/packages/fa/ba/1e4f67148223ff219612b6281a6000357abbcc2417964fa5c83f11d68fce/fastar-0.11.0-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cad7fa41e3e66554387481c1a09365e4638becd322904932674159d5f4046728", size = 760921, upload-time = "2026-04-13T17:07:59.138Z" }, + { url = 
"https://files.pythonhosted.org/packages/0f/82/09d11fb6d12f17993ffaf32ffd30c3c121a11e2966e84f19fb6f66430118/fastar-0.11.0-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:cf36652fa71b83761717c9899b98732498f8a2cb6327ff16bbf07f6be85c3437", size = 757012, upload-time = "2026-04-13T17:08:14.186Z" }, + { url = "https://files.pythonhosted.org/packages/52/1f/5aeeacc4cb65615e2c9292cd9c5b0cd6fb6d2e6ee472ca6adc6c1b1b22ef/fastar-0.11.0-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f68ff8c17833053da4841720e95edde80ce45bb994b6b7d51418dddaac70ee47", size = 924510, upload-time = "2026-04-13T17:08:28.741Z" }, + { url = "https://files.pythonhosted.org/packages/bb/1a/1e5bdabbeaf2e856928956292609f2ff6a650f94480fb8afaca30229e483/fastar-0.11.0-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4563ed37a12ea1cdc398af8571258d24b988bf342b7b3bf5451bd5891243280c", size = 816602, upload-time = "2026-04-13T17:08:59.461Z" }, + { url = "https://files.pythonhosted.org/packages/87/24/f960147910da3bed41a3adfcb026e17d5f50f4cf467a3324237a7088f61a/fastar-0.11.0-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cee63c9875cba3b70dc44338c560facc5d6e763047dcc4a30501f9a68cf5f890", size = 819452, upload-time = "2026-04-13T17:09:29.926Z" }, + { url = "https://files.pythonhosted.org/packages/cc/f4/3e77d7901d5707fd7f8a352e153c8ae09ea974e6fabad0b7c4eb9944b8d4/fastar-0.11.0-cp314-cp314-manylinux_2_31_riscv64.whl", hash = "sha256:bd76bfffae6d0a91f4ac4a612f721e7aec108db97dccdd120ae063cd66959f27", size = 885254, upload-time = "2026-04-13T17:08:44.285Z" }, + { url = "https://files.pythonhosted.org/packages/47/01/1585edd5ec47782ae93cd94edf05828e0ab02ef00aec00aea4194a600464/fastar-0.11.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:8f5b707501ec01c1bc0518f741f01d322e50c9adc19a451aa24f67a2316e9397", size = 971496, upload-time = "2026-04-13T17:10:17.024Z" }, + { url = 
"https://files.pythonhosted.org/packages/f1/e9/6874c9d1236ded565a0bed54b320ac9f165f287b1d89490fb70f9f323c81/fastar-0.11.0-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:37c0b5a88a657839aad98b0a6c9e4ac4c2c15d6b49c44ee3935c6b08e9d3e479", size = 1034685, upload-time = "2026-04-13T17:10:34.063Z" }, + { url = "https://files.pythonhosted.org/packages/14/d8/4ab20613ce2983427aee958e39be878dba874aa227c530a845e32429c4f6/fastar-0.11.0-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:6c55f536c62a6efb180c1af0d5182948bff576bbfe6276e8e1359c9c7d2215d8", size = 1072675, upload-time = "2026-04-13T17:10:50.53Z" }, + { url = "https://files.pythonhosted.org/packages/1f/ae/5ac3b7c20ce4b08f011dd2b979f96caabe64f9b10b157f211ea91bdfadca/fastar-0.11.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:3082eeca59e189b9039335862f4c2780c0c8871d656bfdf559db4414a105b251", size = 1029330, upload-time = "2026-04-13T17:11:08.138Z" }, + { url = "https://files.pythonhosted.org/packages/8a/e7/37cd6a1d4e288292170b64e19d79ecce2a7de8bb76790323399a2abc4619/fastar-0.11.0-cp314-cp314-win32.whl", hash = "sha256:b201a0a4e29f9fec2a177e13154b8725ec65ab9f83bd6415483efaa2aa18344b", size = 453940, upload-time = "2026-04-13T17:11:48.713Z" }, + { url = "https://files.pythonhosted.org/packages/ff/1c/795c878b1ee29d79021cf8ed81f18f2b25ccde58453b0d34b9bdc7e025ea/fastar-0.11.0-cp314-cp314-win_amd64.whl", hash = "sha256:868fddb26072a43e870a8819134b9f80ee602931be5a76e6fb873e04da343637", size = 486334, upload-time = "2026-04-13T17:11:34.882Z" }, + { url = "https://files.pythonhosted.org/packages/ff/a4/113f104301df8bddcc0b3775b611a30cb7610baa3add933c7ccac9386467/fastar-0.11.0-cp314-cp314-win_arm64.whl", hash = "sha256:3db39c9cc42abb0c780a26b299f24dfbc8be455985e969e15336d70d7b2f833b", size = 461534, upload-time = "2026-04-13T17:11:24.329Z" }, + { url = "https://files.pythonhosted.org/packages/5a/a6/5c5f2c2c8e0c63e56a5636ebc7721589c889e94c0092cec7eb28ae7207e6/fastar-0.11.0-cp314-cp314t-macosx_10_12_x86_64.whl", 
hash = "sha256:49c3299dec5e125e7ebaa27545714da9c7391777366015427e0ae62d548b442b", size = 707156, upload-time = "2026-04-13T17:10:02.176Z" }, + { url = "https://files.pythonhosted.org/packages/df/f7/982c01b61f0fc135ad2b16d01e6d0ee53cf8791e68827f5f7c5a65b2e5b1/fastar-0.11.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:3328ed1ed56d31f5198350b17dd60449b8d6b9d47abb4688bab6aef4450a165b", size = 627032, upload-time = "2026-04-13T17:09:46.978Z" }, + { url = "https://files.pythonhosted.org/packages/2b/c3/38f1dac77ae0c71c37b176277c96d830796b8ce2fe69705f917829b53829/fastar-0.11.0-cp314-cp314t-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:bd3eca3bbfec84a614bcb4143b4ad4f784d0895babc26cfc88436af88ca23c7a", size = 864403, upload-time = "2026-04-13T17:09:16.58Z" }, + { url = "https://files.pythonhosted.org/packages/6e/f0/e69c363bdb3e5a5848e937b662b5469581ee6682c51bc1c0556494773929/fastar-0.11.0-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ff86a967acb0d621dd24063dda090daa67bf4993b9570e97fe156de88a9006ca", size = 759480, upload-time = "2026-04-13T17:08:00.599Z" }, + { url = "https://files.pythonhosted.org/packages/3b/29/4d8737590c2a6357d614d7cc7288e8f68e7e449680b8922997cc4349e65e/fastar-0.11.0-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:86eaf7c0e985d93a7734168be2fb232b2a8cca53e41431c2782d7c12b12c03b1", size = 756219, upload-time = "2026-04-13T17:08:15.699Z" }, + { url = "https://files.pythonhosted.org/packages/bb/ec/400de7b3b7d48801908f19cf5462177104395799472671b3e8152b2b04ca/fastar-0.11.0-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:91f07b0b8eb67e2f177733a1f884edad7dfb9f8977ffef15927b20cb9604027d", size = 923669, upload-time = "2026-04-13T17:08:30.574Z" }, + { url = "https://files.pythonhosted.org/packages/5d/01/8926c53da923fed7ab4b96e7fbf7f73b663beb4f02095b654d6fab46f9ad/fastar-0.11.0-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:f85c896885eb4abf1a635d54dea22cac6ae48d04fc2ea26ae652fcf1febe1220", size = 815729, upload-time = "2026-04-13T17:09:01.204Z" }, + { url = "https://files.pythonhosted.org/packages/89/f0/5fef4c7946e352651b504b1a4235dac3505e7cfd24020788ab50552e84bf/fastar-0.11.0-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:075c07095c8de4b774ba8f28b9c0a02b1a2cd254da50cbe464dd3bb2432e9158", size = 819812, upload-time = "2026-04-13T17:09:31.907Z" }, + { url = "https://files.pythonhosted.org/packages/b3/c8/0ebc3298b4a45e7bddc50b169ae6a6f5b80c939394d4befe6e60de535ee7/fastar-0.11.0-cp314-cp314t-manylinux_2_31_riscv64.whl", hash = "sha256:07f028933820c65750baf3383b807ecce1cd9385cf00ce192b79d263ad6b856c", size = 884074, upload-time = "2026-04-13T17:08:45.802Z" }, + { url = "https://files.pythonhosted.org/packages/ae/9f/7baa4cdff8d6fbca41fa5c764b48a941fed8a9ec6c4cc92de65895a28299/fastar-0.11.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:039f875efa0f01fa43c20bf4e2fc7305489c61d0ac76eda991acfba7820a0e63", size = 969450, upload-time = "2026-04-13T17:10:18.667Z" }, + { url = "https://files.pythonhosted.org/packages/d4/dc/1ebbfb58a47056ba866494f19efbcdd2ba2897096b94f36e796594b4d05b/fastar-0.11.0-cp314-cp314t-musllinux_1_2_armv7l.whl", hash = "sha256:fff12452a9a5c6814a012445f26365541cc3d99dcca61f09762e6a389f7a32ea", size = 1033775, upload-time = "2026-04-13T17:10:36.165Z" }, + { url = "https://files.pythonhosted.org/packages/c2/5f/ce4e3914066f08c99eb8c32952cc07c1a013e81b1db1b0f598130bf6b974/fastar-0.11.0-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:2bf733e09f942b6fa876efe30a90508d1f4caef5630c00fb2a84fba355873712", size = 1072158, upload-time = "2026-04-13T17:10:52.497Z" }, + { url = "https://files.pythonhosted.org/packages/03/2a/6bca72992c84151c387cc6558f3867f5ebe5fb3684ee6fa9b76280ba4b8e/fastar-0.11.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:d1531fa848fdd3677d2dce0a4b436ea64d9ae38fb8babe2ddbc180dd153cb7a3", size = 1028577, 
upload-time = "2026-04-13T17:11:09.934Z" }, + { url = "https://files.pythonhosted.org/packages/83/18/7a7c15657a3da5569b26fc51cde6a80f8d84cb54b3b1aea6d74a103db4ad/fastar-0.11.0-cp314-cp314t-win32.whl", hash = "sha256:5744551bc67c6fc6581cbd0e34a0fd6e2cd0bd30b43e94b1c3119cf35064b162", size = 453601, upload-time = "2026-04-13T17:11:53.726Z" }, + { url = "https://files.pythonhosted.org/packages/6d/d8/331b59a6de279f3ad75c10c02c40a12f21d64a437d9c3d6f1af2dcbd7a76/fastar-0.11.0-cp314-cp314t-win_amd64.whl", hash = "sha256:f4ce44e3b56c47cf38244b98d29f269b259740a580c47a2552efa5b96a5458fb", size = 486436, upload-time = "2026-04-13T17:11:40.089Z" }, + { url = "https://files.pythonhosted.org/packages/6b/fd/5390ec4f49100f3ecb9968a392f9e6d039f1e3fe0ecd28443716ff01e589/fastar-0.11.0-cp314-cp314t-win_arm64.whl", hash = "sha256:76c1359314355eafbc6989f20fb1ad565a3d10200117923b9da765a17e2f6f11", size = 461049, upload-time = "2026-04-13T17:11:25.918Z" }, +] + +[[package]] +name = "h11" +version = "0.16.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/01/ee/02a2c011bdab74c6fb3c75474d40b3052059d95df7e73351460c8588d963/h11-0.16.0.tar.gz", hash = "sha256:4e35b956cf45792e4caa5885e69fba00bdbc6ffafbfa020300e549b208ee5ff1", size = 101250, upload-time = "2025-04-24T03:35:25.427Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/04/4b/29cac41a4d98d144bf5f6d33995617b185d14b22401f75ca86f384e87ff1/h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86", size = 37515, upload-time = "2025-04-24T03:35:24.344Z" }, +] + +[[package]] +name = "httpcore" +version = "1.0.9" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "certifi" }, + { name = "h11" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/06/94/82699a10bca87a5556c9c59b5963f2d039dbd239f25bc2a63907a05a14cb/httpcore-1.0.9.tar.gz", hash = 
"sha256:6e34463af53fd2ab5d807f399a9b45ea31c3dfa2276f15a2c3f00afff6e176e8", size = 85484, upload-time = "2025-04-24T22:06:22.219Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7e/f5/f66802a942d491edb555dd61e3a9961140fd64c90bce1eafd741609d334d/httpcore-1.0.9-py3-none-any.whl", hash = "sha256:2d400746a40668fc9dec9810239072b40b4484b640a8c38fd654a024c7a1bf55", size = 78784, upload-time = "2025-04-24T22:06:20.566Z" }, +] + +[[package]] +name = "httptools" +version = "0.8.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/43/e5/d471fcb0e14523fe1c3f4ba58ca52480e7bd70ad7109a3846bc75892f7fb/httptools-0.8.0.tar.gz", hash = "sha256:6b2a32f18d97e16e90827d7a819ffa8dbd8cc245fc4e1fa9d1095b54ef4bd999", size = 271342, upload-time = "2026-05-25T22:17:48.841Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/14/88/1d21a36da8f5cb0fa49eafd4b169eba5608d57e75bbcf61845cbc6243216/httptools-0.8.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:880490234c10f70a9830743097e8958d6e4b9f5a0ffc24515023afeef984054d", size = 208247, upload-time = "2026-05-25T22:17:07.843Z" }, + { url = "https://files.pythonhosted.org/packages/a5/42/cc4feea2945cb3051038f090c9b36bd5b8a9d7f5a894a506a8983e33fd1c/httptools-0.8.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:5931891fb7b441b8a3853cf1b85c82c903defce084dd5f6771ca46e31bf862c5", size = 113064, upload-time = "2026-05-25T22:17:09.136Z" }, + { url = "https://files.pythonhosted.org/packages/e3/a6/febbb8b8db0f58b38e44ad6cb946e6a255ae49b55f2e8543408fb7501ccd/httptools-0.8.0-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:b15fc622b0f869d19207c4089a501d9bcc63ca5e071ffdd2f03f922df882dcb2", size = 523851, upload-time = "2026-05-25T22:17:10.106Z" }, + { url = 
"https://files.pythonhosted.org/packages/b7/e4/f90a0df0b83beff265b7e3b65f2a4cefd95792d4be0ac3e16049f2acd3c2/httptools-0.8.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:425f83884fd6343828d8c565f046cb72b6d19063f6924093e11bcd8e1548cd09", size = 518842, upload-time = "2026-05-25T22:17:11.218Z" }, + { url = "https://files.pythonhosted.org/packages/9e/2d/0c9ac76dd2c893841fbf6498d6acec4f2442e1b7067f6e3e316a80e494e8/httptools-0.8.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:ef7c3c97f4311c7be57e2986629df89d49cb434dbff78eafcd48c2bff986b15a", size = 501238, upload-time = "2026-05-25T22:17:12.728Z" }, + { url = "https://files.pythonhosted.org/packages/ca/42/906adc91ae3a5fa9c59c0a2f21c139725bd7e5b41ae6acd485cd14123ebf/httptools-0.8.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:a1afd7c9fbff0d9f5d489c4ce2768bd09c84a46ddefc7161e6aa82ae35c85745", size = 509567, upload-time = "2026-05-25T22:17:13.842Z" }, + { url = "https://files.pythonhosted.org/packages/05/0b/4240efeb672751ee5b9b380cb0e3fdc050bc05f68adc7a8aefc4fcd9a69a/httptools-0.8.0-cp312-cp312-win_amd64.whl", hash = "sha256:cd96f29b4bab1d42fa6e3d008711c75e0f79e94e06827330160e3a304227f150", size = 90918, upload-time = "2026-05-25T22:17:15.155Z" }, + { url = "https://files.pythonhosted.org/packages/5e/e5/8cfcabc5546e8022f168be28bcdaa128a240a0befdd03b59d558b4f18bd6/httptools-0.8.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:614ceea8ea606848bece2338ac03b3ce5324bcb4be8dc7d377ed708012fa4db8", size = 205148, upload-time = "2026-05-25T22:17:16.333Z" }, + { url = "https://files.pythonhosted.org/packages/2a/0e/0fb14848c19a686c8062ff9067c1a48793e3224b47bc5b201535b6036fce/httptools-0.8.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:2d689918c15a013c65ef52d9fd495d766893ab831a2c8d89f2ac5940a5df847c", size = 111368, upload-time = "2026-05-25T22:17:17.586Z" }, + { url = 
"https://files.pythonhosted.org/packages/2e/1b/46f1cecf06b9bbde8e4b8c88034ac7908989e5ff7a3a388ef38392949c1f/httptools-0.8.0-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:eb3028cca2fc0a6d720e52ef61d8ebb62fcbfeb1de56874546d858d3f25a26b7", size = 486447, upload-time = "2026-05-25T22:17:18.564Z" }, + { url = "https://files.pythonhosted.org/packages/77/00/258bfc0837221f81d9725c45f9b948a6a6b2994a147a4fb66e85100c668f/httptools-0.8.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:88bdd940f2b5d487b4d032c6afa5489a7dc4694410d43de3c38c4fb3af0dc45d", size = 482448, upload-time = "2026-05-25T22:17:19.912Z" }, + { url = "https://files.pythonhosted.org/packages/04/ab/d1cef3b5523f4d272a70f42a776c3169a2dddfe3a54de4b2ce4a36341528/httptools-0.8.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:6a43c9dd399758ccc0531acb0a3c4a6c299ee893ee9400e9c893b7bdcfae0681", size = 464460, upload-time = "2026-05-25T22:17:20.882Z" }, + { url = "https://files.pythonhosted.org/packages/ce/48/5d1d072442277bb2b3434e0e60690b8e8c23840ef7de8b6ea54040a536d3/httptools-0.8.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:0770728beb05094c809b98e814edff5fef69d26ad7d21185f2f6d5884a0ba683", size = 471312, upload-time = "2026-05-25T22:17:22.085Z" }, + { url = "https://files.pythonhosted.org/packages/0d/66/b96623b27e51a68199ef4efdda0613cced9233fe3062ac74e50749c5ad37/httptools-0.8.0-cp313-cp313-win_amd64.whl", hash = "sha256:7685df791fad561384bfb139e77fde27a1ffd93134e016f95a0db424ffbf77b1", size = 90117, upload-time = "2026-05-25T22:17:23.074Z" }, + { url = "https://files.pythonhosted.org/packages/1a/12/fa3fbf5f9517b273edea2dc982aa82a8c634091e67c590792b729017bc6f/httptools-0.8.0-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:de242a49b5d18e0a8776e654e9f6bf6d89f3875a5c35b425a0e7ce940feb3fd6", size = 206183, upload-time = "2026-05-25T22:17:24.004Z" }, + { url = 
"https://files.pythonhosted.org/packages/30/fc/5e7c4cb443370f2090a3aba0453a07384d29ff66b7435bb90e77e1037599/httptools-0.8.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:159e9ab5f701ccd42e555a12f1ad8ff69702910fc1c996cf2bb66e5fcb7a231b", size = 112079, upload-time = "2026-05-25T22:17:25.216Z" }, + { url = "https://files.pythonhosted.org/packages/ba/53/771bd891eb0f236f32145d6a1775777ec85745f3cc983a1f23d1a3b8ddfe/httptools-0.8.0-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:c4a9f1707e4823d54dfec6c33fa3697d302aed536ed352a7ebb5a061ddb869d0", size = 481596, upload-time = "2026-05-25T22:17:26.186Z" }, + { url = "https://files.pythonhosted.org/packages/62/42/94e15bc68ce3d423243c45d7f1b0c7561f13844f97dc52ae23182fb65628/httptools-0.8.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d76ad7b951387e3632c8716a9bb03ac5b45c5f16119aa409db0459520887944e", size = 480865, upload-time = "2026-05-25T22:17:27.542Z" }, + { url = "https://files.pythonhosted.org/packages/1c/7c/fe2980fc03723272e30f135b62360b075f513dfe7cc73aef36c7f04012bd/httptools-0.8.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:a3b7387147361c3fd47a0bde763c5c91b5b4cd4dc9989b8ece84ff436c99843b", size = 463189, upload-time = "2026-05-25T22:17:28.546Z" }, + { url = "https://files.pythonhosted.org/packages/15/1b/47fc5fff68acd1bfa20b4734059c9a06cadb88119dcd5258b5b0d21d91c8/httptools-0.8.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:f256d6ce930c52ca1cb2a960b7da03548c454e7d28b06059ad41bfe789036ce0", size = 466610, upload-time = "2026-05-25T22:17:29.816Z" }, + { url = "https://files.pythonhosted.org/packages/60/bd/07b13c93ffd9bec9546e0d43f8e19378dd696dbd278511406bc07371ef1f/httptools-0.8.0-cp314-cp314-win_amd64.whl", hash = "sha256:19d1ee275bb59ba2643ba9a3a1e51cc0c788caf2b8df506368e03f56fdd08527", size = 92705, upload-time = "2026-05-25T22:17:31.133Z" }, + { url = 
"https://files.pythonhosted.org/packages/fd/c4/121648f68ce066d7bd762d6b6d97e620847642d38d54f3d90ff11d947629/httptools-0.8.0-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:de1ed58a974e75d56560acc7e7fed01a454994429456f65209789992e41f2568", size = 215023, upload-time = "2026-05-25T22:17:32.401Z" }, + { url = "https://files.pythonhosted.org/packages/b9/b0/312a062ae741ae3e8baa8c8bf20be81b2e67337b259ab4349bebc7b6142e/httptools-0.8.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:e93c227b595c6926c1acee96891dd9da4be338cfbe82e5cd3bb9d8dd7dc4ac0b", size = 117405, upload-time = "2026-05-25T22:17:33.742Z" }, + { url = "https://files.pythonhosted.org/packages/fc/37/fccd705f795386bb05bf413012fecff2a33e5aa8c2f069096de3e9fd8702/httptools-0.8.0-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:2a021c3a8e65cc125390d72f59b968afca3bdcaff25bd67965e0a055a14946ca", size = 558497, upload-time = "2026-05-25T22:17:34.732Z" }, + { url = "https://files.pythonhosted.org/packages/bd/39/f172e8003576de35f5ba77ff417cf0e34429d35dc014deef15afa337a72c/httptools-0.8.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:48774d39cbb70e2b1f71f88852a3087ae1d3a1eb80482bb48c13067ab080c14f", size = 571585, upload-time = "2026-05-25T22:17:35.813Z" }, + { url = "https://files.pythonhosted.org/packages/3e/b9/f5564760af99f3dbbf3f9104dc00e5da27e96cf433c6bdcf77617f70bf3f/httptools-0.8.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:88eead8ec8680a9f146c655bc88445a325bd7921cfd8194c7337e9467282427d", size = 543297, upload-time = "2026-05-25T22:17:37.08Z" }, + { url = "https://files.pythonhosted.org/packages/99/67/8d9f2c313618e161b82f3873188e7196126da1d6e29688df40eb3997c77a/httptools-0.8.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:2c032fa028f46871ec7e1fc59fc15e8023eab3e6bbe6ece786a1611719a5d081", size = 539535, upload-time = "2026-05-25T22:17:38.032Z" }, + { url = 
"https://files.pythonhosted.org/packages/48/63/b906c01e53f50d432c0defe43ce52764a111dc1bdd028bafbeb54dcfd008/httptools-0.8.0-cp314-cp314t-win_amd64.whl", hash = "sha256:384c17174464c8e873398b7af24f0b1f44d992c820328413951a625323155d77", size = 108209, upload-time = "2026-05-25T22:17:39.473Z" }, +] + +[[package]] +name = "httpx" +version = "0.28.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, + { name = "certifi" }, + { name = "httpcore" }, + { name = "idna" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b1/df/48c586a5fe32a0f01324ee087459e112ebb7224f646c0b5023f5e79e9956/httpx-0.28.1.tar.gz", hash = "sha256:75e98c5f16b0f35b567856f597f06ff2270a374470a5c2392242528e3e3e42fc", size = 141406, upload-time = "2024-12-06T15:37:23.222Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2a/39/e50c7c3a983047577ee07d2a9e53faf5a69493943ec3f6a384bdc792deb2/httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad", size = 73517, upload-time = "2024-12-06T15:37:21.509Z" }, +] + +[[package]] +name = "httpx-sse" +version = "0.4.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/0f/4c/751061ffa58615a32c31b2d82e8482be8dd4a89154f003147acee90f2be9/httpx_sse-0.4.3.tar.gz", hash = "sha256:9b1ed0127459a66014aec3c56bebd93da3c1bc8bb6618c8082039a44889a755d", size = 15943, upload-time = "2025-10-10T21:48:22.271Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d2/fd/6668e5aec43ab844de6fc74927e155a3b37bf40d7c3790e49fc0406b6578/httpx_sse-0.4.3-py3-none-any.whl", hash = "sha256:0ac1c9fe3c0afad2e0ebb25a934a59f4c7823b60792691f779fad2c5568830fc", size = 8960, upload-time = "2025-10-10T21:48:21.158Z" }, +] + +[[package]] +name = "idna" +version = "3.17" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/b9/28/99c51f664567218d824af024c0251650fb27e4ca066df188dab0769c5b91/idna-3.17.tar.gz", hash = "sha256:5eb0cb53bc467c12eadcf6de83163ad8527cec9416f44b9b61b19caedad2b87f", size = 196048, upload-time = "2026-05-28T14:32:38.55Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/de/a7/f76514cc40ad6234098ecdebda08732d75964776c51a42845b7da10649e2/idna-3.17-py3-none-any.whl", hash = "sha256:466e48829084efe2548012b855df21540b96f2e20e51bd124c851536556a592c", size = 65316, upload-time = "2026-05-28T14:32:37.035Z" }, +] + +[[package]] +name = "iniconfig" +version = "2.3.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/72/34/14ca021ce8e5dfedc35312d08ba8bf51fdd999c576889fc2c24cb97f4f10/iniconfig-2.3.0.tar.gz", hash = "sha256:c76315c77db068650d49c5b56314774a7804df16fee4402c1f19d6d15d8c4730", size = 20503, upload-time = "2025-10-18T21:55:43.219Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/cb/b1/3846dd7f199d53cb17f49cba7e651e9ce294d8497c8c150530ed11865bb8/iniconfig-2.3.0-py3-none-any.whl", hash = "sha256:f631c04d2c48c52b84d0d0549c99ff3859c98df65b3101406327ecc7d53fbf12", size = 7484, upload-time = "2025-10-18T21:55:41.639Z" }, +] + +[[package]] +name = "jinja2" +version = "3.1.6" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "markupsafe" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/df/bf/f7da0350254c0ed7c72f3e33cef02e048281fec7ecec5f032d4aac52226b/jinja2-3.1.6.tar.gz", hash = "sha256:0137fb05990d35f1275a587e9aee6d56da821fc83491a0fb838183be43f66d6d", size = 245115, upload-time = "2025-03-05T20:05:02.478Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/62/a1/3d680cbfd5f4b8f15abc1d571870c5fc3e594bb582bc3b64ea099db13e56/jinja2-3.1.6-py3-none-any.whl", hash = "sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67", size = 134899, upload-time = 
"2025-03-05T20:05:00.369Z" }, +] + +[[package]] +name = "jsonschema" +version = "4.26.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "attrs" }, + { name = "jsonschema-specifications" }, + { name = "referencing" }, + { name = "rpds-py" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b3/fc/e067678238fa451312d4c62bf6e6cf5ec56375422aee02f9cb5f909b3047/jsonschema-4.26.0.tar.gz", hash = "sha256:0c26707e2efad8aa1bfc5b7ce170f3fccc2e4918ff85989ba9ffa9facb2be326", size = 366583, upload-time = "2026-01-07T13:41:07.246Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/69/90/f63fb5873511e014207a475e2bb4e8b2e570d655b00ac19a9a0ca0a385ee/jsonschema-4.26.0-py3-none-any.whl", hash = "sha256:d489f15263b8d200f8387e64b4c3a75f06629559fb73deb8fdfb525f2dab50ce", size = 90630, upload-time = "2026-01-07T13:41:05.306Z" }, +] + +[[package]] +name = "jsonschema-specifications" +version = "2025.9.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "referencing" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/19/74/a633ee74eb36c44aa6d1095e7cc5569bebf04342ee146178e2d36600708b/jsonschema_specifications-2025.9.1.tar.gz", hash = "sha256:b540987f239e745613c7a9176f3edb72b832a4ac465cf02712288397832b5e8d", size = 32855, upload-time = "2025-09-08T01:34:59.186Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/41/45/1a4ed80516f02155c51f51e8cedb3c1902296743db0bbc66608a0db2814f/jsonschema_specifications-2025.9.1-py3-none-any.whl", hash = "sha256:98802fee3a11ee76ecaca44429fda8a41bff98b00a0f2838151b113f210cc6fe", size = 18437, upload-time = "2025-09-08T01:34:57.871Z" }, +] + +[[package]] +name = "kartograph-agent-runtime" +version = "0.1.0" +source = { virtual = "." 
} +dependencies = [ + { name = "claude-agent-sdk" }, + { name = "fastapi", extra = ["standard"] }, + { name = "httpx" }, + { name = "pydantic-settings" }, +] + +[package.dev-dependencies] +dev = [ + { name = "pytest" }, + { name = "pytest-asyncio" }, +] + +[package.metadata] +requires-dist = [ + { name = "claude-agent-sdk", specifier = ">=0.2.87" }, + { name = "fastapi", extras = ["standard"], specifier = ">=0.123.9" }, + { name = "httpx", specifier = ">=0.28.1" }, + { name = "pydantic-settings", specifier = ">=2.12.0" }, +] + +[package.metadata.requires-dev] +dev = [ + { name = "pytest", specifier = ">=9.0.1" }, + { name = "pytest-asyncio", specifier = ">=1.3.0" }, +] + +[[package]] +name = "markdown-it-py" +version = "4.2.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "mdurl" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/06/ff/7841249c247aa650a76b9ee4bbaeae59370dc8bfd2f6c01f3630c35eb134/markdown_it_py-4.2.0.tar.gz", hash = "sha256:04a21681d6fbb623de53f6f364d352309d4094dd4194040a10fd51833e418d49", size = 82454, upload-time = "2026-05-07T12:08:28.36Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b3/81/4da04ced5a082363ecfa159c010d200ecbd959ae410c10c0264a38cac0f5/markdown_it_py-4.2.0-py3-none-any.whl", hash = "sha256:9f7ebbcd14fe59494226453aed97c1070d83f8d24b6fc3a3bcf9a38092641c4a", size = 91687, upload-time = "2026-05-07T12:08:27.182Z" }, +] + +[[package]] +name = "markupsafe" +version = "3.0.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/7e/99/7690b6d4034fffd95959cbe0c02de8deb3098cc577c67bb6a24fe5d7caa7/markupsafe-3.0.3.tar.gz", hash = "sha256:722695808f4b6457b320fdc131280796bdceb04ab50fe1795cd540799ebe1698", size = 80313, upload-time = "2025-09-27T18:37:40.426Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/5a/72/147da192e38635ada20e0a2e1a51cf8823d2119ce8883f7053879c2199b5/markupsafe-3.0.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:d53197da72cc091b024dd97249dfc7794d6a56530370992a5e1a08983ad9230e", size = 11615, upload-time = "2025-09-27T18:36:30.854Z" }, + { url = "https://files.pythonhosted.org/packages/9a/81/7e4e08678a1f98521201c3079f77db69fb552acd56067661f8c2f534a718/markupsafe-3.0.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1872df69a4de6aead3491198eaf13810b565bdbeec3ae2dc8780f14458ec73ce", size = 12020, upload-time = "2025-09-27T18:36:31.971Z" }, + { url = "https://files.pythonhosted.org/packages/1e/2c/799f4742efc39633a1b54a92eec4082e4f815314869865d876824c257c1e/markupsafe-3.0.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3a7e8ae81ae39e62a41ec302f972ba6ae23a5c5396c8e60113e9066ef893da0d", size = 24332, upload-time = "2025-09-27T18:36:32.813Z" }, + { url = "https://files.pythonhosted.org/packages/3c/2e/8d0c2ab90a8c1d9a24f0399058ab8519a3279d1bd4289511d74e909f060e/markupsafe-3.0.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d6dd0be5b5b189d31db7cda48b91d7e0a9795f31430b7f271219ab30f1d3ac9d", size = 22947, upload-time = "2025-09-27T18:36:33.86Z" }, + { url = "https://files.pythonhosted.org/packages/2c/54/887f3092a85238093a0b2154bd629c89444f395618842e8b0c41783898ea/markupsafe-3.0.3-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:94c6f0bb423f739146aec64595853541634bde58b2135f27f61c1ffd1cd4d16a", size = 21962, upload-time = "2025-09-27T18:36:35.099Z" }, + { url = "https://files.pythonhosted.org/packages/c9/2f/336b8c7b6f4a4d95e91119dc8521402461b74a485558d8f238a68312f11c/markupsafe-3.0.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:be8813b57049a7dc738189df53d69395eba14fb99345e0a5994914a3864c8a4b", size = 23760, upload-time = "2025-09-27T18:36:36.001Z" }, + { url = 
"https://files.pythonhosted.org/packages/32/43/67935f2b7e4982ffb50a4d169b724d74b62a3964bc1a9a527f5ac4f1ee2b/markupsafe-3.0.3-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:83891d0e9fb81a825d9a6d61e3f07550ca70a076484292a70fde82c4b807286f", size = 21529, upload-time = "2025-09-27T18:36:36.906Z" }, + { url = "https://files.pythonhosted.org/packages/89/e0/4486f11e51bbba8b0c041098859e869e304d1c261e59244baa3d295d47b7/markupsafe-3.0.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:77f0643abe7495da77fb436f50f8dab76dbc6e5fd25d39589a0f1fe6548bfa2b", size = 23015, upload-time = "2025-09-27T18:36:37.868Z" }, + { url = "https://files.pythonhosted.org/packages/2f/e1/78ee7a023dac597a5825441ebd17170785a9dab23de95d2c7508ade94e0e/markupsafe-3.0.3-cp312-cp312-win32.whl", hash = "sha256:d88b440e37a16e651bda4c7c2b930eb586fd15ca7406cb39e211fcff3bf3017d", size = 14540, upload-time = "2025-09-27T18:36:38.761Z" }, + { url = "https://files.pythonhosted.org/packages/aa/5b/bec5aa9bbbb2c946ca2733ef9c4ca91c91b6a24580193e891b5f7dbe8e1e/markupsafe-3.0.3-cp312-cp312-win_amd64.whl", hash = "sha256:26a5784ded40c9e318cfc2bdb30fe164bdb8665ded9cd64d500a34fb42067b1c", size = 15105, upload-time = "2025-09-27T18:36:39.701Z" }, + { url = "https://files.pythonhosted.org/packages/e5/f1/216fc1bbfd74011693a4fd837e7026152e89c4bcf3e77b6692fba9923123/markupsafe-3.0.3-cp312-cp312-win_arm64.whl", hash = "sha256:35add3b638a5d900e807944a078b51922212fb3dedb01633a8defc4b01a3c85f", size = 13906, upload-time = "2025-09-27T18:36:40.689Z" }, + { url = "https://files.pythonhosted.org/packages/38/2f/907b9c7bbba283e68f20259574b13d005c121a0fa4c175f9bed27c4597ff/markupsafe-3.0.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:e1cf1972137e83c5d4c136c43ced9ac51d0e124706ee1c8aa8532c1287fa8795", size = 11622, upload-time = "2025-09-27T18:36:41.777Z" }, + { url = 
"https://files.pythonhosted.org/packages/9c/d9/5f7756922cdd676869eca1c4e3c0cd0df60ed30199ffd775e319089cb3ed/markupsafe-3.0.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:116bb52f642a37c115f517494ea5feb03889e04df47eeff5b130b1808ce7c219", size = 12029, upload-time = "2025-09-27T18:36:43.257Z" }, + { url = "https://files.pythonhosted.org/packages/00/07/575a68c754943058c78f30db02ee03a64b3c638586fba6a6dd56830b30a3/markupsafe-3.0.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:133a43e73a802c5562be9bbcd03d090aa5a1fe899db609c29e8c8d815c5f6de6", size = 24374, upload-time = "2025-09-27T18:36:44.508Z" }, + { url = "https://files.pythonhosted.org/packages/a9/21/9b05698b46f218fc0e118e1f8168395c65c8a2c750ae2bab54fc4bd4e0e8/markupsafe-3.0.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ccfcd093f13f0f0b7fdd0f198b90053bf7b2f02a3927a30e63f3ccc9df56b676", size = 22980, upload-time = "2025-09-27T18:36:45.385Z" }, + { url = "https://files.pythonhosted.org/packages/7f/71/544260864f893f18b6827315b988c146b559391e6e7e8f7252839b1b846a/markupsafe-3.0.3-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:509fa21c6deb7a7a273d629cf5ec029bc209d1a51178615ddf718f5918992ab9", size = 21990, upload-time = "2025-09-27T18:36:46.916Z" }, + { url = "https://files.pythonhosted.org/packages/c2/28/b50fc2f74d1ad761af2f5dcce7492648b983d00a65b8c0e0cb457c82ebbe/markupsafe-3.0.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:a4afe79fb3de0b7097d81da19090f4df4f8d3a2b3adaa8764138aac2e44f3af1", size = 23784, upload-time = "2025-09-27T18:36:47.884Z" }, + { url = "https://files.pythonhosted.org/packages/ed/76/104b2aa106a208da8b17a2fb72e033a5a9d7073c68f7e508b94916ed47a9/markupsafe-3.0.3-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:795e7751525cae078558e679d646ae45574b47ed6e7771863fcc079a6171a0fc", size = 21588, upload-time = "2025-09-27T18:36:48.82Z" }, + { url = 
"https://files.pythonhosted.org/packages/b5/99/16a5eb2d140087ebd97180d95249b00a03aa87e29cc224056274f2e45fd6/markupsafe-3.0.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:8485f406a96febb5140bfeca44a73e3ce5116b2501ac54fe953e488fb1d03b12", size = 23041, upload-time = "2025-09-27T18:36:49.797Z" }, + { url = "https://files.pythonhosted.org/packages/19/bc/e7140ed90c5d61d77cea142eed9f9c303f4c4806f60a1044c13e3f1471d0/markupsafe-3.0.3-cp313-cp313-win32.whl", hash = "sha256:bdd37121970bfd8be76c5fb069c7751683bdf373db1ed6c010162b2a130248ed", size = 14543, upload-time = "2025-09-27T18:36:51.584Z" }, + { url = "https://files.pythonhosted.org/packages/05/73/c4abe620b841b6b791f2edc248f556900667a5a1cf023a6646967ae98335/markupsafe-3.0.3-cp313-cp313-win_amd64.whl", hash = "sha256:9a1abfdc021a164803f4d485104931fb8f8c1efd55bc6b748d2f5774e78b62c5", size = 15113, upload-time = "2025-09-27T18:36:52.537Z" }, + { url = "https://files.pythonhosted.org/packages/f0/3a/fa34a0f7cfef23cf9500d68cb7c32dd64ffd58a12b09225fb03dd37d5b80/markupsafe-3.0.3-cp313-cp313-win_arm64.whl", hash = "sha256:7e68f88e5b8799aa49c85cd116c932a1ac15caaa3f5db09087854d218359e485", size = 13911, upload-time = "2025-09-27T18:36:53.513Z" }, + { url = "https://files.pythonhosted.org/packages/e4/d7/e05cd7efe43a88a17a37b3ae96e79a19e846f3f456fe79c57ca61356ef01/markupsafe-3.0.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:218551f6df4868a8d527e3062d0fb968682fe92054e89978594c28e642c43a73", size = 11658, upload-time = "2025-09-27T18:36:54.819Z" }, + { url = "https://files.pythonhosted.org/packages/99/9e/e412117548182ce2148bdeacdda3bb494260c0b0184360fe0d56389b523b/markupsafe-3.0.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:3524b778fe5cfb3452a09d31e7b5adefeea8c5be1d43c4f810ba09f2ceb29d37", size = 12066, upload-time = "2025-09-27T18:36:55.714Z" }, + { url = 
"https://files.pythonhosted.org/packages/bc/e6/fa0ffcda717ef64a5108eaa7b4f5ed28d56122c9a6d70ab8b72f9f715c80/markupsafe-3.0.3-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4e885a3d1efa2eadc93c894a21770e4bc67899e3543680313b09f139e149ab19", size = 25639, upload-time = "2025-09-27T18:36:56.908Z" }, + { url = "https://files.pythonhosted.org/packages/96/ec/2102e881fe9d25fc16cb4b25d5f5cde50970967ffa5dddafdb771237062d/markupsafe-3.0.3-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8709b08f4a89aa7586de0aadc8da56180242ee0ada3999749b183aa23df95025", size = 23569, upload-time = "2025-09-27T18:36:57.913Z" }, + { url = "https://files.pythonhosted.org/packages/4b/30/6f2fce1f1f205fc9323255b216ca8a235b15860c34b6798f810f05828e32/markupsafe-3.0.3-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:b8512a91625c9b3da6f127803b166b629725e68af71f8184ae7e7d54686a56d6", size = 23284, upload-time = "2025-09-27T18:36:58.833Z" }, + { url = "https://files.pythonhosted.org/packages/58/47/4a0ccea4ab9f5dcb6f79c0236d954acb382202721e704223a8aafa38b5c8/markupsafe-3.0.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:9b79b7a16f7fedff2495d684f2b59b0457c3b493778c9eed31111be64d58279f", size = 24801, upload-time = "2025-09-27T18:36:59.739Z" }, + { url = "https://files.pythonhosted.org/packages/6a/70/3780e9b72180b6fecb83a4814d84c3bf4b4ae4bf0b19c27196104149734c/markupsafe-3.0.3-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:12c63dfb4a98206f045aa9563db46507995f7ef6d83b2f68eda65c307c6829eb", size = 22769, upload-time = "2025-09-27T18:37:00.719Z" }, + { url = "https://files.pythonhosted.org/packages/98/c5/c03c7f4125180fc215220c035beac6b9cb684bc7a067c84fc69414d315f5/markupsafe-3.0.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:8f71bc33915be5186016f675cd83a1e08523649b0e33efdb898db577ef5bb009", size = 23642, upload-time = "2025-09-27T18:37:01.673Z" }, + 
{ url = "https://files.pythonhosted.org/packages/80/d6/2d1b89f6ca4bff1036499b1e29a1d02d282259f3681540e16563f27ebc23/markupsafe-3.0.3-cp313-cp313t-win32.whl", hash = "sha256:69c0b73548bc525c8cb9a251cddf1931d1db4d2258e9599c28c07ef3580ef354", size = 14612, upload-time = "2025-09-27T18:37:02.639Z" }, + { url = "https://files.pythonhosted.org/packages/2b/98/e48a4bfba0a0ffcf9925fe2d69240bfaa19c6f7507b8cd09c70684a53c1e/markupsafe-3.0.3-cp313-cp313t-win_amd64.whl", hash = "sha256:1b4b79e8ebf6b55351f0d91fe80f893b4743f104bff22e90697db1590e47a218", size = 15200, upload-time = "2025-09-27T18:37:03.582Z" }, + { url = "https://files.pythonhosted.org/packages/0e/72/e3cc540f351f316e9ed0f092757459afbc595824ca724cbc5a5d4263713f/markupsafe-3.0.3-cp313-cp313t-win_arm64.whl", hash = "sha256:ad2cf8aa28b8c020ab2fc8287b0f823d0a7d8630784c31e9ee5edea20f406287", size = 13973, upload-time = "2025-09-27T18:37:04.929Z" }, + { url = "https://files.pythonhosted.org/packages/33/8a/8e42d4838cd89b7dde187011e97fe6c3af66d8c044997d2183fbd6d31352/markupsafe-3.0.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:eaa9599de571d72e2daf60164784109f19978b327a3910d3e9de8c97b5b70cfe", size = 11619, upload-time = "2025-09-27T18:37:06.342Z" }, + { url = "https://files.pythonhosted.org/packages/b5/64/7660f8a4a8e53c924d0fa05dc3a55c9cee10bbd82b11c5afb27d44b096ce/markupsafe-3.0.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:c47a551199eb8eb2121d4f0f15ae0f923d31350ab9280078d1e5f12b249e0026", size = 12029, upload-time = "2025-09-27T18:37:07.213Z" }, + { url = "https://files.pythonhosted.org/packages/da/ef/e648bfd021127bef5fa12e1720ffed0c6cbb8310c8d9bea7266337ff06de/markupsafe-3.0.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f34c41761022dd093b4b6896d4810782ffbabe30f2d443ff5f083e0cbbb8c737", size = 24408, upload-time = "2025-09-27T18:37:09.572Z" }, + { url = 
"https://files.pythonhosted.org/packages/41/3c/a36c2450754618e62008bf7435ccb0f88053e07592e6028a34776213d877/markupsafe-3.0.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:457a69a9577064c05a97c41f4e65148652db078a3a509039e64d3467b9e7ef97", size = 23005, upload-time = "2025-09-27T18:37:10.58Z" }, + { url = "https://files.pythonhosted.org/packages/bc/20/b7fdf89a8456b099837cd1dc21974632a02a999ec9bf7ca3e490aacd98e7/markupsafe-3.0.3-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e8afc3f2ccfa24215f8cb28dcf43f0113ac3c37c2f0f0806d8c70e4228c5cf4d", size = 22048, upload-time = "2025-09-27T18:37:11.547Z" }, + { url = "https://files.pythonhosted.org/packages/9a/a7/591f592afdc734f47db08a75793a55d7fbcc6902a723ae4cfbab61010cc5/markupsafe-3.0.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:ec15a59cf5af7be74194f7ab02d0f59a62bdcf1a537677ce67a2537c9b87fcda", size = 23821, upload-time = "2025-09-27T18:37:12.48Z" }, + { url = "https://files.pythonhosted.org/packages/7d/33/45b24e4f44195b26521bc6f1a82197118f74df348556594bd2262bda1038/markupsafe-3.0.3-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:0eb9ff8191e8498cca014656ae6b8d61f39da5f95b488805da4bb029cccbfbaf", size = 21606, upload-time = "2025-09-27T18:37:13.485Z" }, + { url = "https://files.pythonhosted.org/packages/ff/0e/53dfaca23a69fbfbbf17a4b64072090e70717344c52eaaaa9c5ddff1e5f0/markupsafe-3.0.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:2713baf880df847f2bece4230d4d094280f4e67b1e813eec43b4c0e144a34ffe", size = 23043, upload-time = "2025-09-27T18:37:14.408Z" }, + { url = "https://files.pythonhosted.org/packages/46/11/f333a06fc16236d5238bfe74daccbca41459dcd8d1fa952e8fbd5dccfb70/markupsafe-3.0.3-cp314-cp314-win32.whl", hash = "sha256:729586769a26dbceff69f7a7dbbf59ab6572b99d94576a5592625d5b411576b9", size = 14747, upload-time = "2025-09-27T18:37:15.36Z" }, + { url = 
"https://files.pythonhosted.org/packages/28/52/182836104b33b444e400b14f797212f720cbc9ed6ba34c800639d154e821/markupsafe-3.0.3-cp314-cp314-win_amd64.whl", hash = "sha256:bdc919ead48f234740ad807933cdf545180bfbe9342c2bb451556db2ed958581", size = 15341, upload-time = "2025-09-27T18:37:16.496Z" }, + { url = "https://files.pythonhosted.org/packages/6f/18/acf23e91bd94fd7b3031558b1f013adfa21a8e407a3fdb32745538730382/markupsafe-3.0.3-cp314-cp314-win_arm64.whl", hash = "sha256:5a7d5dc5140555cf21a6fefbdbf8723f06fcd2f63ef108f2854de715e4422cb4", size = 14073, upload-time = "2025-09-27T18:37:17.476Z" }, + { url = "https://files.pythonhosted.org/packages/3c/f0/57689aa4076e1b43b15fdfa646b04653969d50cf30c32a102762be2485da/markupsafe-3.0.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:1353ef0c1b138e1907ae78e2f6c63ff67501122006b0f9abad68fda5f4ffc6ab", size = 11661, upload-time = "2025-09-27T18:37:18.453Z" }, + { url = "https://files.pythonhosted.org/packages/89/c3/2e67a7ca217c6912985ec766c6393b636fb0c2344443ff9d91404dc4c79f/markupsafe-3.0.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:1085e7fbddd3be5f89cc898938f42c0b3c711fdcb37d75221de2666af647c175", size = 12069, upload-time = "2025-09-27T18:37:19.332Z" }, + { url = "https://files.pythonhosted.org/packages/f0/00/be561dce4e6ca66b15276e184ce4b8aec61fe83662cce2f7d72bd3249d28/markupsafe-3.0.3-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1b52b4fb9df4eb9ae465f8d0c228a00624de2334f216f178a995ccdcf82c4634", size = 25670, upload-time = "2025-09-27T18:37:20.245Z" }, + { url = "https://files.pythonhosted.org/packages/50/09/c419f6f5a92e5fadde27efd190eca90f05e1261b10dbd8cbcb39cd8ea1dc/markupsafe-3.0.3-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fed51ac40f757d41b7c48425901843666a6677e3e8eb0abcff09e4ba6e664f50", size = 23598, upload-time = "2025-09-27T18:37:21.177Z" }, + { url = 
"https://files.pythonhosted.org/packages/22/44/a0681611106e0b2921b3033fc19bc53323e0b50bc70cffdd19f7d679bb66/markupsafe-3.0.3-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:f190daf01f13c72eac4efd5c430a8de82489d9cff23c364c3ea822545032993e", size = 23261, upload-time = "2025-09-27T18:37:22.167Z" }, + { url = "https://files.pythonhosted.org/packages/5f/57/1b0b3f100259dc9fffe780cfb60d4be71375510e435efec3d116b6436d43/markupsafe-3.0.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:e56b7d45a839a697b5eb268c82a71bd8c7f6c94d6fd50c3d577fa39a9f1409f5", size = 24835, upload-time = "2025-09-27T18:37:23.296Z" }, + { url = "https://files.pythonhosted.org/packages/26/6a/4bf6d0c97c4920f1597cc14dd720705eca0bf7c787aebc6bb4d1bead5388/markupsafe-3.0.3-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:f3e98bb3798ead92273dc0e5fd0f31ade220f59a266ffd8a4f6065e0a3ce0523", size = 22733, upload-time = "2025-09-27T18:37:24.237Z" }, + { url = "https://files.pythonhosted.org/packages/14/c7/ca723101509b518797fedc2fdf79ba57f886b4aca8a7d31857ba3ee8281f/markupsafe-3.0.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:5678211cb9333a6468fb8d8be0305520aa073f50d17f089b5b4b477ea6e67fdc", size = 23672, upload-time = "2025-09-27T18:37:25.271Z" }, + { url = "https://files.pythonhosted.org/packages/fb/df/5bd7a48c256faecd1d36edc13133e51397e41b73bb77e1a69deab746ebac/markupsafe-3.0.3-cp314-cp314t-win32.whl", hash = "sha256:915c04ba3851909ce68ccc2b8e2cd691618c4dc4c4232fb7982bca3f41fd8c3d", size = 14819, upload-time = "2025-09-27T18:37:26.285Z" }, + { url = "https://files.pythonhosted.org/packages/1a/8a/0402ba61a2f16038b48b39bccca271134be00c5c9f0f623208399333c448/markupsafe-3.0.3-cp314-cp314t-win_amd64.whl", hash = "sha256:4faffd047e07c38848ce017e8725090413cd80cbc23d86e55c587bf979e579c9", size = 15426, upload-time = "2025-09-27T18:37:27.316Z" }, + { url = 
"https://files.pythonhosted.org/packages/70/bc/6f1c2f612465f5fa89b95bead1f44dcb607670fd42891d8fdcd5d039f4f4/markupsafe-3.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:32001d6a8fc98c8cb5c947787c5d08b0a50663d139f1305bac5885d98d9b40fa", size = 14146, upload-time = "2025-09-27T18:37:28.327Z" }, +] + +[[package]] +name = "mcp" +version = "1.27.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, + { name = "httpx" }, + { name = "httpx-sse" }, + { name = "jsonschema" }, + { name = "pydantic" }, + { name = "pydantic-settings" }, + { name = "pyjwt", extra = ["crypto"] }, + { name = "python-multipart" }, + { name = "pywin32", marker = "sys_platform == 'win32'" }, + { name = "sse-starlette" }, + { name = "starlette" }, + { name = "typing-extensions" }, + { name = "typing-inspection" }, + { name = "uvicorn", marker = "sys_platform != 'emscripten'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/38/83/d1efe7c2980d8a3afa476f4e3d42d53dd54c0ab94c27bee5d755b45c8b73/mcp-1.27.1.tar.gz", hash = "sha256:0f47e1820f8f8f941466b39749eb1d1839a04caddca2bc60e9d46e8a99914924", size = 608458, upload-time = "2026-05-08T16:50:12.601Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fd/73/42d9596facebdb533b7f0b86c1b0364ef350d1f8ba78b1052e8a58b48b65/mcp-1.27.1-py3-none-any.whl", hash = "sha256:1af3c4203b329430fde7a87b4fcb6392a041f5cb851fd68fc674016ab4e7c06f", size = 216260, upload-time = "2026-05-08T16:50:10.547Z" }, +] + +[[package]] +name = "mdurl" +version = "0.1.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d6/54/cfe61301667036ec958cb99bd3efefba235e65cdeb9c84d24a8293ba1d90/mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba", size = 8729, upload-time = "2022-08-14T12:40:10.846Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979, upload-time = "2022-08-14T12:40:09.779Z" }, +] + +[[package]] +name = "packaging" +version = "26.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d7/f1/e7a6dd94a8d4a5626c03e4e99c87f241ba9e350cd9e6d75123f992427270/packaging-26.2.tar.gz", hash = "sha256:ff452ff5a3e828ce110190feff1178bb1f2ea2281fa2075aadb987c2fb221661", size = 228134, upload-time = "2026-04-24T20:15:23.917Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/df/b2/87e62e8c3e2f4b32e5fe99e0b86d576da1312593b39f47d8ceef365e95ed/packaging-26.2-py3-none-any.whl", hash = "sha256:5fc45236b9446107ff2415ce77c807cee2862cb6fac22b8a73826d0693b0980e", size = 100195, upload-time = "2026-04-24T20:15:22.081Z" }, +] + +[[package]] +name = "pluggy" +version = "1.6.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f9/e2/3e91f31a7d2b083fe6ef3fa267035b518369d9511ffab804f839851d2779/pluggy-1.6.0.tar.gz", hash = "sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3", size = 69412, upload-time = "2025-05-15T12:30:07.975Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538, upload-time = "2025-05-15T12:30:06.134Z" }, +] + +[[package]] +name = "pycparser" +version = "3.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/1b/7d/92392ff7815c21062bea51aa7b87d45576f649f16458d78b7cf94b9ab2e6/pycparser-3.0.tar.gz", hash = 
"sha256:600f49d217304a5902ac3c37e1281c9fe94e4d0489de643a9504c5cdfdfc6b29", size = 103492, upload-time = "2026-01-21T14:26:51.89Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0c/c3/44f3fbbfa403ea2a7c779186dc20772604442dde72947e7d01069cbe98e3/pycparser-3.0-py3-none-any.whl", hash = "sha256:b727414169a36b7d524c1c3e31839a521725078d7b2ff038656844266160a992", size = 48172, upload-time = "2026-01-21T14:26:50.693Z" }, +] + +[[package]] +name = "pydantic" +version = "2.13.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "annotated-types" }, + { name = "pydantic-core" }, + { name = "typing-extensions" }, + { name = "typing-inspection" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/18/a5/b60d21ac674192f8ab0ba4e9fd860690f9b4a6e51ca5df118733b487d8d6/pydantic-2.13.4.tar.gz", hash = "sha256:c40756b57adaa8b1efeeced5c196f3f3b7c435f90e84ea7f443901bec8099ef6", size = 844775, upload-time = "2026-05-06T13:43:05.343Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fd/7b/122376b1fd3c62c1ed9dc80c931ace4844b3c55407b6fb2d199377c9736f/pydantic-2.13.4-py3-none-any.whl", hash = "sha256:45a282cde31d808236fd7ea9d919b128653c8b38b393d1c4ab335c62924d9aba", size = 472262, upload-time = "2026-05-06T13:43:02.641Z" }, +] + +[package.optional-dependencies] +email = [ + { name = "email-validator" }, +] + +[[package]] +name = "pydantic-core" +version = "2.46.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/9d/56/921726b776ace8d8f5db44c4ef961006580d91dc52b803c489fafd1aa249/pydantic_core-2.46.4.tar.gz", hash = "sha256:62f875393d7f270851f20523dd2e29f082bcc82292d66db2b64ea71f64b6e1c1", size = 471464, upload-time = "2026-05-06T13:37:06.98Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/ce/8c/af022f0af448d7747c5154288d46b5f2bc5f17366eaa0e23e9aa04d59f3b/pydantic_core-2.46.4-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:3245406455a5d98187ec35530fd772b1d799b26667980872c8d4614991e2c4a2", size = 2106158, upload-time = "2026-05-06T13:38:57.215Z" }, + { url = "https://files.pythonhosted.org/packages/19/95/6195171e385007300f0f5574592e467c568becce2d937a0b6804f218bc49/pydantic_core-2.46.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:962ccbab7b642487b1d8b7df90ef677e03134cf1fd8880bf698649b22a69371f", size = 1951724, upload-time = "2026-05-06T13:37:02.697Z" }, + { url = "https://files.pythonhosted.org/packages/8e/bc/f47d1ff9cbb1620e1b5b697eef06010035735f07820180e74178226b27b3/pydantic_core-2.46.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8233f2947cf85404441fd7e0085f53b10c93e0ee78611099b5c7237e36aacbf7", size = 1975742, upload-time = "2026-05-06T13:37:09.448Z" }, + { url = "https://files.pythonhosted.org/packages/5b/11/9b9a5b0306345664a2da6410877af6e8082481b5884b3ddd78d47c6013ce/pydantic_core-2.46.4-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3a233125ac121aa3ffba9a2b59edfc4a985a76092dc8279586ab4b71390875e7", size = 2052418, upload-time = "2026-05-06T13:37:38.234Z" }, + { url = "https://files.pythonhosted.org/packages/f1/b7/a65fec226f5d78fc39f4a13c4cc0c768c22b113438f60c14adc9d2865038/pydantic_core-2.46.4-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5b712b53160b79a5850310b912a5ef8e57e56947c8ad690c227f5c9d7e561712", size = 2232274, upload-time = "2026-05-06T13:38:27.753Z" }, + { url = "https://files.pythonhosted.org/packages/68/f0/92039db98b907ef49269a8271f67db9cb78ae2fc68062ef7e4e77adb5f61/pydantic_core-2.46.4-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9401557acd873c3a7f3eb9383edef8ac4968f9510e340f4808d427e75667e7b4", size = 2309940, upload-time = "2026-05-06T13:38:05.353Z" }, + { 
url = "https://files.pythonhosted.org/packages/5f/97/2aab507d3d00ca626e8e57c1eac6a79e4e5fbcc63eb99733ff55d1717f65/pydantic_core-2.46.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:926c9541b14b12b1681dca8a0b75feb510b06c6341b70a8e500c2fdcff837cce", size = 2094516, upload-time = "2026-05-06T13:39:10.577Z" }, + { url = "https://files.pythonhosted.org/packages/22/37/a8aca44d40d737dde2bc05b3c6c07dff0de07ce6f82e9f3167aeaf4d5dea/pydantic_core-2.46.4-cp312-cp312-manylinux_2_31_riscv64.whl", hash = "sha256:56cb4851bcaf3d117eddcef4fe66afd750a50274b0da8e22be256d10e5611987", size = 2136854, upload-time = "2026-05-06T13:40:22.59Z" }, + { url = "https://files.pythonhosted.org/packages/24/99/fcef1b79238c06a8cbec70819ac722ba76e02bc8ada9b0fd66eba40da01b/pydantic_core-2.46.4-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:c68fcd102d71ea85c5b2dfac3f4f8476eff42a9e078fd5faefff6d145063536b", size = 2180306, upload-time = "2026-05-06T13:40:10.666Z" }, + { url = "https://files.pythonhosted.org/packages/ae/6c/fc44000918855b42779d007ae63b0532794739027b2f417321cddbc44f6a/pydantic_core-2.46.4-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:b2f69dec1725e79a012d920df1707de5caf7ed5e08f3be4435e25803efc47458", size = 2190044, upload-time = "2026-05-06T13:40:43.231Z" }, + { url = "https://files.pythonhosted.org/packages/6b/65/d9cadc9f1920d7a127ad2edba16c1db7916e59719285cd6c94600b0080ba/pydantic_core-2.46.4-cp312-cp312-musllinux_1_1_armv7l.whl", hash = "sha256:8d0820e8192167f80d88d64038e609c31452eeca865b4e1d9950a27a4609b00b", size = 2329133, upload-time = "2026-05-06T13:39:57.365Z" }, + { url = "https://files.pythonhosted.org/packages/d0/cf/c873d91679f3a30bcf5e7ac280ce5573483e72295307685120d0d5ad3416/pydantic_core-2.46.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:fbdb89b3e1c94a30cc5edfce477c6e6a5dc4d8f84665b455c27582f211a1c72c", size = 2374464, upload-time = "2026-05-06T13:38:06.976Z" }, + { url = 
"https://files.pythonhosted.org/packages/47/bd/6f2fc8188f31bf10590f1e98e7b306336161fac930a8c514cd7bd828c7dc/pydantic_core-2.46.4-cp312-cp312-win32.whl", hash = "sha256:9aa768456404a8bf48a4406685ac2bec8e72b62c69313734fa3b73cf33b3a894", size = 1974823, upload-time = "2026-05-06T13:40:47.985Z" }, + { url = "https://files.pythonhosted.org/packages/40/8c/985c1d41ea1107c2534abd9870e4ed5c8e7669b5c308297835c001e7a1c4/pydantic_core-2.46.4-cp312-cp312-win_amd64.whl", hash = "sha256:e9c26f834c65f5752f3f06cb08cb86a913ceb7274d0db6e267808a708b46bc89", size = 2072919, upload-time = "2026-05-06T13:39:21.153Z" }, + { url = "https://files.pythonhosted.org/packages/c4/ba/f463d006e0c47373ca7ec5e1a261c59dc01ef4d62b2657af925fb0deee3a/pydantic_core-2.46.4-cp312-cp312-win_arm64.whl", hash = "sha256:4fc73cb559bdb54b1134a706a2802a4cddd27a0633f5abb7e53056268751ac6a", size = 2027604, upload-time = "2026-05-06T13:39:03.753Z" }, + { url = "https://files.pythonhosted.org/packages/51/a2/5d30b469c5267a17b39dec53208222f76a8d351dfac4af661888c5aee77d/pydantic_core-2.46.4-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:5d5902252db0d3cedf8d4a1bc68f70eeb430f7e4c7104c8c476753519b423008", size = 2106306, upload-time = "2026-05-06T13:37:48.029Z" }, + { url = "https://files.pythonhosted.org/packages/c1/81/4fa520eaffa8bd7d1525e644cd6d39e7d60b1592bc5b516693c7340b50f1/pydantic_core-2.46.4-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:c94f0688e7b8d0a67abf40e57a7eaaecd17cc9586706a31b76c031f63df052b4", size = 1951906, upload-time = "2026-05-06T13:37:17.012Z" }, + { url = "https://files.pythonhosted.org/packages/03/d5/fd02da45b659668b05923b17ba3a0100a0a3d5541e3bd8fcc4ecb711309e/pydantic_core-2.46.4-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f027324c56cd5406ca49c124b0db10e56c69064fec039acc571c29020cc87c76", size = 1976802, upload-time = "2026-05-06T13:37:35.113Z" }, + { url = 
"https://files.pythonhosted.org/packages/21/f2/95727e1368be3d3ed485eaab7adbd7dda408f33f7a36e8b48e0144002b91/pydantic_core-2.46.4-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e739fee756ba1010f8bcccb534252e85a35fe45ae92c295a06059ce58b74ccd3", size = 2052446, upload-time = "2026-05-06T13:37:12.313Z" }, + { url = "https://files.pythonhosted.org/packages/9c/86/5d99feea3f77c7234b8718075b23db11532773c1a0dbd9b9490215dc2eeb/pydantic_core-2.46.4-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9d56801be94b86a9da183e5f3766e6310752b99ff647e38b09a9500d88e46e76", size = 2232757, upload-time = "2026-05-06T13:39:01.149Z" }, + { url = "https://files.pythonhosted.org/packages/d2/3a/508ac615935ef7588cf6d9e9b91309fdc2da751af865e02a9098de88258c/pydantic_core-2.46.4-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2412e734dcb48da14d4e4006b82b46b74f2518b8a26ee7e58c6844a6cd6d03c4", size = 2309275, upload-time = "2026-05-06T13:37:41.406Z" }, + { url = "https://files.pythonhosted.org/packages/07/f8/41db9de19d7987d6b04715a02b3b40aea467000275d9d758ffaa31af7d50/pydantic_core-2.46.4-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9551187363ffc0de2a00b2e47c25aeaeb1020b69b668762966df15fc5659dd5a", size = 2094467, upload-time = "2026-05-06T13:39:18.847Z" }, + { url = "https://files.pythonhosted.org/packages/2c/e2/f35033184cb11d0052daf4416e8e10a502ea2ac006fc4f459aee872727d1/pydantic_core-2.46.4-cp313-cp313-manylinux_2_31_riscv64.whl", hash = "sha256:0186750b482eefa11d7f435892b09c5c606193ef3375bcf94aa00ae6bfb66262", size = 2134417, upload-time = "2026-05-06T13:40:17.944Z" }, + { url = "https://files.pythonhosted.org/packages/7e/7b/6ceeb1cc90e193862f444ebe373d8fdf613f0a82572dde03fb10734c6c71/pydantic_core-2.46.4-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:5855698a4856556d86e8e6cd8434bc3ac0314ee8e12089ae0e143f64c6256e4e", size = 2179782, upload-time = 
"2026-05-06T13:40:32.618Z" }, + { url = "https://files.pythonhosted.org/packages/5a/f2/c8d7773ede6af08036423a00ae0ceffce266c3c52a096c435d68c896083f/pydantic_core-2.46.4-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:cbaf13819775b7f769bf4a1f066cb6df7a28d4480081a589828ef190226881cd", size = 2188782, upload-time = "2026-05-06T13:36:51.018Z" }, + { url = "https://files.pythonhosted.org/packages/59/31/0c864784e31f09f05cdd87606f08923b9c9e7f6e51dd27f20f62f975ce9f/pydantic_core-2.46.4-cp313-cp313-musllinux_1_1_armv7l.whl", hash = "sha256:633147d34cf4550417f12e2b1a0383973bdf5cdfde212cb09e9a581cf10820be", size = 2328334, upload-time = "2026-05-06T13:40:37.764Z" }, + { url = "https://files.pythonhosted.org/packages/c2/eb/4f6c8a41efa30baa755590f4141abf3a8c370fab610915733e74134a7270/pydantic_core-2.46.4-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:82cf5301172168103724d49a1444d3378cb20cdee30b116a1bd6031236298a5d", size = 2372986, upload-time = "2026-05-06T13:39:34.152Z" }, + { url = "https://files.pythonhosted.org/packages/5b/24/b375a480d53113860c299764bfe9f349a3dc9108b3adc0d7f0d786492ebf/pydantic_core-2.46.4-cp313-cp313-win32.whl", hash = "sha256:9fa8ae11da9e2b3126c6426f147e0fba88d96d65921799bb30c6abd1cb2c97fb", size = 1973693, upload-time = "2026-05-06T13:37:55.072Z" }, + { url = "https://files.pythonhosted.org/packages/7e/e8/cff247591966f2d22ec8c003cd7587e27b7ba7b81ab2fb888e3ab75dc285/pydantic_core-2.46.4-cp313-cp313-win_amd64.whl", hash = "sha256:6b3ace8194b0e5204818c92802dcdca7fc6d88aabbb799d7c795540d9cd6d292", size = 2071819, upload-time = "2026-05-06T13:38:49.139Z" }, + { url = "https://files.pythonhosted.org/packages/c6/1a/f4aee670d5670e9e148e0c82c7db98d780be566c6e6a97ee8035528ca0b3/pydantic_core-2.46.4-cp313-cp313-win_arm64.whl", hash = "sha256:184c081504d17f1c1066e430e117142b2c77d9448a97f7b65c6ac9fd9aee238d", size = 2027411, upload-time = "2026-05-06T13:40:45.796Z" }, + { url = 
"https://files.pythonhosted.org/packages/8d/74/228a26ddad29c6672b805d9fd78e8d251cd04004fa7eed0e622096cd0250/pydantic_core-2.46.4-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:428e04521a40150c85216fc8b85e8d39fece235a9cf5e383761238c7fa9b96fb", size = 2102079, upload-time = "2026-05-06T13:38:41.019Z" }, + { url = "https://files.pythonhosted.org/packages/ad/1f/8970b150a4b4365623ae00fc88603491f763c627311ae8031e3111356d6e/pydantic_core-2.46.4-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:23ace664830ee0bfe014a0c7bc248b1f7f25ed7ad103852c317624a1083af462", size = 1952179, upload-time = "2026-05-06T13:36:59.812Z" }, + { url = "https://files.pythonhosted.org/packages/95/30/5211a831ae054928054b2f79731661087a2bc5c01e825c672b3a4a8f1b3e/pydantic_core-2.46.4-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ce5c1d2a8b27468f433ca974829c44060b8097eedc39933e3c206a90ee49c4a9", size = 1978926, upload-time = "2026-05-06T13:37:39.933Z" }, + { url = "https://files.pythonhosted.org/packages/57/e9/689668733b1eb67adeef047db3c2e8788fcf65a7fd9c9e2b46b7744fe245/pydantic_core-2.46.4-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:7283d57845ecf5a163403eb0702dfc220cc4fbdd18919cb5ccea4f95ee1cdab4", size = 2046785, upload-time = "2026-05-06T13:38:01.995Z" }, + { url = "https://files.pythonhosted.org/packages/60/d9/6715260422ff50a2109878fd24d948a6c3446bb2664f34ee78cd972b3acd/pydantic_core-2.46.4-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8daafc69c93ee8a0204506a3b6b30f586ef54028f52aeeeb5c4cfc5184fd5914", size = 2228733, upload-time = "2026-05-06T13:40:50.371Z" }, + { url = "https://files.pythonhosted.org/packages/18/ae/fdb2f64316afca925640f8e70bb1a564b0ec2721c1389e25b8eb4bf9a299/pydantic_core-2.46.4-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cd2213145bcc2ba85884d0ac63d222fece9209678f77b9b4d76f054c561adb28", size = 2307534, upload-time = "2026-05-06T13:37:21.531Z" }, + { 
url = "https://files.pythonhosted.org/packages/89/1d/8eff589b45bb8190a9d12c49cfad0f176a5cbd1534908a6b5125e2886239/pydantic_core-2.46.4-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7a5f930472650a82629163023e630d160863fce524c616f4e5186e5de9d9a49b", size = 2099732, upload-time = "2026-05-06T13:39:31.942Z" }, + { url = "https://files.pythonhosted.org/packages/06/d5/ee5a3366637fee41dee51a1fc91562dcf12ddbc68fda34e6b253da2324bb/pydantic_core-2.46.4-cp314-cp314-manylinux_2_31_riscv64.whl", hash = "sha256:c1b3f518abeca3aa13c712fd202306e145abf59a18b094a6bafb2d2bbf59192c", size = 2129627, upload-time = "2026-05-06T13:37:25.033Z" }, + { url = "https://files.pythonhosted.org/packages/94/33/2414be571d2c6a6c4d08be21f9292b6d3fdb08949a97b6dfe985017821db/pydantic_core-2.46.4-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:1a7dd0b3ee80d90150e3495a3a13ac34dbcbfd4f012996a6a1d8900e91b5c0fb", size = 2179141, upload-time = "2026-05-06T13:37:14.046Z" }, + { url = "https://files.pythonhosted.org/packages/7b/79/7daa95be995be0eecc4cf75064cb33f9bbbfe3fe0158caf2f0d4a996a5c7/pydantic_core-2.46.4-cp314-cp314-musllinux_1_1_aarch64.whl", hash = "sha256:3fb702cd90b0446a3a1c5e470bfa0dd23c0233b676a9099ddcc964fa6ca13898", size = 2184325, upload-time = "2026-05-06T13:36:53.615Z" }, + { url = "https://files.pythonhosted.org/packages/9f/cb/d0a382f5c0de8a222dc61c65348e0ce831b1f68e0a018450d31c2cace3a5/pydantic_core-2.46.4-cp314-cp314-musllinux_1_1_armv7l.whl", hash = "sha256:b8458003118a712e66286df6a707db01c52c0f52f7db8e4a38f0da1d3b94fc4e", size = 2323990, upload-time = "2026-05-06T13:40:29.971Z" }, + { url = "https://files.pythonhosted.org/packages/05/db/d9ba624cc4a5aced1598e88c04fdbd8310c8a69b9d38b9a3d39ce3a61ed7/pydantic_core-2.46.4-cp314-cp314-musllinux_1_1_x86_64.whl", hash = "sha256:372429a130e469c9cd698925ce5fc50940b7a1336b0d82038e63d5bbc4edc519", size = 2369978, upload-time = "2026-05-06T13:37:23.027Z" }, + { url = 
"https://files.pythonhosted.org/packages/f2/20/d15df15ba918c423461905802bfd2981c3af0bfa0e40d05e13edbfa48bc3/pydantic_core-2.46.4-cp314-cp314-win32.whl", hash = "sha256:85bb3611ff1802f3ee7fdd7dbff26b56f343fb432d57a4728fdd49b6ef35e2f4", size = 1966354, upload-time = "2026-05-06T13:38:03.499Z" }, + { url = "https://files.pythonhosted.org/packages/fc/b6/6b8de4c0a7d7ab3004c439c80c5c1e0a3e8d78bbae19379b01960383d9e5/pydantic_core-2.46.4-cp314-cp314-win_amd64.whl", hash = "sha256:811ff8e9c313ab425368bcbb36e5c4ebd7108c2bbf4e4089cfbb0b01eff63fac", size = 2072238, upload-time = "2026-05-06T13:39:40.807Z" }, + { url = "https://files.pythonhosted.org/packages/32/36/51eb763beec1f4cf59b1db243a7dcc39cbb41230f050a09b9d69faaf0a48/pydantic_core-2.46.4-cp314-cp314-win_arm64.whl", hash = "sha256:bfec22eab3c8cc2ceec0248aec886624116dc079afa027ecc8ad4a7e62010f8a", size = 2018251, upload-time = "2026-05-06T13:37:26.72Z" }, + { url = "https://files.pythonhosted.org/packages/e8/91/855af51d625b23aa987116a19e231d2aaef9c4a415273ddc189b79a45fee/pydantic_core-2.46.4-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:af8244b2bef6aaad6d92cda81372de7f8c8d36c9f0c3ea36e827c60e7d9467a0", size = 2099593, upload-time = "2026-05-06T13:39:47.682Z" }, + { url = "https://files.pythonhosted.org/packages/fb/1b/8784a54c65edb5f49f0a14d6977cf1b209bba85a4c77445b255c2de58ab3/pydantic_core-2.46.4-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:5a4330cdbc57162e4b3aa303f588ba752257694c9c9be3e7ebb11b4aca659b5d", size = 1935226, upload-time = "2026-05-06T13:40:40.428Z" }, + { url = "https://files.pythonhosted.org/packages/e8/e7/1955d28d1afc56dd4b3ad7cc0cf39df1b9852964cf16e5d13912756d6d6b/pydantic_core-2.46.4-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:29c61fc04a3d840155ff08e475a04809278972fe6aef51e2720554e96367e34b", size = 1974605, upload-time = "2026-05-06T13:37:32.029Z" }, + { url = 
"https://files.pythonhosted.org/packages/93/e2/3fedbf0ba7a22850e6e9fd78117f1c0f10f950182344d8a6c535d468fdd8/pydantic_core-2.46.4-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:c50f2528cf200c5eed56faf3f4e22fcd5f38c157a8b78576e6ba3168ec35f000", size = 2030777, upload-time = "2026-05-06T13:38:55.239Z" }, + { url = "https://files.pythonhosted.org/packages/f8/61/46be275fcaaba0b4f5b9669dd852267ce1ff616592dccf7a7845588df091/pydantic_core-2.46.4-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0cbe8b01f948de4286c74cdd6c667aceb38f5c1e26f0693b3983d9d74887c65e", size = 2236641, upload-time = "2026-05-06T13:37:08.096Z" }, + { url = "https://files.pythonhosted.org/packages/60/db/12e93e46a8bac9988be3c016860f83293daea8c716c029c9ace279036f2f/pydantic_core-2.46.4-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:617d7e2ca7dcb8c5cf6bcb8c59b8832c94b36196bbf1cbd1bfb56ed341905edd", size = 2286404, upload-time = "2026-05-06T13:40:20.221Z" }, + { url = "https://files.pythonhosted.org/packages/e2/4a/4d8b19008f38d31c53b8219cfedc2e3d5de5fe99d90076b7e767de29274f/pydantic_core-2.46.4-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7027560ee92211647d0d34e3f7cd6f50da56399d26a9c8ad0da286d3869a53f3", size = 2109219, upload-time = "2026-05-06T13:38:12.153Z" }, + { url = "https://files.pythonhosted.org/packages/88/70/3cbc40978fefb7bb09c6708d40d4ad1a5d70fd7213c3d17f971de868ec1f/pydantic_core-2.46.4-cp314-cp314t-manylinux_2_31_riscv64.whl", hash = "sha256:f99626688942fb746e545232e7726926f3be91b5975f8b55327665fafda991c7", size = 2110594, upload-time = "2026-05-06T13:40:02.971Z" }, + { url = "https://files.pythonhosted.org/packages/9d/20/b8d36736216e29491125531685b2f9e61aa5b4b2599893f8268551da3338/pydantic_core-2.46.4-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:fc3e9034a63de20e15e8ade85358bc6efc614008cab72898b4b4952bea0509ff", size = 2159542, upload-time = 
"2026-05-06T13:39:27.506Z" }, + { url = "https://files.pythonhosted.org/packages/1d/a2/367df868eb584dacf6bf82a389272406d7178e301c4ac82545ab98bc2dd9/pydantic_core-2.46.4-cp314-cp314t-musllinux_1_1_aarch64.whl", hash = "sha256:97e7cf2be5c77b7d1a9713a05605d49460d02c6078d38d8bef3cbe323c548424", size = 2168146, upload-time = "2026-05-06T13:38:31.93Z" }, + { url = "https://files.pythonhosted.org/packages/c1/b8/4460f77f7e201893f649a29ab355dddd3beee8a97bcb1a320db414f9a06e/pydantic_core-2.46.4-cp314-cp314t-musllinux_1_1_armv7l.whl", hash = "sha256:3bf92c5d0e00fefaab325a4d27828fe6b6e2a21848686b5b60d2d9eeb09d76c6", size = 2306309, upload-time = "2026-05-06T13:37:44.717Z" }, + { url = "https://files.pythonhosted.org/packages/64/c4/be2639293acd87dc8ddbcec41a73cee9b2ebf996fe6d892a1a74e88ad3f7/pydantic_core-2.46.4-cp314-cp314t-musllinux_1_1_x86_64.whl", hash = "sha256:3ecbc122d18468d06ca279dc26a8c2e2d5acb10943bb35e36ae92096dc3b5565", size = 2369736, upload-time = "2026-05-06T13:37:05.645Z" }, + { url = "https://files.pythonhosted.org/packages/30/a6/9f9f380dbb301f67023bf8f707aaa75daadf84f7152d95c410fd7e81d994/pydantic_core-2.46.4-cp314-cp314t-win32.whl", hash = "sha256:e846ae7835bf0703ae43f534ab79a867146dadd59dc9ca5c8b53d5c8f7c9ef02", size = 1955575, upload-time = "2026-05-06T13:38:51.116Z" }, + { url = "https://files.pythonhosted.org/packages/40/1f/f1eb9eb350e795d1af8586289746f5c5677d16043040d63710e22abc43c9/pydantic_core-2.46.4-cp314-cp314t-win_amd64.whl", hash = "sha256:2108ba5c1c1eca18030634489dc544844144ee36357f2f9f780b93e7ddbb44b5", size = 2051624, upload-time = "2026-05-06T13:38:21.672Z" }, + { url = "https://files.pythonhosted.org/packages/f6/d2/42dd53d0a85c27606f316d3aa5d2869c4e8470a5ed6dec30e4a1abe19192/pydantic_core-2.46.4-cp314-cp314t-win_arm64.whl", hash = "sha256:4fcbe087dbc2068af7eda3aa87634eba216dbda64d1ae73c8684b621d33f6596", size = 2017325, upload-time = "2026-05-06T13:40:52.723Z" }, + { url = 
"https://files.pythonhosted.org/packages/9d/1d/8987ad40f65ae1432753072f214fb5c74fe47ffbd0698bb9cbbb585664f8/pydantic_core-2.46.4-graalpy312-graalpy250_312_native-macosx_10_12_x86_64.whl", hash = "sha256:1d8ba486450b14f3b1d63bc521d410ec7565e52f887b9fb671791886436a42f7", size = 2095527, upload-time = "2026-05-06T13:39:52.283Z" }, + { url = "https://files.pythonhosted.org/packages/64/d3/84c282a7eee1d3ac4c0377546ef5a1ea436ce26840d9ac3b7ed54a377507/pydantic_core-2.46.4-graalpy312-graalpy250_312_native-macosx_11_0_arm64.whl", hash = "sha256:3009f12e4e90b7f88b4f9adb1b0c4a3d58fe7820f3238c190047209d148026df", size = 1936024, upload-time = "2026-05-06T13:40:15.671Z" }, + { url = "https://files.pythonhosted.org/packages/d7/ca/eac61596cdeb4d7e174d3dc0bd8a6238f14f75f97a24e7b7db4c7e7340a0/pydantic_core-2.46.4-graalpy312-graalpy250_312_native-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ad785e92e6dc634c21555edc8bd6b64957ab844541bcb96a1366c202951ae526", size = 1990696, upload-time = "2026-05-06T13:38:34.717Z" }, + { url = "https://files.pythonhosted.org/packages/fa/c3/7c8b240552251faf6b3a957db200fcfbbcec36763c050428b601e0c9b83b/pydantic_core-2.46.4-graalpy312-graalpy250_312_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:00c603d540afdd6b80eb39f078f33ebd46211f02f33e34a32d9f053bba711de0", size = 2147590, upload-time = "2026-05-06T13:39:29.883Z" }, +] + +[[package]] +name = "pydantic-extra-types" +version = "2.11.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pydantic" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/66/71/dba38ee2651f84f7842206adbd2233d8bbdb59fb85e9fa14232486a8c471/pydantic_extra_types-2.11.1.tar.gz", hash = "sha256:46792d2307383859e923d8fcefa82108b1a141f8a9c0198982b3832ab5ef1049", size = 172002, upload-time = "2026-03-16T08:08:03.92Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/17/c1/3226e6d7f5a4f736f38ac11a6fbb262d701889802595cdb0f53a885ac2e0/pydantic_extra_types-2.11.1-py3-none-any.whl", hash = "sha256:1722ea2bddae5628ace25f2aa685b69978ef533123e5638cfbddb999e0100ec1", size = 79526, upload-time = "2026-03-16T08:08:02.533Z" }, +] + +[[package]] +name = "pydantic-settings" +version = "2.14.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pydantic" }, + { name = "python-dotenv" }, + { name = "typing-inspection" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/07/60/1d1e59c9c90d54591469ada7d268251f71c24bdb765f1a8a832cee8c6653/pydantic_settings-2.14.1.tar.gz", hash = "sha256:e874d3bec7e787b0c9958277956ed9b4dd5de6a80e162188fdaff7c5e26fd5fa", size = 235551, upload-time = "2026-05-08T13:40:06.542Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ae/8d/f1af3832f5e6eb13ba94ee809e72b8ecb5eef226d27ee0bef7d963d943c7/pydantic_settings-2.14.1-py3-none-any.whl", hash = "sha256:6e3c7edfd8277687cdc598f56e5cff0e9bfff0910a3749deaa8d4401c3a2b9de", size = 60964, upload-time = "2026-05-08T13:40:04.958Z" }, +] + +[[package]] +name = "pygments" +version = "2.20.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/c3/b2/bc9c9196916376152d655522fdcebac55e66de6603a76a02bca1b6414f6c/pygments-2.20.0.tar.gz", hash = "sha256:6757cd03768053ff99f3039c1a36d6c0aa0b263438fcab17520b30a303a82b5f", size = 4955991, upload-time = "2026-03-29T13:29:33.898Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f4/7e/a72dd26f3b0f4f2bf1dd8923c85f7ceb43172af56d63c7383eb62b332364/pygments-2.20.0-py3-none-any.whl", hash = "sha256:81a9e26dd42fd28a23a2d169d86d7ac03b46e2f8b59ed4698fb4785f946d0176", size = 1231151, upload-time = "2026-03-29T13:29:30.038Z" }, +] + +[[package]] +name = "pyjwt" +version = "2.13.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/3b/81/58d0ac84e1ef3a3843791d6954d94c0b33d526c75eeb1efbce9d0a4c4077/pyjwt-2.13.0.tar.gz", hash = "sha256:41571c89ca91598c79e8ef18a2d07367d4810fbbd6f637794879baf1b7703423", size = 107515, upload-time = "2026-05-21T19:54:36.618Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a3/5e/ecf12fdb62546d64385c158514e9b2b671f7832108ef2ecd2020ce0af2d1/pyjwt-2.13.0-py3-none-any.whl", hash = "sha256:66adcc2aff09b3f1bbd95fc1e1577df8ac8723c978552fd43304c8a290ac5728", size = 31274, upload-time = "2026-05-21T19:54:35.362Z" }, +] + +[package.optional-dependencies] +crypto = [ + { name = "cryptography" }, +] + +[[package]] +name = "pytest" +version = "9.0.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, + { name = "iniconfig" }, + { name = "packaging" }, + { name = "pluggy" }, + { name = "pygments" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/7d/0d/549bd94f1a0a402dc8cf64563a117c0f3765662e2e668477624baeec44d5/pytest-9.0.3.tar.gz", hash = "sha256:b86ada508af81d19edeb213c681b1d48246c1a91d304c6c81a427674c17eb91c", size = 1572165, upload-time = "2026-04-07T17:16:18.027Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d4/24/a372aaf5c9b7208e7112038812994107bc65a84cd00e0354a88c2c77a617/pytest-9.0.3-py3-none-any.whl", hash = "sha256:2c5efc453d45394fdd706ade797c0a81091eccd1d6e4bccfcd476e2b8e0ab5d9", size = 375249, upload-time = "2026-04-07T17:16:16.13Z" }, +] + +[[package]] +name = "pytest-asyncio" +version = "1.4.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pytest" }, + { name = "typing-extensions", marker = "python_full_version < '3.13'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/43/7c/d36d04db312ecf4298932ef77e6e4a9e8ad017906e24e34f0b0c361a2473/pytest_asyncio-1.4.0.tar.gz", hash = "sha256:c6c0d2259945122819f171a32ecea2c349ead889ee28176caaf492143424be42", 
size = 58514, upload-time = "2026-05-26T09:56:04.083Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/03/e2/08a497ef684b88559c9cc5f4ad53a37e7b99e727094a86d6ea32536d5d3c/pytest_asyncio-1.4.0-py3-none-any.whl", hash = "sha256:933ca923a23075a87fb7070c0ec272a6848489824d887c85c812670932835aa1", size = 16930, upload-time = "2026-05-26T09:56:02.576Z" }, +] + +[[package]] +name = "python-dotenv" +version = "1.2.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/82/ed/0301aeeac3e5353ef3d94b6ec08bbcabd04a72018415dcb29e588514bba8/python_dotenv-1.2.2.tar.gz", hash = "sha256:2c371a91fbd7ba082c2c1dc1f8bf89ca22564a087c2c287cd9b662adde799cf3", size = 50135, upload-time = "2026-03-01T16:00:26.196Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0b/d7/1959b9648791274998a9c3526f6d0ec8fd2233e4d4acce81bbae76b44b2a/python_dotenv-1.2.2-py3-none-any.whl", hash = "sha256:1d8214789a24de455a8b8bd8ae6fe3c6b69a5e3d64aa8a8e5d68e694bbcb285a", size = 22101, upload-time = "2026-03-01T16:00:25.09Z" }, +] + +[[package]] +name = "python-multipart" +version = "0.0.29" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/4e/fe/70bd71a6738b09a0bdf6480ca6436b167469ca4578b2a0efbe390b4b0e70/python_multipart-0.0.29.tar.gz", hash = "sha256:643e93849196645e2dbdd81a0f8829a23123ad7f797a84a364c6fb3563f18904", size = 45678, upload-time = "2026-05-17T17:29:47.654Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8f/cb/769cfc37177252872a45a71f3fbdde9d51b471a3f3c14bfe95dde3407386/python_multipart-0.0.29-py3-none-any.whl", hash = "sha256:2ddcc971cef266225f54f552d8fa10bcfbb1f14446caec199060daac59ff2d69", size = 29640, upload-time = "2026-05-17T17:29:45.69Z" }, +] + +[[package]] +name = "pywin32" +version = "311" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/e7/ab/01ea1943d4eba0f850c3c61e78e8dd59757ff815ff3ccd0a84de5f541f42/pywin32-311-cp312-cp312-win32.whl", hash = "sha256:750ec6e621af2b948540032557b10a2d43b0cee2ae9758c54154d711cc852d31", size = 8706543, upload-time = "2025-07-14T20:13:20.765Z" }, + { url = "https://files.pythonhosted.org/packages/d1/a8/a0e8d07d4d051ec7502cd58b291ec98dcc0c3fff027caad0470b72cfcc2f/pywin32-311-cp312-cp312-win_amd64.whl", hash = "sha256:b8c095edad5c211ff31c05223658e71bf7116daa0ecf3ad85f3201ea3190d067", size = 9495040, upload-time = "2025-07-14T20:13:22.543Z" }, + { url = "https://files.pythonhosted.org/packages/ba/3a/2ae996277b4b50f17d61f0603efd8253cb2d79cc7ae159468007b586396d/pywin32-311-cp312-cp312-win_arm64.whl", hash = "sha256:e286f46a9a39c4a18b319c28f59b61de793654af2f395c102b4f819e584b5852", size = 8710102, upload-time = "2025-07-14T20:13:24.682Z" }, + { url = "https://files.pythonhosted.org/packages/a5/be/3fd5de0979fcb3994bfee0d65ed8ca9506a8a1260651b86174f6a86f52b3/pywin32-311-cp313-cp313-win32.whl", hash = "sha256:f95ba5a847cba10dd8c4d8fefa9f2a6cf283b8b88ed6178fa8a6c1ab16054d0d", size = 8705700, upload-time = "2025-07-14T20:13:26.471Z" }, + { url = "https://files.pythonhosted.org/packages/e3/28/e0a1909523c6890208295a29e05c2adb2126364e289826c0a8bc7297bd5c/pywin32-311-cp313-cp313-win_amd64.whl", hash = "sha256:718a38f7e5b058e76aee1c56ddd06908116d35147e133427e59a3983f703a20d", size = 9494700, upload-time = "2025-07-14T20:13:28.243Z" }, + { url = "https://files.pythonhosted.org/packages/04/bf/90339ac0f55726dce7d794e6d79a18a91265bdf3aa70b6b9ca52f35e022a/pywin32-311-cp313-cp313-win_arm64.whl", hash = "sha256:7b4075d959648406202d92a2310cb990fea19b535c7f4a78d3f5e10b926eeb8a", size = 8709318, upload-time = "2025-07-14T20:13:30.348Z" }, + { url = "https://files.pythonhosted.org/packages/c9/31/097f2e132c4f16d99a22bfb777e0fd88bd8e1c634304e102f313af69ace5/pywin32-311-cp314-cp314-win32.whl", hash = 
"sha256:b7a2c10b93f8986666d0c803ee19b5990885872a7de910fc460f9b0c2fbf92ee", size = 8840714, upload-time = "2025-07-14T20:13:32.449Z" }, + { url = "https://files.pythonhosted.org/packages/90/4b/07c77d8ba0e01349358082713400435347df8426208171ce297da32c313d/pywin32-311-cp314-cp314-win_amd64.whl", hash = "sha256:3aca44c046bd2ed8c90de9cb8427f581c479e594e99b5c0bb19b29c10fd6cb87", size = 9656800, upload-time = "2025-07-14T20:13:34.312Z" }, + { url = "https://files.pythonhosted.org/packages/c0/d2/21af5c535501a7233e734b8af901574572da66fcc254cb35d0609c9080dd/pywin32-311-cp314-cp314-win_arm64.whl", hash = "sha256:a508e2d9025764a8270f93111a970e1d0fbfc33f4153b388bb649b7eec4f9b42", size = 8932540, upload-time = "2025-07-14T20:13:36.379Z" }, +] + +[[package]] +name = "pyyaml" +version = "6.0.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/05/8e/961c0007c59b8dd7729d542c61a4d537767a59645b82a0b521206e1e25c2/pyyaml-6.0.3.tar.gz", hash = "sha256:d76623373421df22fb4cf8817020cbb7ef15c725b9d5e45f17e189bfc384190f", size = 130960, upload-time = "2025-09-25T21:33:16.546Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d1/33/422b98d2195232ca1826284a76852ad5a86fe23e31b009c9886b2d0fb8b2/pyyaml-6.0.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:7f047e29dcae44602496db43be01ad42fc6f1cc0d8cd6c83d342306c32270196", size = 182063, upload-time = "2025-09-25T21:32:11.445Z" }, + { url = "https://files.pythonhosted.org/packages/89/a0/6cf41a19a1f2f3feab0e9c0b74134aa2ce6849093d5517a0c550fe37a648/pyyaml-6.0.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:fc09d0aa354569bc501d4e787133afc08552722d3ab34836a80547331bb5d4a0", size = 173973, upload-time = "2025-09-25T21:32:12.492Z" }, + { url = "https://files.pythonhosted.org/packages/ed/23/7a778b6bd0b9a8039df8b1b1d80e2e2ad78aa04171592c8a5c43a56a6af4/pyyaml-6.0.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = 
"sha256:9149cad251584d5fb4981be1ecde53a1ca46c891a79788c0df828d2f166bda28", size = 775116, upload-time = "2025-09-25T21:32:13.652Z" }, + { url = "https://files.pythonhosted.org/packages/65/30/d7353c338e12baef4ecc1b09e877c1970bd3382789c159b4f89d6a70dc09/pyyaml-6.0.3-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:5fdec68f91a0c6739b380c83b951e2c72ac0197ace422360e6d5a959d8d97b2c", size = 844011, upload-time = "2025-09-25T21:32:15.21Z" }, + { url = "https://files.pythonhosted.org/packages/8b/9d/b3589d3877982d4f2329302ef98a8026e7f4443c765c46cfecc8858c6b4b/pyyaml-6.0.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ba1cc08a7ccde2d2ec775841541641e4548226580ab850948cbfda66a1befcdc", size = 807870, upload-time = "2025-09-25T21:32:16.431Z" }, + { url = "https://files.pythonhosted.org/packages/05/c0/b3be26a015601b822b97d9149ff8cb5ead58c66f981e04fedf4e762f4bd4/pyyaml-6.0.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8dc52c23056b9ddd46818a57b78404882310fb473d63f17b07d5c40421e47f8e", size = 761089, upload-time = "2025-09-25T21:32:17.56Z" }, + { url = "https://files.pythonhosted.org/packages/be/8e/98435a21d1d4b46590d5459a22d88128103f8da4c2d4cb8f14f2a96504e1/pyyaml-6.0.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:41715c910c881bc081f1e8872880d3c650acf13dfa8214bad49ed4cede7c34ea", size = 790181, upload-time = "2025-09-25T21:32:18.834Z" }, + { url = "https://files.pythonhosted.org/packages/74/93/7baea19427dcfbe1e5a372d81473250b379f04b1bd3c4c5ff825e2327202/pyyaml-6.0.3-cp312-cp312-win32.whl", hash = "sha256:96b533f0e99f6579b3d4d4995707cf36df9100d67e0c8303a0c55b27b5f99bc5", size = 137658, upload-time = "2025-09-25T21:32:20.209Z" }, + { url = "https://files.pythonhosted.org/packages/86/bf/899e81e4cce32febab4fb42bb97dcdf66bc135272882d1987881a4b519e9/pyyaml-6.0.3-cp312-cp312-win_amd64.whl", hash = "sha256:5fcd34e47f6e0b794d17de1b4ff496c00986e1c83f7ab2fb8fcfe9616ff7477b", 
size = 154003, upload-time = "2025-09-25T21:32:21.167Z" }, + { url = "https://files.pythonhosted.org/packages/1a/08/67bd04656199bbb51dbed1439b7f27601dfb576fb864099c7ef0c3e55531/pyyaml-6.0.3-cp312-cp312-win_arm64.whl", hash = "sha256:64386e5e707d03a7e172c0701abfb7e10f0fb753ee1d773128192742712a98fd", size = 140344, upload-time = "2025-09-25T21:32:22.617Z" }, + { url = "https://files.pythonhosted.org/packages/d1/11/0fd08f8192109f7169db964b5707a2f1e8b745d4e239b784a5a1dd80d1db/pyyaml-6.0.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:8da9669d359f02c0b91ccc01cac4a67f16afec0dac22c2ad09f46bee0697eba8", size = 181669, upload-time = "2025-09-25T21:32:23.673Z" }, + { url = "https://files.pythonhosted.org/packages/b1/16/95309993f1d3748cd644e02e38b75d50cbc0d9561d21f390a76242ce073f/pyyaml-6.0.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:2283a07e2c21a2aa78d9c4442724ec1eb15f5e42a723b99cb3d822d48f5f7ad1", size = 173252, upload-time = "2025-09-25T21:32:25.149Z" }, + { url = "https://files.pythonhosted.org/packages/50/31/b20f376d3f810b9b2371e72ef5adb33879b25edb7a6d072cb7ca0c486398/pyyaml-6.0.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ee2922902c45ae8ccada2c5b501ab86c36525b883eff4255313a253a3160861c", size = 767081, upload-time = "2025-09-25T21:32:26.575Z" }, + { url = "https://files.pythonhosted.org/packages/49/1e/a55ca81e949270d5d4432fbbd19dfea5321eda7c41a849d443dc92fd1ff7/pyyaml-6.0.3-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a33284e20b78bd4a18c8c2282d549d10bc8408a2a7ff57653c0cf0b9be0afce5", size = 841159, upload-time = "2025-09-25T21:32:27.727Z" }, + { url = "https://files.pythonhosted.org/packages/74/27/e5b8f34d02d9995b80abcef563ea1f8b56d20134d8f4e5e81733b1feceb2/pyyaml-6.0.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0f29edc409a6392443abf94b9cf89ce99889a1dd5376d94316ae5145dfedd5d6", size = 801626, 
upload-time = "2025-09-25T21:32:28.878Z" }, + { url = "https://files.pythonhosted.org/packages/f9/11/ba845c23988798f40e52ba45f34849aa8a1f2d4af4b798588010792ebad6/pyyaml-6.0.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f7057c9a337546edc7973c0d3ba84ddcdf0daa14533c2065749c9075001090e6", size = 753613, upload-time = "2025-09-25T21:32:30.178Z" }, + { url = "https://files.pythonhosted.org/packages/3d/e0/7966e1a7bfc0a45bf0a7fb6b98ea03fc9b8d84fa7f2229e9659680b69ee3/pyyaml-6.0.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:eda16858a3cab07b80edaf74336ece1f986ba330fdb8ee0d6c0d68fe82bc96be", size = 794115, upload-time = "2025-09-25T21:32:31.353Z" }, + { url = "https://files.pythonhosted.org/packages/de/94/980b50a6531b3019e45ddeada0626d45fa85cbe22300844a7983285bed3b/pyyaml-6.0.3-cp313-cp313-win32.whl", hash = "sha256:d0eae10f8159e8fdad514efdc92d74fd8d682c933a6dd088030f3834bc8e6b26", size = 137427, upload-time = "2025-09-25T21:32:32.58Z" }, + { url = "https://files.pythonhosted.org/packages/97/c9/39d5b874e8b28845e4ec2202b5da735d0199dbe5b8fb85f91398814a9a46/pyyaml-6.0.3-cp313-cp313-win_amd64.whl", hash = "sha256:79005a0d97d5ddabfeeea4cf676af11e647e41d81c9a7722a193022accdb6b7c", size = 154090, upload-time = "2025-09-25T21:32:33.659Z" }, + { url = "https://files.pythonhosted.org/packages/73/e8/2bdf3ca2090f68bb3d75b44da7bbc71843b19c9f2b9cb9b0f4ab7a5a4329/pyyaml-6.0.3-cp313-cp313-win_arm64.whl", hash = "sha256:5498cd1645aa724a7c71c8f378eb29ebe23da2fc0d7a08071d89469bf1d2defb", size = 140246, upload-time = "2025-09-25T21:32:34.663Z" }, + { url = "https://files.pythonhosted.org/packages/9d/8c/f4bd7f6465179953d3ac9bc44ac1a8a3e6122cf8ada906b4f96c60172d43/pyyaml-6.0.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:8d1fab6bb153a416f9aeb4b8763bc0f22a5586065f86f7664fc23339fc1c1fac", size = 181814, upload-time = "2025-09-25T21:32:35.712Z" }, + { url = 
"https://files.pythonhosted.org/packages/bd/9c/4d95bb87eb2063d20db7b60faa3840c1b18025517ae857371c4dd55a6b3a/pyyaml-6.0.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:34d5fcd24b8445fadc33f9cf348c1047101756fd760b4dacb5c3e99755703310", size = 173809, upload-time = "2025-09-25T21:32:36.789Z" }, + { url = "https://files.pythonhosted.org/packages/92/b5/47e807c2623074914e29dabd16cbbdd4bf5e9b2db9f8090fa64411fc5382/pyyaml-6.0.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:501a031947e3a9025ed4405a168e6ef5ae3126c59f90ce0cd6f2bfc477be31b7", size = 766454, upload-time = "2025-09-25T21:32:37.966Z" }, + { url = "https://files.pythonhosted.org/packages/02/9e/e5e9b168be58564121efb3de6859c452fccde0ab093d8438905899a3a483/pyyaml-6.0.3-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:b3bc83488de33889877a0f2543ade9f70c67d66d9ebb4ac959502e12de895788", size = 836355, upload-time = "2025-09-25T21:32:39.178Z" }, + { url = "https://files.pythonhosted.org/packages/88/f9/16491d7ed2a919954993e48aa941b200f38040928474c9e85ea9e64222c3/pyyaml-6.0.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c458b6d084f9b935061bc36216e8a69a7e293a2f1e68bf956dcd9e6cbcd143f5", size = 794175, upload-time = "2025-09-25T21:32:40.865Z" }, + { url = "https://files.pythonhosted.org/packages/dd/3f/5989debef34dc6397317802b527dbbafb2b4760878a53d4166579111411e/pyyaml-6.0.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:7c6610def4f163542a622a73fb39f534f8c101d690126992300bf3207eab9764", size = 755228, upload-time = "2025-09-25T21:32:42.084Z" }, + { url = "https://files.pythonhosted.org/packages/d7/ce/af88a49043cd2e265be63d083fc75b27b6ed062f5f9fd6cdc223ad62f03e/pyyaml-6.0.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:5190d403f121660ce8d1d2c1bb2ef1bd05b5f68533fc5c2ea899bd15f4399b35", size = 789194, upload-time = "2025-09-25T21:32:43.362Z" }, + { url = 
"https://files.pythonhosted.org/packages/23/20/bb6982b26a40bb43951265ba29d4c246ef0ff59c9fdcdf0ed04e0687de4d/pyyaml-6.0.3-cp314-cp314-win_amd64.whl", hash = "sha256:4a2e8cebe2ff6ab7d1050ecd59c25d4c8bd7e6f400f5f82b96557ac0abafd0ac", size = 156429, upload-time = "2025-09-25T21:32:57.844Z" }, + { url = "https://files.pythonhosted.org/packages/f4/f4/a4541072bb9422c8a883ab55255f918fa378ecf083f5b85e87fc2b4eda1b/pyyaml-6.0.3-cp314-cp314-win_arm64.whl", hash = "sha256:93dda82c9c22deb0a405ea4dc5f2d0cda384168e466364dec6255b293923b2f3", size = 143912, upload-time = "2025-09-25T21:32:59.247Z" }, + { url = "https://files.pythonhosted.org/packages/7c/f9/07dd09ae774e4616edf6cda684ee78f97777bdd15847253637a6f052a62f/pyyaml-6.0.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:02893d100e99e03eda1c8fd5c441d8c60103fd175728e23e431db1b589cf5ab3", size = 189108, upload-time = "2025-09-25T21:32:44.377Z" }, + { url = "https://files.pythonhosted.org/packages/4e/78/8d08c9fb7ce09ad8c38ad533c1191cf27f7ae1effe5bb9400a46d9437fcf/pyyaml-6.0.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:c1ff362665ae507275af2853520967820d9124984e0f7466736aea23d8611fba", size = 183641, upload-time = "2025-09-25T21:32:45.407Z" }, + { url = "https://files.pythonhosted.org/packages/7b/5b/3babb19104a46945cf816d047db2788bcaf8c94527a805610b0289a01c6b/pyyaml-6.0.3-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6adc77889b628398debc7b65c073bcb99c4a0237b248cacaf3fe8a557563ef6c", size = 831901, upload-time = "2025-09-25T21:32:48.83Z" }, + { url = "https://files.pythonhosted.org/packages/8b/cc/dff0684d8dc44da4d22a13f35f073d558c268780ce3c6ba1b87055bb0b87/pyyaml-6.0.3-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a80cb027f6b349846a3bf6d73b5e95e782175e52f22108cfa17876aaeff93702", size = 861132, upload-time = "2025-09-25T21:32:50.149Z" }, + { url = 
"https://files.pythonhosted.org/packages/b1/5e/f77dc6b9036943e285ba76b49e118d9ea929885becb0a29ba8a7c75e29fe/pyyaml-6.0.3-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:00c4bdeba853cc34e7dd471f16b4114f4162dc03e6b7afcc2128711f0eca823c", size = 839261, upload-time = "2025-09-25T21:32:51.808Z" }, + { url = "https://files.pythonhosted.org/packages/ce/88/a9db1376aa2a228197c58b37302f284b5617f56a5d959fd1763fb1675ce6/pyyaml-6.0.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:66e1674c3ef6f541c35191caae2d429b967b99e02040f5ba928632d9a7f0f065", size = 805272, upload-time = "2025-09-25T21:32:52.941Z" }, + { url = "https://files.pythonhosted.org/packages/da/92/1446574745d74df0c92e6aa4a7b0b3130706a4142b2d1a5869f2eaa423c6/pyyaml-6.0.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:16249ee61e95f858e83976573de0f5b2893b3677ba71c9dd36b9cf8be9ac6d65", size = 829923, upload-time = "2025-09-25T21:32:54.537Z" }, + { url = "https://files.pythonhosted.org/packages/f0/7a/1c7270340330e575b92f397352af856a8c06f230aa3e76f86b39d01b416a/pyyaml-6.0.3-cp314-cp314t-win_amd64.whl", hash = "sha256:4ad1906908f2f5ae4e5a8ddfce73c320c2a1429ec52eafd27138b7f1cbe341c9", size = 174062, upload-time = "2025-09-25T21:32:55.767Z" }, + { url = "https://files.pythonhosted.org/packages/f1/12/de94a39c2ef588c7e6455cfbe7343d3b2dc9d6b6b2f40c4c6565744c873d/pyyaml-6.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:ebc55a14a21cb14062aa4162f906cd962b28e2e9ea38f9b4391244cd8de4ae0b", size = 149341, upload-time = "2025-09-25T21:32:56.828Z" }, +] + +[[package]] +name = "referencing" +version = "0.37.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "attrs" }, + { name = "rpds-py" }, + { name = "typing-extensions", marker = "python_full_version < '3.13'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/22/f5/df4e9027acead3ecc63e50fe1e36aca1523e1719559c499951bb4b53188f/referencing-0.37.0.tar.gz", hash = 
"sha256:44aefc3142c5b842538163acb373e24cce6632bd54bdb01b21ad5863489f50d8", size = 78036, upload-time = "2025-10-13T15:30:48.871Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2c/58/ca301544e1fa93ed4f80d724bf5b194f6e4b945841c5bfd555878eea9fcb/referencing-0.37.0-py3-none-any.whl", hash = "sha256:381329a9f99628c9069361716891d34ad94af76e461dcb0335825aecc7692231", size = 26766, upload-time = "2025-10-13T15:30:47.625Z" }, +] + +[[package]] +name = "rich" +version = "15.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "markdown-it-py" }, + { name = "pygments" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c0/8f/0722ca900cc807c13a6a0c696dacf35430f72e0ec571c4275d2371fca3e9/rich-15.0.0.tar.gz", hash = "sha256:edd07a4824c6b40189fb7ac9bc4c52536e9780fbbfbddf6f1e2502c31b068c36", size = 230680, upload-time = "2026-04-12T08:24:00.75Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/82/3b/64d4899d73f91ba49a8c18a8ff3f0ea8f1c1d75481760df8c68ef5235bf5/rich-15.0.0-py3-none-any.whl", hash = "sha256:33bd4ef74232fb73fe9279a257718407f169c09b78a87ad3d296f548e27de0bb", size = 310654, upload-time = "2026-04-12T08:24:02.83Z" }, +] + +[[package]] +name = "rich-toolkit" +version = "0.19.10" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "click" }, + { name = "rich" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/fa/02/32217f3657ae91a0ea7cf1d74ade78f44352f830d00c468f753ddb3d4980/rich_toolkit-0.19.10.tar.gz", hash = "sha256:dc2e8c515ef9fbb4894e62bd41a2d2960dd7c2f505b5084894604d5ccfee3f09", size = 198167, upload-time = "2026-05-21T10:11:42.397Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/35/84/a005adcb4d1e6846ba3d62768090c3b943e3f6d8dc5c47af64f33584c4a7/rich_toolkit-0.19.10-py3-none-any.whl", hash = "sha256:93a41f67a09aefe90379f1729495c2fee9ccbcc8cfda48e2ca2ae54a995e32b1", size = 33907, upload-time = 
"2026-05-21T10:11:43.578Z" }, +] + +[[package]] +name = "rignore" +version = "0.7.6" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e5/f5/8bed2310abe4ae04b67a38374a4d311dd85220f5d8da56f47ae9361be0b0/rignore-0.7.6.tar.gz", hash = "sha256:00d3546cd793c30cb17921ce674d2c8f3a4b00501cb0e3dd0e82217dbeba2671", size = 57140, upload-time = "2025-11-05T21:41:21.968Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0b/0e/012556ef3047a2628842b44e753bb15f4dc46806780ff090f1e8fe4bf1eb/rignore-0.7.6-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:03e82348cb7234f8d9b2834f854400ddbbd04c0f8f35495119e66adbd37827a8", size = 883488, upload-time = "2025-11-05T20:42:41.359Z" }, + { url = "https://files.pythonhosted.org/packages/93/b0/d4f1f3fe9eb3f8e382d45ce5b0547ea01c4b7e0b4b4eb87bcd66a1d2b888/rignore-0.7.6-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b9e624f6be6116ea682e76c5feb71ea91255c67c86cb75befe774365b2931961", size = 820411, upload-time = "2025-11-05T20:42:24.782Z" }, + { url = "https://files.pythonhosted.org/packages/4a/c8/dea564b36dedac8de21c18e1851789545bc52a0c22ece9843444d5608a6a/rignore-0.7.6-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bda49950d405aa8d0ebe26af807c4e662dd281d926530f03f29690a2e07d649a", size = 897821, upload-time = "2025-11-05T20:40:52.613Z" }, + { url = "https://files.pythonhosted.org/packages/b3/2b/ee96db17ac1835e024c5d0742eefb7e46de60020385ac883dd3d1cde2c1f/rignore-0.7.6-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:b5fd5ab3840b8c16851d327ed06e9b8be6459702a53e5ab1fc4073b684b3789e", size = 873963, upload-time = "2025-11-05T20:41:07.49Z" }, + { url = "https://files.pythonhosted.org/packages/a5/8c/ad5a57bbb9d14d5c7e5960f712a8a0b902472ea3f4a2138cbf70d1777b75/rignore-0.7.6-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:ced2a248352636a5c77504cb755dc02c2eef9a820a44d3f33061ce1bb8a7f2d2", size = 1169216, upload-time = "2025-11-05T20:41:23.73Z" }, + { url = "https://files.pythonhosted.org/packages/80/e6/5b00bc2a6bc1701e6878fca798cf5d9125eb3113193e33078b6fc0d99123/rignore-0.7.6-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a04a3b73b75ddc12c9c9b21efcdaab33ca3832941d6f1d67bffd860941cd448a", size = 942942, upload-time = "2025-11-05T20:41:39.393Z" }, + { url = "https://files.pythonhosted.org/packages/85/e5/7f99bd0cc9818a91d0e8b9acc65b792e35750e3bdccd15a7ee75e64efca4/rignore-0.7.6-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d24321efac92140b7ec910ac7c53ab0f0c86a41133d2bb4b0e6a7c94967f44dd", size = 959787, upload-time = "2025-11-05T20:42:09.765Z" }, + { url = "https://files.pythonhosted.org/packages/55/54/2ffea79a7c1eabcede1926347ebc2a81bc6b81f447d05b52af9af14948b9/rignore-0.7.6-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:73c7aa109d41e593785c55fdaa89ad80b10330affa9f9d3e3a51fa695f739b20", size = 984245, upload-time = "2025-11-05T20:41:54.062Z" }, + { url = "https://files.pythonhosted.org/packages/41/f7/e80f55dfe0f35787fa482aa18689b9c8251e045076c35477deb0007b3277/rignore-0.7.6-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:1734dc49d1e9501b07852ef44421f84d9f378da9fbeda729e77db71f49cac28b", size = 1078647, upload-time = "2025-11-05T21:40:13.463Z" }, + { url = "https://files.pythonhosted.org/packages/d4/cf/2c64f0b6725149f7c6e7e5a909d14354889b4beaadddaa5fff023ec71084/rignore-0.7.6-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:5719ea14ea2b652c0c0894be5dfde954e1853a80dea27dd2fbaa749618d837f5", size = 1139186, upload-time = "2025-11-05T21:40:31.27Z" }, + { url = "https://files.pythonhosted.org/packages/75/95/a86c84909ccc24af0d094b50d54697951e576c252a4d9f21b47b52af9598/rignore-0.7.6-cp312-cp312-musllinux_1_2_i686.whl", hash = 
"sha256:8e23424fc7ce35726854f639cb7968151a792c0c3d9d082f7f67e0c362cfecca", size = 1117604, upload-time = "2025-11-05T21:40:48.07Z" }, + { url = "https://files.pythonhosted.org/packages/7f/5e/13b249613fd5d18d58662490ab910a9f0be758981d1797789913adb4e918/rignore-0.7.6-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:3efdcf1dd84d45f3e2bd2f93303d9be103888f56dfa7c3349b5bf4f0657ec696", size = 1127725, upload-time = "2025-11-05T21:41:05.804Z" }, + { url = "https://files.pythonhosted.org/packages/c7/28/fa5dcd1e2e16982c359128664e3785f202d3eca9b22dd0b2f91c4b3d242f/rignore-0.7.6-cp312-cp312-win32.whl", hash = "sha256:ccca9d1a8b5234c76b71546fc3c134533b013f40495f394a65614a81f7387046", size = 646145, upload-time = "2025-11-05T21:41:51.096Z" }, + { url = "https://files.pythonhosted.org/packages/26/87/69387fb5dd81a0f771936381431780b8cf66fcd2cfe9495e1aaf41548931/rignore-0.7.6-cp312-cp312-win_amd64.whl", hash = "sha256:c96a285e4a8bfec0652e0bfcf42b1aabcdda1e7625f5006d188e3b1c87fdb543", size = 726090, upload-time = "2025-11-05T21:41:36.485Z" }, + { url = "https://files.pythonhosted.org/packages/24/5f/e8418108dcda8087fb198a6f81caadbcda9fd115d61154bf0df4d6d3619b/rignore-0.7.6-cp312-cp312-win_arm64.whl", hash = "sha256:a64a750e7a8277a323f01ca50b7784a764845f6cce2fe38831cb93f0508d0051", size = 656317, upload-time = "2025-11-05T21:41:25.305Z" }, + { url = "https://files.pythonhosted.org/packages/b7/8a/a4078f6e14932ac7edb171149c481de29969d96ddee3ece5dc4c26f9e0c3/rignore-0.7.6-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:2bdab1d31ec9b4fb1331980ee49ea051c0d7f7bb6baa28b3125ef03cdc48fdaf", size = 883057, upload-time = "2025-11-05T20:42:42.741Z" }, + { url = "https://files.pythonhosted.org/packages/f9/8f/f8daacd177db4bf7c2223bab41e630c52711f8af9ed279be2058d2fe4982/rignore-0.7.6-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:90f0a00ce0c866c275bf888271f1dc0d2140f29b82fcf33cdbda1e1a6af01010", size = 820150, upload-time = "2025-11-05T20:42:26.545Z" }, + { url = 
"https://files.pythonhosted.org/packages/36/31/b65b837e39c3f7064c426754714ac633b66b8c2290978af9d7f513e14aa9/rignore-0.7.6-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c1ad295537041dc2ed4b540fb1a3906bd9ede6ccdad3fe79770cd89e04e3c73c", size = 897406, upload-time = "2025-11-05T20:40:53.854Z" }, + { url = "https://files.pythonhosted.org/packages/ca/58/1970ce006c427e202ac7c081435719a076c478f07b3a23f469227788dc23/rignore-0.7.6-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f782dbd3a65a5ac85adfff69e5c6b101285ef3f845c3a3cae56a54bebf9fe116", size = 874050, upload-time = "2025-11-05T20:41:08.922Z" }, + { url = "https://files.pythonhosted.org/packages/d4/00/eb45db9f90137329072a732273be0d383cb7d7f50ddc8e0bceea34c1dfdf/rignore-0.7.6-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:65cece3b36e5b0826d946494734c0e6aaf5a0337e18ff55b071438efe13d559e", size = 1167835, upload-time = "2025-11-05T20:41:24.997Z" }, + { url = "https://files.pythonhosted.org/packages/f3/f1/6f1d72ddca41a64eed569680587a1236633587cc9f78136477ae69e2c88a/rignore-0.7.6-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d7e4bb66c13cd7602dc8931822c02dfbbd5252015c750ac5d6152b186f0a8be0", size = 941945, upload-time = "2025-11-05T20:41:40.628Z" }, + { url = "https://files.pythonhosted.org/packages/48/6f/2f178af1c1a276a065f563ec1e11e7a9e23d4996fd0465516afce4b5c636/rignore-0.7.6-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:297e500c15766e196f68aaaa70e8b6db85fa23fdc075b880d8231fdfba738cd7", size = 959067, upload-time = "2025-11-05T20:42:11.09Z" }, + { url = "https://files.pythonhosted.org/packages/5b/db/423a81c4c1e173877c7f9b5767dcaf1ab50484a94f60a0b2ed78be3fa765/rignore-0.7.6-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a07084211a8d35e1a5b1d32b9661a5ed20669970b369df0cf77da3adea3405de", size = 984438, upload-time = "2025-11-05T20:41:55.443Z" }, + { url = 
"https://files.pythonhosted.org/packages/31/eb/c4f92cc3f2825d501d3c46a244a671eb737fc1bcf7b05a3ecd34abb3e0d7/rignore-0.7.6-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:181eb2a975a22256a1441a9d2f15eb1292839ea3f05606620bd9e1938302cf79", size = 1078365, upload-time = "2025-11-05T21:40:15.148Z" }, + { url = "https://files.pythonhosted.org/packages/26/09/99442f02794bd7441bfc8ed1c7319e890449b816a7493b2db0e30af39095/rignore-0.7.6-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:7bbcdc52b5bf9f054b34ce4af5269df5d863d9c2456243338bc193c28022bd7b", size = 1139066, upload-time = "2025-11-05T21:40:32.771Z" }, + { url = "https://files.pythonhosted.org/packages/2c/88/bcfc21e520bba975410e9419450f4b90a2ac8236b9a80fd8130e87d098af/rignore-0.7.6-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:f2e027a6da21a7c8c0d87553c24ca5cc4364def18d146057862c23a96546238e", size = 1118036, upload-time = "2025-11-05T21:40:49.646Z" }, + { url = "https://files.pythonhosted.org/packages/e2/25/d37215e4562cda5c13312636393aea0bafe38d54d4e0517520a4cc0753ec/rignore-0.7.6-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:ee4a18b82cbbc648e4aac1510066682fe62beb5dc88e2c67c53a83954e541360", size = 1127550, upload-time = "2025-11-05T21:41:07.648Z" }, + { url = "https://files.pythonhosted.org/packages/dc/76/a264ab38bfa1620ec12a8ff1c07778da89e16d8c0f3450b0333020d3d6dc/rignore-0.7.6-cp313-cp313-win32.whl", hash = "sha256:a7d7148b6e5e95035d4390396895adc384d37ff4e06781a36fe573bba7c283e5", size = 646097, upload-time = "2025-11-05T21:41:53.201Z" }, + { url = "https://files.pythonhosted.org/packages/62/44/3c31b8983c29ea8832b6082ddb1d07b90379c2d993bd20fce4487b71b4f4/rignore-0.7.6-cp313-cp313-win_amd64.whl", hash = "sha256:b037c4b15a64dced08fc12310ee844ec2284c4c5c1ca77bc37d0a04f7bff386e", size = 726170, upload-time = "2025-11-05T21:41:38.131Z" }, + { url = "https://files.pythonhosted.org/packages/aa/41/e26a075cab83debe41a42661262f606166157df84e0e02e2d904d134c0d8/rignore-0.7.6-cp313-cp313-win_arm64.whl", 
hash = "sha256:e47443de9b12fe569889bdbe020abe0e0b667516ee2ab435443f6d0869bd2804", size = 656184, upload-time = "2025-11-05T21:41:27.396Z" }, + { url = "https://files.pythonhosted.org/packages/9a/b9/1f5bd82b87e5550cd843ceb3768b4a8ef274eb63f29333cf2f29644b3d75/rignore-0.7.6-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:8e41be9fa8f2f47239ded8920cc283699a052ac4c371f77f5ac017ebeed75732", size = 882632, upload-time = "2025-11-05T20:42:44.063Z" }, + { url = "https://files.pythonhosted.org/packages/e9/6b/07714a3efe4a8048864e8a5b7db311ba51b921e15268b17defaebf56d3db/rignore-0.7.6-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:6dc1e171e52cefa6c20e60c05394a71165663b48bca6c7666dee4f778f2a7d90", size = 820760, upload-time = "2025-11-05T20:42:27.885Z" }, + { url = "https://files.pythonhosted.org/packages/ac/0f/348c829ea2d8d596e856371b14b9092f8a5dfbb62674ec9b3f67e4939a9d/rignore-0.7.6-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0ce2268837c3600f82ab8db58f5834009dc638ee17103582960da668963bebc5", size = 899044, upload-time = "2025-11-05T20:40:55.336Z" }, + { url = "https://files.pythonhosted.org/packages/f0/30/2e1841a19b4dd23878d73edd5d82e998a83d5ed9570a89675f140ca8b2ad/rignore-0.7.6-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:690a3e1b54bfe77e89c4bacb13f046e642f8baadafc61d68f5a726f324a76ab6", size = 874144, upload-time = "2025-11-05T20:41:10.195Z" }, + { url = "https://files.pythonhosted.org/packages/c2/bf/0ce9beb2e5f64c30e3580bef09f5829236889f01511a125f98b83169b993/rignore-0.7.6-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:09d12ac7a0b6210c07bcd145007117ebd8abe99c8eeb383e9e4673910c2754b2", size = 1168062, upload-time = "2025-11-05T20:41:26.511Z" }, + { url = "https://files.pythonhosted.org/packages/b9/8b/571c178414eb4014969865317da8a02ce4cf5241a41676ef91a59aab24de/rignore-0.7.6-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:2a2b2b74a8c60203b08452479b90e5ce3dbe96a916214bc9eb2e5af0b6a9beb0", size = 942542, upload-time = "2025-11-05T20:41:41.838Z" }, + { url = "https://files.pythonhosted.org/packages/19/62/7a3cf601d5a45137a7e2b89d10c05b5b86499190c4b7ca5c3c47d79ee519/rignore-0.7.6-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8fc5a531ef02131e44359419a366bfac57f773ea58f5278c2cdd915f7d10ea94", size = 958739, upload-time = "2025-11-05T20:42:12.463Z" }, + { url = "https://files.pythonhosted.org/packages/5f/1f/4261f6a0d7caf2058a5cde2f5045f565ab91aa7badc972b57d19ce58b14e/rignore-0.7.6-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b7a1f77d9c4cd7e76229e252614d963442686bfe12c787a49f4fe481df49e7a9", size = 984138, upload-time = "2025-11-05T20:41:56.775Z" }, + { url = "https://files.pythonhosted.org/packages/2b/bf/628dfe19c75e8ce1f45f7c248f5148b17dfa89a817f8e3552ab74c3ae812/rignore-0.7.6-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:ead81f728682ba72b5b1c3d5846b011d3e0174da978de87c61645f2ed36659a7", size = 1079299, upload-time = "2025-11-05T21:40:16.639Z" }, + { url = "https://files.pythonhosted.org/packages/af/a5/be29c50f5c0c25c637ed32db8758fdf5b901a99e08b608971cda8afb293b/rignore-0.7.6-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:12ffd50f520c22ffdabed8cd8bfb567d9ac165b2b854d3e679f4bcaef11a9441", size = 1139618, upload-time = "2025-11-05T21:40:34.507Z" }, + { url = "https://files.pythonhosted.org/packages/2a/40/3c46cd7ce4fa05c20b525fd60f599165e820af66e66f2c371cd50644558f/rignore-0.7.6-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:e5a16890fbe3c894f8ca34b0fcacc2c200398d4d46ae654e03bc9b3dbf2a0a72", size = 1117626, upload-time = "2025-11-05T21:40:51.494Z" }, + { url = "https://files.pythonhosted.org/packages/8c/b9/aea926f263b8a29a23c75c2e0d8447965eb1879d3feb53cfcf84db67ed58/rignore-0.7.6-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:3abab3bf99e8a77488ef6c7c9a799fac22224c28fe9f25cc21aa7cc2b72bfc0b", size = 
1128144, upload-time = "2025-11-05T21:41:09.169Z" }, + { url = "https://files.pythonhosted.org/packages/a4/f6/0d6242f8d0df7f2ecbe91679fefc1f75e7cd2072cb4f497abaab3f0f8523/rignore-0.7.6-cp314-cp314-win32.whl", hash = "sha256:eeef421c1782953c4375aa32f06ecae470c1285c6381eee2a30d2e02a5633001", size = 646385, upload-time = "2025-11-05T21:41:55.105Z" }, + { url = "https://files.pythonhosted.org/packages/d5/38/c0dcd7b10064f084343d6af26fe9414e46e9619c5f3224b5272e8e5d9956/rignore-0.7.6-cp314-cp314-win_amd64.whl", hash = "sha256:6aeed503b3b3d5af939b21d72a82521701a4bd3b89cd761da1e7dc78621af304", size = 725738, upload-time = "2025-11-05T21:41:39.736Z" }, + { url = "https://files.pythonhosted.org/packages/d9/7a/290f868296c1ece914d565757ab363b04730a728b544beb567ceb3b2d96f/rignore-0.7.6-cp314-cp314-win_arm64.whl", hash = "sha256:104f215b60b3c984c386c3e747d6ab4376d5656478694e22c7bd2f788ddd8304", size = 656008, upload-time = "2025-11-05T21:41:29.028Z" }, + { url = "https://files.pythonhosted.org/packages/ca/d2/3c74e3cd81fe8ea08a8dcd2d755c09ac2e8ad8fe409508904557b58383d3/rignore-0.7.6-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:bb24a5b947656dd94cb9e41c4bc8b23cec0c435b58be0d74a874f63c259549e8", size = 882835, upload-time = "2025-11-05T20:42:45.443Z" }, + { url = "https://files.pythonhosted.org/packages/77/61/a772a34b6b63154877433ac2d048364815b24c2dd308f76b212c408101a2/rignore-0.7.6-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:5b1e33c9501cefe24b70a1eafd9821acfd0ebf0b35c3a379430a14df089993e3", size = 820301, upload-time = "2025-11-05T20:42:29.226Z" }, + { url = "https://files.pythonhosted.org/packages/71/30/054880b09c0b1b61d17eeb15279d8bf729c0ba52b36c3ada52fb827cbb3c/rignore-0.7.6-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bec3994665a44454df86deb762061e05cd4b61e3772f5b07d1882a8a0d2748d5", size = 897611, upload-time = "2025-11-05T20:40:56.475Z" }, + { url = 
"https://files.pythonhosted.org/packages/1e/40/b2d1c169f833d69931bf232600eaa3c7998ba4f9a402e43a822dad2ea9f2/rignore-0.7.6-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:26cba2edfe3cff1dfa72bddf65d316ddebf182f011f2f61538705d6dbaf54986", size = 873875, upload-time = "2025-11-05T20:41:11.561Z" }, + { url = "https://files.pythonhosted.org/packages/55/59/ca5ae93d83a1a60e44b21d87deb48b177a8db1b85e82fc8a9abb24a8986d/rignore-0.7.6-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ffa86694fec604c613696cb91e43892aa22e1fec5f9870e48f111c603e5ec4e9", size = 1167245, upload-time = "2025-11-05T20:41:28.29Z" }, + { url = "https://files.pythonhosted.org/packages/a5/52/cf3dce392ba2af806cba265aad6bcd9c48bb2a6cb5eee448d3319f6e505b/rignore-0.7.6-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:48efe2ed95aa8104145004afb15cdfa02bea5cdde8b0344afeb0434f0d989aa2", size = 941750, upload-time = "2025-11-05T20:41:43.111Z" }, + { url = "https://files.pythonhosted.org/packages/ec/be/3f344c6218d779395e785091d05396dfd8b625f6aafbe502746fcd880af2/rignore-0.7.6-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8dcae43eb44b7f2457fef7cc87f103f9a0013017a6f4e62182c565e924948f21", size = 958896, upload-time = "2025-11-05T20:42:13.784Z" }, + { url = "https://files.pythonhosted.org/packages/c9/34/d3fa71938aed7d00dcad87f0f9bcb02ad66c85d6ffc83ba31078ce53646a/rignore-0.7.6-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2cd649a7091c0dad2f11ef65630d30c698d505cbe8660dd395268e7c099cc99f", size = 983992, upload-time = "2025-11-05T20:41:58.022Z" }, + { url = "https://files.pythonhosted.org/packages/24/a4/52a697158e9920705bdbd0748d59fa63e0f3233fb92e9df9a71afbead6ca/rignore-0.7.6-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:42de84b0289d478d30ceb7ae59023f7b0527786a9a5b490830e080f0e4ea5aeb", size = 1078181, upload-time = "2025-11-05T21:40:18.151Z" }, + { url = 
"https://files.pythonhosted.org/packages/ac/65/aa76dbcdabf3787a6f0fd61b5cc8ed1e88580590556d6c0207960d2384bb/rignore-0.7.6-cp314-cp314t-musllinux_1_2_armv7l.whl", hash = "sha256:875a617e57b53b4acbc5a91de418233849711c02e29cc1f4f9febb2f928af013", size = 1139232, upload-time = "2025-11-05T21:40:35.966Z" }, + { url = "https://files.pythonhosted.org/packages/08/44/31b31a49b3233c6842acc1c0731aa1e7fb322a7170612acf30327f700b44/rignore-0.7.6-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:8703998902771e96e49968105207719f22926e4431b108450f3f430b4e268b7c", size = 1117349, upload-time = "2025-11-05T21:40:53.013Z" }, + { url = "https://files.pythonhosted.org/packages/e9/ae/1b199a2302c19c658cf74e5ee1427605234e8c91787cfba0015f2ace145b/rignore-0.7.6-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:602ef33f3e1b04c1e9a10a3c03f8bc3cef2d2383dcc250d309be42b49923cabc", size = 1127702, upload-time = "2025-11-05T21:41:10.881Z" }, + { url = "https://files.pythonhosted.org/packages/fc/d3/18210222b37e87e36357f7b300b7d98c6dd62b133771e71ae27acba83a4f/rignore-0.7.6-cp314-cp314t-win32.whl", hash = "sha256:c1d8f117f7da0a4a96a8daef3da75bc090e3792d30b8b12cfadc240c631353f9", size = 647033, upload-time = "2025-11-05T21:42:00.095Z" }, + { url = "https://files.pythonhosted.org/packages/3e/87/033eebfbee3ec7d92b3bb1717d8f68c88e6fc7de54537040f3b3a405726f/rignore-0.7.6-cp314-cp314t-win_amd64.whl", hash = "sha256:ca36e59408bec81de75d307c568c2d0d410fb880b1769be43611472c61e85c96", size = 725647, upload-time = "2025-11-05T21:41:44.449Z" }, + { url = "https://files.pythonhosted.org/packages/79/62/b88e5879512c55b8ee979c666ee6902adc4ed05007226de266410ae27965/rignore-0.7.6-cp314-cp314t-win_arm64.whl", hash = "sha256:b83adabeb3e8cf662cabe1931b83e165b88c526fa6af6b3aa90429686e474896", size = 656035, upload-time = "2025-11-05T21:41:31.13Z" }, +] + +[[package]] +name = "rpds-py" +version = "2026.5.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/2e/43/25a8dcd3feedd735039a8f0b5b7e3b118232b5eae288c4fd9ab200d41094/rpds_py-2026.5.1.tar.gz", hash = "sha256:07b24fea40541e28570e5b795a4a38fbdcd12550c06bd0748005ecc8116ca256", size = 64459, upload-time = "2026-05-28T12:02:13.232Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d4/e7/a78582dc57caa592dcc7d4fb69b61390561e908eb3d2f5df5928a8e354c0/rpds_py-2026.5.1-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:3abe24a66e57adcfa645d718063a5fa5103ecc71ddbf26d78af8f9368018ff1d", size = 353040, upload-time = "2026-05-28T11:59:12.531Z" }, + { url = "https://files.pythonhosted.org/packages/a3/43/35e3f136343aef451e545ce8c38d36c2f93c0ed88703db8b64ba2b205c68/rpds_py-2026.5.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:58b1d94308ddf0b1982f61f2eb54bf92997c9ece8a8093ef014250f4a517906c", size = 345775, upload-time = "2026-05-28T11:59:13.827Z" }, + { url = "https://files.pythonhosted.org/packages/20/e1/0f2160c5982d3157734d5cb3ed63d8b2d583a73c9864f77b666449f32cf8/rpds_py-2026.5.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0fa92420128dadce7f54bd73ba1825a273e9268fe9e35dbf7e6362890efa4e08", size = 376329, upload-time = "2026-05-28T11:59:15.271Z" }, + { url = "https://files.pythonhosted.org/packages/d0/11/ee0ba42aff83bf4effdbc576673c6be64c5e173978c3f6d537e94482f77d/rpds_py-2026.5.1-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ca653c6546386227cd9800d1bef6a348099acf8db4250341da6d90f663d6dfcb", size = 383539, upload-time = "2026-05-28T11:59:16.665Z" }, + { url = "https://files.pythonhosted.org/packages/11/df/d94aa6a499d4ac40afe2d7620f2c597fd3c0f182e854ad7cf3f596a81cb6/rpds_py-2026.5.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:66c93681c4729e4e3ecba31b8179fae083ff3118841672835140338b4b9867c1", size = 494674, upload-time = "2026-05-28T11:59:17.991Z" }, + { url = 
"https://files.pythonhosted.org/packages/1f/75/33d30f43bb2f458de11979486a591b1bf6e5651765ed1704c6197c2dc773/rpds_py-2026.5.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:40ff257542e04796880e011e15cd4dc21c2599975df2aaa8f2c8495ca574e1a5", size = 389268, upload-time = "2026-05-28T11:59:19.434Z" }, + { url = "https://files.pythonhosted.org/packages/f4/1e/2c9096fc19d5fd084b0184ca2b651e659aa0a37e6fdbecf6ece47f147fe1/rpds_py-2026.5.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b6825cc329b290e93c5f6a9be2393118a763f6ccf6abd83704e0c102ca583644", size = 376280, upload-time = "2026-05-28T11:59:21Z" }, + { url = "https://files.pythonhosted.org/packages/b9/e5/61ec9f8be8211ea7f48448195549e4aaf02004083475493b0e137702ecb2/rpds_py-2026.5.1-cp312-cp312-manylinux_2_31_riscv64.whl", hash = "sha256:de42116e69cb53b911cc34aee5ab98f36c597b822545045d49e938818b99e5e4", size = 387233, upload-time = "2026-05-28T11:59:22.454Z" }, + { url = "https://files.pythonhosted.org/packages/0d/ca/bcec1005c4f4a234f92a29078631fee49206c7265ccae966f18fd332e80e/rpds_py-2026.5.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:c0f920015df2a504bebaba6d4c31ccf3fcf942f92655c086da30b671aad19aa6", size = 405009, upload-time = "2026-05-28T11:59:23.845Z" }, + { url = "https://files.pythonhosted.org/packages/72/e6/4d5718c5cf26c522dc7c9999e238da1e77380b81d0c5d1df11e271ddfeb1/rpds_py-2026.5.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:0408a24e44feb919423dc6d9da677cb5cddb894d2ca9e763967d156d9c60fab4", size = 553113, upload-time = "2026-05-28T11:59:25.184Z" }, + { url = "https://files.pythonhosted.org/packages/d4/25/2ee807bdb3e1f0b7eddf7782acd5665a8b5205a331a7d7244a52c4812fd9/rpds_py-2026.5.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:cea68bcd53467561ae2f96a6bdad1544299ba97b5b0ddcd5ac3d376e5c781c24", size = 618838, upload-time = "2026-05-28T11:59:26.749Z" }, + { url = 
"https://files.pythonhosted.org/packages/6a/c1/7d4c26f167f8c41501cc073d30ee22082b16ce358cf5b00ec97cbc7804ea/rpds_py-2026.5.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:4be8b1d2a705cc37d08256004e1d07de143fa0075c8e85a3df020b776f62b732", size = 582436, upload-time = "2026-05-28T11:59:28.11Z" }, + { url = "https://files.pythonhosted.org/packages/04/1d/9d12b0a337bab46f4769f8857f4007e3b2d639e14f9a44a0efe157696e64/rpds_py-2026.5.1-cp312-cp312-win32.whl", hash = "sha256:6736718bd4fc49cbcb538ba30516fdbef161522acefb739657d48b97bd864fed", size = 212734, upload-time = "2026-05-28T11:59:29.689Z" }, + { url = "https://files.pythonhosted.org/packages/c5/93/e4116f2de7f56bc7406a76033dc501811ddeb22b7f056b92d632871ebb0c/rpds_py-2026.5.1-cp312-cp312-win_amd64.whl", hash = "sha256:0a7d1eec967df0e9b22614a5e177622e0c89611d03727fa0cb48e45028907870", size = 229045, upload-time = "2026-05-28T11:59:31.033Z" }, + { url = "https://files.pythonhosted.org/packages/cb/53/6c3419d85eb2ec5938a37627c585b42d76a63bb731d6e42ed4b079ebf486/rpds_py-2026.5.1-cp312-cp312-win_arm64.whl", hash = "sha256:1841d067089e117142d79b98aa0df2f08b52f2ecc1819dd2700636c0db74a473", size = 223967, upload-time = "2026-05-28T11:59:32.318Z" }, + { url = "https://files.pythonhosted.org/packages/6c/32/14c961ad295f490eb0849ada8b79683e93a59b9de3afdd983eaf55fa6867/rpds_py-2026.5.1-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:efef4ac29c6ff495531eb17ee705b62841ecaa291b7c7077e848ea03e237164d", size = 352787, upload-time = "2026-05-28T11:59:33.655Z" }, + { url = "https://files.pythonhosted.org/packages/ca/bb/d1b85117967c11191441a7274ae616c65d93901d082c588f89a50a8da5ae/rpds_py-2026.5.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:c39f5b67a8a2e67179ada2a954227d670fe65fa9098457f698f56ddf248709b3", size = 345179, upload-time = "2026-05-28T11:59:35Z" }, + { url = 
"https://files.pythonhosted.org/packages/7c/46/d84105f062e626a1b233f863907288a4708c2d833b8b4c6fb2764bc080c0/rpds_py-2026.5.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b5c30f3f04eef4fbd362226a6f31d7c8895ca4fbb6e0b790f6890a98d8da8559", size = 376173, upload-time = "2026-05-28T11:59:36.43Z" }, + { url = "https://files.pythonhosted.org/packages/e2/ae/469d7959ce5b1201e1de135dc735b86db3b35dd0d1734f6a44246d5f061c/rpds_py-2026.5.1-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:277f6c82f0580848796c7ecc8a7173aa3bfb928e4ff831261c2f60a81dc270db", size = 383162, upload-time = "2026-05-28T11:59:37.995Z" }, + { url = "https://files.pythonhosted.org/packages/dc/a2/57853d31a1116a561aa072794602ad3f6341e18d70a8523f1bd5b9fc1e5a/rpds_py-2026.5.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:63c2c4c213f1a4e3f3de28ecab029dbdee976324e729c0d7a55211be72576b02", size = 495093, upload-time = "2026-05-28T11:59:39.453Z" }, + { url = "https://files.pythonhosted.org/packages/99/63/3a8eabcad9314b7daf5c65f451d2c33d989235cd8a5762186cf2c3f5a4f8/rpds_py-2026.5.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3350ec808fb538fe71a1f94dfaa0e29c598dfad805ce49f0caec5ae3183c652b", size = 389829, upload-time = "2026-05-28T11:59:40.896Z" }, + { url = "https://files.pythonhosted.org/packages/4b/25/05678d97fc25e2622df14dc530fb82023174ecfff6733991ed0d78f167bd/rpds_py-2026.5.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b1b964e3ab599e718dc46c018d104b1ebc007cbc6567d827c94a687fca56d77e", size = 374786, upload-time = "2026-05-28T11:59:42.626Z" }, + { url = "https://files.pythonhosted.org/packages/88/d1/8c90b6431e80a3b91b284a5c7c8c0c4f9c006444d90477a740d6e0f9c694/rpds_py-2026.5.1-cp313-cp313-manylinux_2_31_riscv64.whl", hash = "sha256:19cb09fab7b7fc96b2a6e28f2e34b72a3705ff27b37edb77455316e5d3f3dc9b", size = 386920, upload-time = "2026-05-28T11:59:44.124Z" }, + { 
url = "https://files.pythonhosted.org/packages/ff/99/4638f672ab356682d633ee0da9255f5b67ce6efd0b85eb94ad3e255e65a5/rpds_py-2026.5.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:abe76bcdba31e576cb83eeb8797aa0d882b738fef6dc65d0601fc753806a5b46", size = 405059, upload-time = "2026-05-28T11:59:47.177Z" }, + { url = "https://files.pythonhosted.org/packages/66/3f/3546524b6eb4cc2e1f363a3d638fa52f6c24faae3500c25fb488b02f1740/rpds_py-2026.5.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:8bff7073db3899158fff55ebf57b113a67030af26f80a18978f9f0aa60250ddf", size = 553030, upload-time = "2026-05-28T11:59:48.603Z" }, + { url = "https://files.pythonhosted.org/packages/c6/c3/7b3388c796fcf471bd17194242d4dc1a7608567c0fa422bcc1c5e79f9c1e/rpds_py-2026.5.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:8ba264fa49be666cd9cc56bf34ec7002fb3d27a4aee5bcb4d43d0d18feb1bb6f", size = 618975, upload-time = "2026-05-28T11:59:50.314Z" }, + { url = "https://files.pythonhosted.org/packages/61/1e/a3cb07f2795075d1d88efddae2f541359fde5f08c81ee114c29c2949c90a/rpds_py-2026.5.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4860b603ddda0475a8885499b3729e90229d480105b42651962a5397d995fa89", size = 581178, upload-time = "2026-05-28T11:59:51.673Z" }, + { url = "https://files.pythonhosted.org/packages/a1/74/e758c03a5ef46f04c37f2651a2893db846d569ba8a7bca469d4b58939bcd/rpds_py-2026.5.1-cp313-cp313-win32.whl", hash = "sha256:7944270ae71383f6e2657dd7d5ce4eeb4ac2d0059a6738f0510583d462ab4842", size = 212481, upload-time = "2026-05-28T11:59:53.148Z" }, + { url = "https://files.pythonhosted.org/packages/70/ec/a2aca432db9c7359b40fa393eeeaa0d166c2f70175be956e75fa24197c44/rpds_py-2026.5.1-cp313-cp313-win_amd64.whl", hash = "sha256:88647f43a73c4e01be19b04ceef0c8d3a1958153604d13c773becd8016f2a0cf", size = 228519, upload-time = "2026-05-28T11:59:54.505Z" }, + { url = 
"https://files.pythonhosted.org/packages/29/60/a73bfdd45b096574556acf303bbd9fa9eed36ca8a818b514e2a5d5fe2b9d/rpds_py-2026.5.1-cp313-cp313-win_arm64.whl", hash = "sha256:453895624ecf7db7063b1004e44037522bbaef9ff6a945e59bc71662d7a03abd", size = 223446, upload-time = "2026-05-28T11:59:56.081Z" }, + { url = "https://files.pythonhosted.org/packages/18/e2/408105fd611823f00882aea810f3989a30d26b1bab8b6beb20f98c724e0e/rpds_py-2026.5.1-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:b4e4bc98639ec915f512fde3aa7a95e0041d95d9c3cc86eea841fa63cb1e8600", size = 355287, upload-time = "2026-05-28T11:59:57.448Z" }, + { url = "https://files.pythonhosted.org/packages/8d/58/5c4a43436843c90d0f6d19f82c200c80e3843ca9fa07b237623327f6d384/rpds_py-2026.5.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:cacedb7a6e167680acba45ad5716e89067d225dc80da0d7040cae8c81d4572fa", size = 347033, upload-time = "2026-05-28T11:59:58.881Z" }, + { url = "https://files.pythonhosted.org/packages/fb/c2/1a71acdacaf4e259b10278fb87b039ded3cf80041bcd89dd8a3ea702ded6/rpds_py-2026.5.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:68700371c5d7ae1412862ddfa719090925c93ecf351c566d66f09d04b136ea00", size = 376891, upload-time = "2026-05-28T12:00:00.516Z" }, + { url = "https://files.pythonhosted.org/packages/c2/c8/535f3d9b65addd8e28aa87b83c6e526799c3717a88273db8ea795beeef7a/rpds_py-2026.5.1-cp313-cp313t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:296c799becfa849c779c8725494fe9ed94959ed886787df4364b058465bad7f0", size = 385646, upload-time = "2026-05-28T12:00:02.394Z" }, + { url = "https://files.pythonhosted.org/packages/1c/91/dc033f313345c354ade914dbe73cdb90b615a4409ea02430d5356794f3d8/rpds_py-2026.5.1-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d3858b908218ee108d0bbfb2095ccc237648053c9bf98affad7cb079acaf1d97", size = 498830, upload-time = "2026-05-28T12:00:04.189Z" }, + { url = 
"https://files.pythonhosted.org/packages/27/fc/90fcbea459dbb8ddc18a2e0fd1de9412b48bc84ffff2db771cf714bacfd6/rpds_py-2026.5.1-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4fb8d2e7cb2f850b169806d61d1b991738acec96500a75c30f49caf064ce7cef", size = 392830, upload-time = "2026-05-28T12:00:05.797Z" }, + { url = "https://files.pythonhosted.org/packages/b2/1d/46cd11a228c9750684a798d98f878be6f614aa762438da7378f035e79e35/rpds_py-2026.5.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:27b74c10ed6a8f190f4287f53bcfea348b92a84a9c9f70d30183d1e6172d580d", size = 379613, upload-time = "2026-05-28T12:00:07.433Z" }, + { url = "https://files.pythonhosted.org/packages/24/4a/d9b0c6af3a1de03eb93741bbe8be2bdce84d8fda8224f3005451d86df389/rpds_py-2026.5.1-cp313-cp313t-manylinux_2_31_riscv64.whl", hash = "sha256:b9a6528956191c48c52294a592dbd4a8386d7048bdb25c0efcb6b966466c6d83", size = 388183, upload-time = "2026-05-28T12:00:09.227Z" }, + { url = "https://files.pythonhosted.org/packages/c5/b4/db7aaabdda6d020afc87d981bcc2f57a434c7dec60ecfc2ab3dd50b20351/rpds_py-2026.5.1-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:af03e34e860047bc7a352b842856fcf78798fbb81132cc98bd2f907ab4eb9cd2", size = 408578, upload-time = "2026-05-28T12:00:10.779Z" }, + { url = "https://files.pythonhosted.org/packages/08/d6/070f6a41cbb343e2ac4171859bf3f3623e0ab002f72619d6d505313ec2de/rpds_py-2026.5.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:fea6e836d10abbe191d557d33bd58bd5987725fe63aa1eefe557d230209855bd", size = 553573, upload-time = "2026-05-28T12:00:12.443Z" }, + { url = "https://files.pythonhosted.org/packages/75/ab/1a71ea3589c4345dac0a0518f0e6a031cb42689277851b683c46d27463a5/rpds_py-2026.5.1-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:fc0c0f878ea770a0a8a462456c5ad36fc9fe6358e6b76fdadc7f17575e0b8bf1", size = 620861, upload-time = "2026-05-28T12:00:14.09Z" }, + { url = 
"https://files.pythonhosted.org/packages/8a/22/9bf80a56069c0c443fcfefac639a86a744550a2898817a6dfd3e26654924/rpds_py-2026.5.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:e0b360f316d966b048b085857630b3cc51f3db2f07b06f440eac8f695374d1e3", size = 585633, upload-time = "2026-05-28T12:00:15.66Z" }, + { url = "https://files.pythonhosted.org/packages/da/68/3b2c0a75c9e04125696f84ebdbbf304acf5a40b58ba4481cdb98a922c3ba/rpds_py-2026.5.1-cp313-cp313t-win32.whl", hash = "sha256:a2999883eedf72fdfb7520b92c7d4ec2572a71ff40239377aa604cc529eecafc", size = 210074, upload-time = "2026-05-28T12:00:17.291Z" }, + { url = "https://files.pythonhosted.org/packages/e7/8b/609157d5a25d37d4f29f92840ba531f416907c34ae5c5739dd21fc2bef98/rpds_py-2026.5.1-cp313-cp313t-win_amd64.whl", hash = "sha256:e07be2a9d7122bd6e82dea89814ef8dc893feb1aae97fec1630f3263bbb30e55", size = 228635, upload-time = "2026-05-28T12:00:18.73Z" }, + { url = "https://files.pythonhosted.org/packages/d4/6f/19c1918a4b590d8de87e712e4abe4b3875771eff60216fb6153cf6665c68/rpds_py-2026.5.1-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:1f2c391c3059798093b65df23aca2cac150460ae9c630d99dec83d703d9485b9", size = 349756, upload-time = "2026-05-28T12:00:20.217Z" }, + { url = "https://files.pythonhosted.org/packages/e5/60/a06fe7da34eca79dacbf958a2ba0c6eea85bc2b29de20080bf40f72f66fa/rpds_py-2026.5.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:413b424f7c4ee65ab5e5be91f5731be0f8b41a1ee2b12dfe810d716312e95a78", size = 343831, upload-time = "2026-05-28T12:00:21.711Z" }, + { url = "https://files.pythonhosted.org/packages/bf/ec/b2333b97b90e2a6ef6ca8ad386ee284968e74bcfe113b3f1a8d9036429a9/rpds_py-2026.5.1-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2c595a1d9255dce0599e13130d1440ab2506654f2b50294226ee06402f8fef63", size = 375127, upload-time = "2026-05-28T12:00:23.326Z" }, + { url = 
"https://files.pythonhosted.org/packages/14/7f/e00aae54067f2b488c4637961d5f58204d470795fc791085fa3f15060d2e/rpds_py-2026.5.1-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:1c27c5f6102eac8c03e7595a00827a53b271ba40a53b59ff8709170e0855ea4a", size = 379034, upload-time = "2026-05-28T12:00:24.89Z" }, + { url = "https://files.pythonhosted.org/packages/be/cc/423999bbb8ae8dc93c77fc1d5e984ade5eb89d237d3bb884ccfa72ae2890/rpds_py-2026.5.1-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6c7fcf61d44cacecaf3aea542b0e053db77972a4573e7ceda16fb2b399161195", size = 490823, upload-time = "2026-05-28T12:00:26.676Z" }, + { url = "https://files.pythonhosted.org/packages/0f/aa/c671bf660f12e68d3c52ff86c7066ed1372df5a0f4f2ff584e419b8207e7/rpds_py-2026.5.1-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2c817a189d4ee14290420e5ff051e4dd6baa13f3edf84685071dee07a6d538ee", size = 388144, upload-time = "2026-05-28T12:00:28.577Z" }, + { url = "https://files.pythonhosted.org/packages/19/c8/d63bb75b68afe77b229e3021c6031bcaf01da5db5b0e69d0d10f9ba679a7/rpds_py-2026.5.1-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:21846aac0ed2e0589f38c12dc44e77bb64e494b771eadbcf169cba00566ba7ba", size = 371959, upload-time = "2026-05-28T12:00:30.304Z" }, + { url = "https://files.pythonhosted.org/packages/82/35/c51122014d8274ff37dc606d60049c3db7d83da02b5b282511e5a906a9a6/rpds_py-2026.5.1-cp314-cp314-manylinux_2_31_riscv64.whl", hash = "sha256:b317c87a13f769a4e787819bd508aaa5d69aa09b0880de9af6d3a8a54571cdec", size = 383558, upload-time = "2026-05-28T12:00:31.764Z" }, + { url = "https://files.pythonhosted.org/packages/e3/f9/2790cb99c136a5363acdeacf5c27c56f3de0d4118a1f48fca83404c99c89/rpds_py-2026.5.1-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:ce87129d9f2c14fa6c4a8601fb80eb4488c80d38a20cd13758ef11123e14995d", size = 402789, upload-time = "2026-05-28T12:00:33.247Z" }, + { url = 
"https://files.pythonhosted.org/packages/e5/1b/e4fb584f8c75d35c38150ff6a332cda949e6f97acba1f4fd123b14ab56fe/rpds_py-2026.5.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:9cdddb6c1207d284d94fd1530adf57fbd797fe7c4b8704ba85f49414f2557e7d", size = 551405, upload-time = "2026-05-28T12:00:34.819Z" }, + { url = "https://files.pythonhosted.org/packages/d8/f7/a6731b4216cb3793ea1af5391da240f5683dacc0d13e034fe5fc3503f240/rpds_py-2026.5.1-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:4e237e139f94d3c036fd28eb9f564c99055476ff4ff05cd42be55ce349b5aa02", size = 616975, upload-time = "2026-05-28T12:00:36.268Z" }, + { url = "https://files.pythonhosted.org/packages/2c/ea/2e051a81d95d8e63f4b35a1c463a87e8766bc3d083c067c5dfb6bf220747/rpds_py-2026.5.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:ed0954b524873214369184a9c82b0eaa45a3fbb9a798cd95b17e0d98499e7ea0", size = 578701, upload-time = "2026-05-28T12:00:37.82Z" }, + { url = "https://files.pythonhosted.org/packages/65/56/b5f6fdb2083e32bca8a8993d89e70db114b4756c9e2c38421328126689d2/rpds_py-2026.5.1-cp314-cp314-win32.whl", hash = "sha256:2d88621d6a7d4dfa633d21abe90f280bb205274e16b1d1e61c6ad4640b2453b7", size = 209806, upload-time = "2026-05-28T12:00:39.492Z" }, + { url = "https://files.pythonhosted.org/packages/fb/80/65a5aa96c155e611d1ed844e4e1f57f3e36b021f396d9f8585d756e6b90d/rpds_py-2026.5.1-cp314-cp314-win_amd64.whl", hash = "sha256:cef8ac28d26f4dda3533060c20fbf80a325458fa9fd23ea72a73cdfa8e978838", size = 225985, upload-time = "2026-05-28T12:00:40.94Z" }, + { url = "https://files.pythonhosted.org/packages/27/7c/ad185212e87b05f196daef92bc5f3caf07298eb47c295b5585c3dd3093ac/rpds_py-2026.5.1-cp314-cp314-win_arm64.whl", hash = "sha256:eaaea962c68cdc68d4a533ba985ab8e9484277910bbfaa2ab3ef7732667bfed8", size = 221219, upload-time = "2026-05-28T12:00:43.15Z" }, + { url = 
"https://files.pythonhosted.org/packages/23/58/e14ae18759020334646b031e708ab4158d653a938822bfb7b95ef2e93aa3/rpds_py-2026.5.1-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:21942f52dbbd5f8758bf021213d28bd45c39e873e65e2407faf5f1846f5761ad", size = 352148, upload-time = "2026-05-28T12:00:44.638Z" }, + { url = "https://files.pythonhosted.org/packages/31/9b/5f4a1e2f960bca3ac5d052b139dd31eed97b259f9d909173821760d542e8/rpds_py-2026.5.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:f414556f6e3958300ff941e40c9f97e3dc9774ddd1b3434c475d73dd354bbed3", size = 345196, upload-time = "2026-05-28T12:00:46.14Z" }, + { url = "https://files.pythonhosted.org/packages/1a/71/1d9574d6a2fa20ab60eaa55c7467f5aa20cbc770f341a05f09c0876f59e2/rpds_py-2026.5.1-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ef1013a8625c74043210190b246f5b1551e09757c1f356c6e4160ef96c5bc081", size = 374981, upload-time = "2026-05-28T12:00:47.531Z" }, + { url = "https://files.pythonhosted.org/packages/0c/9a/37e99f4915a80aa71670263c1267f7ae0af95f53a3f61e6c3bdc016d4515/rpds_py-2026.5.1-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:cc68e231a77a5f0d774ae278a1f8e55c0456501820847c1e4efb3829f3441df6", size = 379961, upload-time = "2026-05-28T12:00:49.216Z" }, + { url = "https://files.pythonhosted.org/packages/a8/ff/6e73f74b89d2e0715e0fc86b7dde893f9a61ae2f9b256ff3bdfe41ac4e94/rpds_py-2026.5.1-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9baffb505aff33acc69b422a19f77806680f3c8632227d79f48de8a810d1c2c5", size = 495965, upload-time = "2026-05-28T12:00:51.111Z" }, + { url = "https://files.pythonhosted.org/packages/ea/e0/425faba25f59d74d4638b267f7c7a80e8649d2ef4db10a19b0c4a71e6e6f/rpds_py-2026.5.1-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b8d2f912928d426e8cfa396f7f3f8d29a59e6689c86dcca3c420730c1096322b", size = 389526, upload-time = "2026-05-28T12:00:52.77Z" }, + { url = 
"https://files.pythonhosted.org/packages/c6/76/7a41960e3fddae47fab43a28684d5da981401dffd88253de0944148654cb/rpds_py-2026.5.1-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:90f628283be835db980c941767d41c9a27b5239e54ba0a9c1335247e82406964", size = 376190, upload-time = "2026-05-28T12:00:54.215Z" }, + { url = "https://files.pythonhosted.org/packages/27/60/5f38dc70824fc6951b51d35377e577a3a3a4c81a6769cc5a2de25ebe0ad1/rpds_py-2026.5.1-cp314-cp314t-manylinux_2_31_riscv64.whl", hash = "sha256:1ebb2f0ab7e16132995a72de805170e0203df0c3dd22e1ef1cd1fdd90bd7a131", size = 383921, upload-time = "2026-05-28T12:00:55.673Z" }, + { url = "https://files.pythonhosted.org/packages/60/1a/d60a38caa1505f4b9483c3fbbde12c94e1079154f4f401a6da96f7e77621/rpds_py-2026.5.1-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f3df3d16ded76f1f8c9cdebd0e1ea55fdf4c23b812de189814da7cf229c22a81", size = 404766, upload-time = "2026-05-28T12:00:57.518Z" }, + { url = "https://files.pythonhosted.org/packages/87/ff/602fd3f174d6425f0bce05ad0dfbec0e96b38d0f7d08a79af5aa20083885/rpds_py-2026.5.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:9af8905b8f854990e40d5206aa5ac58d9b0fe0b7f351ff2bb086c20f6c8c6a47", size = 551343, upload-time = "2026-05-28T12:00:58.978Z" }, + { url = "https://files.pythonhosted.org/packages/b8/c1/1be13327acdbead3eca1fde03b6a34dbb011f1e864e217f0d32cc1779a7f/rpds_py-2026.5.1-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:036a36a87fb1cd3b214d11c4b3c4f7d2ddad933625dca1c900b56a057c07740a", size = 618502, upload-time = "2026-05-28T12:01:00.656Z" }, + { url = "https://files.pythonhosted.org/packages/f3/d7/afb49b49d7f2be8b7ba1a9f0977fa5168003437b93086726f066544e8351/rpds_py-2026.5.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:62ae3853454fe9ef283a03c96c2d835d39e84b14643a9d62c82ef0fb87d702ca", size = 581916, upload-time = "2026-05-28T12:01:02.22Z" }, + { url = 
"https://files.pythonhosted.org/packages/25/d1/dbef8c1f8a10f07beb62b5f054e20099fd9924b3ec001b8f0b6ac7813a85/rpds_py-2026.5.1-cp314-cp314t-win32.whl", hash = "sha256:6c3d771a46ec18b12af06ce36243a9a80b07a5d0515236332d90863ca8bb326a", size = 207855, upload-time = "2026-05-28T12:01:03.821Z" }, + { url = "https://files.pythonhosted.org/packages/2a/72/bfa4e61ab8e7dc1c8adf397e05e6cbdd4239357bd72b248d3de662f23915/rpds_py-2026.5.1-cp314-cp314t-win_amd64.whl", hash = "sha256:c93c629be4636cf54337bd5f06c104d55e42ced54d681f6fe21ae510a65116f6", size = 225422, upload-time = "2026-05-28T12:01:05.194Z" }, + { url = "https://files.pythonhosted.org/packages/27/3a/7b5da92b640f67b6717ccafc83cdd06bfa7ff2395c3685c68922bb54d703/rpds_py-2026.5.1-cp315-cp315-macosx_10_12_x86_64.whl", hash = "sha256:3574b55c604b8f75dacb007136508bbc0db406e626301778096a133327e7f2fb", size = 349576, upload-time = "2026-05-28T12:01:06.722Z" }, + { url = "https://files.pythonhosted.org/packages/d7/8a/2aafd7ad355a1bd48ca76e2262b74b15e6432b5a1efe150efd4d779cd55d/rpds_py-2026.5.1-cp315-cp315-macosx_11_0_arm64.whl", hash = "sha256:94068eb3ae6d43f5a786b7db96a406a34e6d5c24489feef32fd6e8946ea7b291", size = 343640, upload-time = "2026-05-28T12:01:08.441Z" }, + { url = "https://files.pythonhosted.org/packages/f7/7d/6c9523c1abbe840a1b7fba3c516d48e1d3487cc80fea4366c4071cf56784/rpds_py-2026.5.1-cp315-cp315-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f3a5b10e8ce894825f380a8f1b6444cf73c294dfea62afbb2d13e3a9e630cec1", size = 375322, upload-time = "2026-05-28T12:01:09.934Z" }, + { url = "https://files.pythonhosted.org/packages/5a/5d/0b7b03fb1dc509321f01de3149784ab773e34c8573022029af8076afcb9c/rpds_py-2026.5.1-cp315-cp315-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:fc09f82e63d4bcd58149572f857a431bae851dc747e313c3b5bdf7abb907fda8", size = 379066, upload-time = "2026-05-28T12:01:11.48Z" }, + { url = 
"https://files.pythonhosted.org/packages/d7/e2/8ef6012999ebf1cb1c22f876d9ce5e63d960fd4631d2af3202d3f480aa25/rpds_py-2026.5.1-cp315-cp315-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e10464d17df3b582745c25cec695cb9558bca2cb6ddb631aee1787fc72c767b2", size = 494586, upload-time = "2026-05-28T12:01:13.051Z" }, + { url = "https://files.pythonhosted.org/packages/80/af/1eeb029bec67582c226b7809172207cd005073af4ebd906e65ff494f4983/rpds_py-2026.5.1-cp315-cp315-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ba05adbf15d994c38ec0b7ab32e858e5110c21e9009a00a86545fd220f84e038", size = 388415, upload-time = "2026-05-28T12:01:14.631Z" }, + { url = "https://files.pythonhosted.org/packages/18/23/ffbe10711c4d766c1cab0557d6906c074f795814863c67b351355d29354a/rpds_py-2026.5.1-cp315-cp315-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:77c004fdc7b891967106f78ddfd7b076bfe6813c6139c6fff6aed3bcaa960b26", size = 372427, upload-time = "2026-05-28T12:01:16.153Z" }, + { url = "https://files.pythonhosted.org/packages/bd/3a/30ba4a6ad457e5b070c18d742a33fb77d8d922b565cc881f8a5313d63bfe/rpds_py-2026.5.1-cp315-cp315-manylinux_2_31_riscv64.whl", hash = "sha256:83bcf894486c9d78dd290d3c0124ff6dd8875d3025e2090a8ec49fcc37c55fdd", size = 383615, upload-time = "2026-05-28T12:01:17.809Z" }, + { url = "https://files.pythonhosted.org/packages/d3/69/62e242b53ce39c0814bd24e1a6e6eba6c92be716277745f317f9540a2e7b/rpds_py-2026.5.1-cp315-cp315-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:c3df104083952a0e0c6f10de33e440eabe98fb6317d23e1a58c68f6df08d01b9", size = 402786, upload-time = "2026-05-28T12:01:19.419Z" }, + { url = "https://files.pythonhosted.org/packages/38/c1/a770b9c186928a1ed0f7e6d7ae50e7f3950ed23e3f9e366dbc8e38cb55de/rpds_py-2026.5.1-cp315-cp315-musllinux_1_2_aarch64.whl", hash = "sha256:980450826cf22e133c57e0835070bdd0dd3f73b9b708c3ce223def2cb9469e14", size = 551583, upload-time = "2026-05-28T12:01:21.013Z" }, + { url = 
"https://files.pythonhosted.org/packages/21/7c/68e8579b95375b70d2a963103c42e705856cdb98569258bd807f4423891c/rpds_py-2026.5.1-cp315-cp315-musllinux_1_2_i686.whl", hash = "sha256:205dde846f24332ab0c1188699a043b8d165b79bb84529ce272c45048ff6be01", size = 616941, upload-time = "2026-05-28T12:01:22.548Z" }, + { url = "https://files.pythonhosted.org/packages/70/a1/a6135aed5730ff03ab957182259987ac11e55fb392a28dc6f0592048a280/rpds_py-2026.5.1-cp315-cp315-musllinux_1_2_x86_64.whl", hash = "sha256:3966b82dd563176396df030f3dd52a6e54cb69b718e95e78bd555ed3d1e0185d", size = 578349, upload-time = "2026-05-28T12:01:24.118Z" }, + { url = "https://files.pythonhosted.org/packages/09/6e/f24201a76a84e6c49d0bdfdfcb735210e21701e9b21c5bfc0ba497dd62f6/rpds_py-2026.5.1-cp315-cp315-win32.whl", hash = "sha256:7818f8d0a415be74d2be3590b0a1c1f463a642f4d0217e7d10602dceef5b79aa", size = 209922, upload-time = "2026-05-28T12:01:25.522Z" }, + { url = "https://files.pythonhosted.org/packages/9e/e4/966bc240bb0485fc265278f6de44d05834bf0b3618886e0b22e33d54c49a/rpds_py-2026.5.1-cp315-cp315-win_amd64.whl", hash = "sha256:b3cc20c0d800af78fd0fac68086e28c1856cec51ea528bb81ea851aa40d39325", size = 226003, upload-time = "2026-05-28T12:01:27.062Z" }, + { url = "https://files.pythonhosted.org/packages/5c/5c/a15a59269cd5e74472734516c73795c15eccfc841b3d4b0228c3f53f19d0/rpds_py-2026.5.1-cp315-cp315-win_arm64.whl", hash = "sha256:3609e9939a8a76cd904cf98a3f1f13b5dc7e150adeaee89e0ea09652ea213e16", size = 221245, upload-time = "2026-05-28T12:01:28.51Z" }, + { url = "https://files.pythonhosted.org/packages/e0/22/135ce03804e179a71ceb13be095deda4a279bc88f7a6b8fa161c5ad44e12/rpds_py-2026.5.1-cp315-cp315t-macosx_10_12_x86_64.whl", hash = "sha256:5d333a7127d4b307601ac37792bee01bb95c867cbfacf21b6375b804d6bbd723", size = 352015, upload-time = "2026-05-28T12:01:30.214Z" }, + { url = 
"https://files.pythonhosted.org/packages/3b/5f/f1f6d2652eb9d848f6eb369d8db83a2da6249bb49ad2c2a48f45d54538d3/rpds_py-2026.5.1-cp315-cp315t-macosx_11_0_arm64.whl", hash = "sha256:b5f077b44a4f7808520f66dae234988d867deb9aed9be5da057ce9ba831b2a41", size = 345016, upload-time = "2026-05-28T12:01:31.656Z" }, + { url = "https://files.pythonhosted.org/packages/88/66/b74182775691ea2290c99e52ac8d5db844e56fbec90ce421f107658c8314/rpds_py-2026.5.1-cp315-cp315t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:55d8f9b7b78c9538fc9e04e82ec0e888ff0c3cffcfad152c77e57cd09351a98a", size = 374775, upload-time = "2026-05-28T12:01:33.136Z" }, + { url = "https://files.pythonhosted.org/packages/ff/8f/15e5a61d9f0a43902d36561d4f07cae6ae9f4716be825159fd72717f33af/rpds_py-2026.5.1-cp315-cp315t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e3a8ae58895ac107ed934a6bf51e5846f95c53b9b940c2c6d310838fd5846358", size = 380270, upload-time = "2026-05-28T12:01:34.574Z" }, + { url = "https://files.pythonhosted.org/packages/02/c3/f859b12763a80540cdf2af0f15b19904cf756a71d7bdd3f82ff3e5b1bbf9/rpds_py-2026.5.1-cp315-cp315t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0957cf3c2b8632ec7aaebffebea8005b353cc2a237b6e2ae3c2cac0820704cfb", size = 495285, upload-time = "2026-05-28T12:01:36.127Z" }, + { url = "https://files.pythonhosted.org/packages/1c/c7/ff27c2ac8411d30b03b1829fd88cae8dad1a4d0da48dd25e57c4038042e6/rpds_py-2026.5.1-cp315-cp315t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c396c1304de421050b3681ea70f371874b54d41b0151e96109758144c231e30b", size = 389581, upload-time = "2026-05-28T12:01:37.635Z" }, + { url = "https://files.pythonhosted.org/packages/6e/67/fe92ee32a6cc05c77228a2f8b1762e7124f386ec20ff83d0757b762d58d0/rpds_py-2026.5.1-cp315-cp315t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aad1bff7f666b9598e573815affd666aac6a13a585dde336f843e33350c7fadc", size = 376041, upload-time = "2026-05-28T12:01:39.307Z" }, + { 
url = "https://files.pythonhosted.org/packages/f8/91/b4d6685c27aba55bd82f25b278be8237038117d05f9659a6213ad3408130/rpds_py-2026.5.1-cp315-cp315t-manylinux_2_31_riscv64.whl", hash = "sha256:656a042550878f12d45752452d47094b7cfe5ad1e9d7b87b5a22ad3ae5ff8015", size = 383946, upload-time = "2026-05-28T12:01:41.043Z" }, + { url = "https://files.pythonhosted.org/packages/bd/79/2c1d832a53c8e0f8e98fc970ec257b950fecd4f62be2ab7182b500a0cbc8/rpds_py-2026.5.1-cp315-cp315t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:73c4bd4f70294737b5206a3e8e30ccadbf8a60301831c8ea23eec5dbeea1ecfa", size = 405526, upload-time = "2026-05-28T12:01:43.032Z" }, + { url = "https://files.pythonhosted.org/packages/78/c4/c98117b03c6a8581ab2c2dfccfe9a5ad82bd8128a3c28b46a6ad2d97c393/rpds_py-2026.5.1-cp315-cp315t-musllinux_1_2_aarch64.whl", hash = "sha256:43bca78665423cabae77146f2fe7ce55272b6c8d55d82cca83effd42c7e13972", size = 551165, upload-time = "2026-05-28T12:01:44.648Z" }, + { url = "https://files.pythonhosted.org/packages/3b/c1/bc479ca069200af730881b1bd525e3114b2b391a351509fcb1b772f28086/rpds_py-2026.5.1-cp315-cp315t-musllinux_1_2_i686.whl", hash = "sha256:42d0f20e85e549c870749d0e247f0c10d318a45b7e9676d575d2dcb04a1b2e66", size = 618778, upload-time = "2026-05-28T12:01:46.337Z" }, + { url = "https://files.pythonhosted.org/packages/77/65/38ab2f90df44c2febfb63cc10ced40763d9b4bc94d173e734528663fe7f5/rpds_py-2026.5.1-cp315-cp315t-musllinux_1_2_x86_64.whl", hash = "sha256:b1be5c35683684d5331b93600c210e8367c254683d8a6df6bd21bd2da3a334fb", size = 581839, upload-time = "2026-05-28T12:01:48.109Z" }, + { url = "https://files.pythonhosted.org/packages/15/2d/ce1f605fe036aadd460e5822e578c6c7ec3a860936cca37d6e0f299daa77/rpds_py-2026.5.1-cp315-cp315t-win32.whl", hash = "sha256:75808f6c38ce7749bb68cc2770161aae5045e6c6f6781a9782e74b93304399df", size = 207866, upload-time = "2026-05-28T12:01:49.648Z" }, + { url = 
"https://files.pythonhosted.org/packages/79/cb/966040123eb102371559746908ef2c9471f4d43e17ec9a645a2258dab64b/rpds_py-2026.5.1-cp315-cp315t-win_amd64.whl", hash = "sha256:90bd6630002a1c7f09e7843dd79f0d24f3d2897cc25a753480917865d14f15b3", size = 225441, upload-time = "2026-05-28T12:01:51.408Z" }, +] + +[[package]] +name = "sentry-sdk" +version = "2.61.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "certifi" }, + { name = "urllib3" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/52/4d/3c66e6045bd2071256b6b6fdcb0cc02b86ce54b2acc2ceac79af8e0efbb5/sentry_sdk-2.61.0.tar.gz", hash = "sha256:1ca9b4bb777eb5be67004edab7eb894f21c6301f1d05ed64966719ad5d1764ce", size = 458510, upload-time = "2026-05-28T09:40:28.917Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/21/5a/9794736d5802689c1a48862e6afe6b7f3e86cc37c15d4a84bc0143877dc1/sentry_sdk-2.61.0-py3-none-any.whl", hash = "sha256:ec4d30273909cb1d198e03208b16ee70e2bc5d90a16fd9f1fb2fc6a72e1f03dc", size = 483111, upload-time = "2026-05-28T09:40:27.027Z" }, +] + +[[package]] +name = "shellingham" +version = "1.5.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/58/15/8b3609fd3830ef7b27b655beb4b4e9c62313a4e8da8c676e142cc210d58e/shellingham-1.5.4.tar.gz", hash = "sha256:8dbca0739d487e5bd35ab3ca4b36e11c4078f3a234bfce294b0a0291363404de", size = 10310, upload-time = "2023-10-24T04:13:40.426Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e0/f9/0595336914c5619e5f28a1fb793285925a8cd4b432c9da0a987836c7f822/shellingham-1.5.4-py2.py3-none-any.whl", hash = "sha256:7ecfff8f2fd72616f7481040475a65b2bf8af90a56c89140852d1120324e8686", size = 9755, upload-time = "2023-10-24T04:13:38.866Z" }, +] + +[[package]] +name = "sniffio" +version = "1.3.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/a2/87/a6771e1546d97e7e041b6ae58d80074f81b7d5121207425c964ddf5cfdbd/sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc", size = 20372, upload-time = "2024-02-25T23:20:04.057Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e9/44/75a9c9421471a6c4805dbf2356f7c181a29c1879239abab1ea2cc8f38b40/sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2", size = 10235, upload-time = "2024-02-25T23:20:01.196Z" }, +] + +[[package]] +name = "sse-starlette" +version = "3.4.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, + { name = "starlette" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/f7/2b/58abc2d1fd397e7dde08e947e05c884d8ef2f78d5e2588c17a12d42d6994/sse_starlette-3.4.4.tar.gz", hash = "sha256:07e0fa0460138baf25cdd5fb28683472c3995dc1642225191b3832d62526bcb0", size = 31819, upload-time = "2026-05-12T17:37:17.019Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/dc/67/805710444ea8cc75fbf70b920ed431a560c4bf9c57f7d5a3117213189399/sse_starlette-3.4.4-py3-none-any.whl", hash = "sha256:3f4dd50d8aed2771a091f3a83000323fc3844541c16b4fe585ae2420cc6df973", size = 16514, upload-time = "2026-05-12T17:37:15.601Z" }, +] + +[[package]] +name = "starlette" +version = "1.2.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, + { name = "typing-extensions", marker = "python_full_version < '3.13'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c5/bf/616a066c2760f6c2b1ae3437cc28149734d069fbb46511712beae118a68c/starlette-1.2.0.tar.gz", hash = "sha256:3c5a6b23fff42492914e93890bb80cbfea72dbf37de268eec06185d62a4ca553", size = 2668923, upload-time = "2026-05-28T11:42:50.568Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/9f/85/492183764d5d01d4514be3730fdb8e228a80605783099551c51627578b5d/starlette-1.2.0-py3-none-any.whl", hash = "sha256:36e0c76ac59157e75dc4b3bdeafba97fb04eaf1878045f15dbef666a6f092ed7", size = 73213, upload-time = "2026-05-28T11:42:48.801Z" }, +] + +[[package]] +name = "typer" +version = "0.26.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "annotated-doc" }, + { name = "colorama", marker = "sys_platform == 'win32'" }, + { name = "rich" }, + { name = "shellingham" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/cc/15/f5fc7be23b7196bc065b282d9589a372392fb10860c80f9c1dd7eb008662/typer-0.26.3.tar.gz", hash = "sha256:3e2b9352f535e5303ef27806dadc2c8647687bdca5c902f03fec3fb88f46a46a", size = 198326, upload-time = "2026-05-28T20:30:50.984Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/cd/cc/c6c5dea061e2740355bfeef22ac6a41751bd2f3903e83921295569bdcec4/typer-0.26.3-py3-none-any.whl", hash = "sha256:e70549ec5a403ca8a0bf0802ddd9f3c6ff7a14ccbb859b01b697baa943636f33", size = 122338, upload-time = "2026-05-28T20:30:49.816Z" }, +] + +[[package]] +name = "typing-extensions" +version = "4.15.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/72/94/1a15dd82efb362ac84269196e94cf00f187f7ed21c242792a923cdb1c61f/typing_extensions-4.15.0.tar.gz", hash = "sha256:0cea48d173cc12fa28ecabc3b837ea3cf6f38c6d1136f85cbaaf598984861466", size = 109391, upload-time = "2025-08-25T13:49:26.313Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/18/67/36e9267722cc04a6b9f15c7f3441c2363321a3ea07da7ae0c0707beb2a9c/typing_extensions-4.15.0-py3-none-any.whl", hash = "sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548", size = 44614, upload-time = "2025-08-25T13:49:24.86Z" }, +] + +[[package]] +name = "typing-inspection" +version = "0.4.2" +source = { registry = "https://pypi.org/simple" } +dependencies = 
[ + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/55/e3/70399cb7dd41c10ac53367ae42139cf4b1ca5f36bb3dc6c9d33acdb43655/typing_inspection-0.4.2.tar.gz", hash = "sha256:ba561c48a67c5958007083d386c3295464928b01faa735ab8547c5692e87f464", size = 75949, upload-time = "2025-10-01T02:14:41.687Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/dc/9b/47798a6c91d8bdb567fe2698fe81e0c6b7cb7ef4d13da4114b41d239f65d/typing_inspection-0.4.2-py3-none-any.whl", hash = "sha256:4ed1cacbdc298c220f1bd249ed5287caa16f34d44ef4e9c3d0cbad5b521545e7", size = 14611, upload-time = "2025-10-01T02:14:40.154Z" }, +] + +[[package]] +name = "urllib3" +version = "2.7.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/53/0c/06f8b233b8fd13b9e5ee11424ef85419ba0d8ba0b3138bf360be2ff56953/urllib3-2.7.0.tar.gz", hash = "sha256:231e0ec3b63ceb14667c67be60f2f2c40a518cb38b03af60abc813da26505f4c", size = 433602, upload-time = "2026-05-07T16:13:18.596Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7f/3e/5db95bcf282c52709639744ca2a8b149baccf648e39c8cc87553df9eae0c/urllib3-2.7.0-py3-none-any.whl", hash = "sha256:9fb4c81ebbb1ce9531cce37674bbc6f1360472bc18ca9a553ede278ef7276897", size = 131087, upload-time = "2026-05-07T16:13:17.151Z" }, +] + +[[package]] +name = "uvicorn" +version = "0.48.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "click" }, + { name = "h11" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/e6/bf/f6544ba992ddb9a6077343a576f9844f7f8f06ab819aefd00206e9255f18/uvicorn-0.48.0.tar.gz", hash = "sha256:a5504207195d08c2511bf9125ede5ac4a4b71725d519e758d01dcf0bc2d31c37", size = 91074, upload-time = "2026-05-24T12:08:41.925Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/01/be/72532be3da7acc5fdfbccdb95215cd04f995a0886532a5b423f929cda4cc/uvicorn-0.48.0-py3-none-any.whl", hash = 
"sha256:48097851328b87ec36117d3d575234519eb58c2b22d79666e9bbc6c49a761dad", size = 71410, upload-time = "2026-05-24T12:08:40.258Z" }, +] + +[package.optional-dependencies] +standard = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, + { name = "httptools" }, + { name = "python-dotenv" }, + { name = "pyyaml" }, + { name = "uvloop", marker = "platform_python_implementation != 'PyPy' and sys_platform != 'cygwin' and sys_platform != 'win32'" }, + { name = "watchfiles" }, + { name = "websockets" }, +] + +[[package]] +name = "uvloop" +version = "0.22.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/06/f0/18d39dbd1971d6d62c4629cc7fa67f74821b0dc1f5a77af43719de7936a7/uvloop-0.22.1.tar.gz", hash = "sha256:6c84bae345b9147082b17371e3dd5d42775bddce91f885499017f4607fdaf39f", size = 2443250, upload-time = "2025-10-16T22:17:19.342Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3d/ff/7f72e8170be527b4977b033239a83a68d5c881cc4775fca255c677f7ac5d/uvloop-0.22.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:fe94b4564e865d968414598eea1a6de60adba0c040ba4ed05ac1300de402cd42", size = 1359936, upload-time = "2025-10-16T22:16:29.436Z" }, + { url = "https://files.pythonhosted.org/packages/c3/c6/e5d433f88fd54d81ef4be58b2b7b0cea13c442454a1db703a1eea0db1a59/uvloop-0.22.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:51eb9bd88391483410daad430813d982010f9c9c89512321f5b60e2cddbdddd6", size = 752769, upload-time = "2025-10-16T22:16:30.493Z" }, + { url = "https://files.pythonhosted.org/packages/24/68/a6ac446820273e71aa762fa21cdcc09861edd3536ff47c5cd3b7afb10eeb/uvloop-0.22.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:700e674a166ca5778255e0e1dc4e9d79ab2acc57b9171b79e65feba7184b3370", size = 4317413, upload-time = "2025-10-16T22:16:31.644Z" }, + { url = 
"https://files.pythonhosted.org/packages/5f/6f/e62b4dfc7ad6518e7eff2516f680d02a0f6eb62c0c212e152ca708a0085e/uvloop-0.22.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7b5b1ac819a3f946d3b2ee07f09149578ae76066d70b44df3fa990add49a82e4", size = 4426307, upload-time = "2025-10-16T22:16:32.917Z" }, + { url = "https://files.pythonhosted.org/packages/90/60/97362554ac21e20e81bcef1150cb2a7e4ffdaf8ea1e5b2e8bf7a053caa18/uvloop-0.22.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:e047cc068570bac9866237739607d1313b9253c3051ad84738cbb095be0537b2", size = 4131970, upload-time = "2025-10-16T22:16:34.015Z" }, + { url = "https://files.pythonhosted.org/packages/99/39/6b3f7d234ba3964c428a6e40006340f53ba37993f46ed6e111c6e9141d18/uvloop-0.22.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:512fec6815e2dd45161054592441ef76c830eddaad55c8aa30952e6fe1ed07c0", size = 4296343, upload-time = "2025-10-16T22:16:35.149Z" }, + { url = "https://files.pythonhosted.org/packages/89/8c/182a2a593195bfd39842ea68ebc084e20c850806117213f5a299dfc513d9/uvloop-0.22.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:561577354eb94200d75aca23fbde86ee11be36b00e52a4eaf8f50fb0c86b7705", size = 1358611, upload-time = "2025-10-16T22:16:36.833Z" }, + { url = "https://files.pythonhosted.org/packages/d2/14/e301ee96a6dc95224b6f1162cd3312f6d1217be3907b79173b06785f2fe7/uvloop-0.22.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:1cdf5192ab3e674ca26da2eada35b288d2fa49fdd0f357a19f0e7c4e7d5077c8", size = 751811, upload-time = "2025-10-16T22:16:38.275Z" }, + { url = "https://files.pythonhosted.org/packages/b7/02/654426ce265ac19e2980bfd9ea6590ca96a56f10c76e63801a2df01c0486/uvloop-0.22.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6e2ea3d6190a2968f4a14a23019d3b16870dd2190cd69c8180f7c632d21de68d", size = 4288562, upload-time = "2025-10-16T22:16:39.375Z" }, + { url = 
"https://files.pythonhosted.org/packages/15/c0/0be24758891ef825f2065cd5db8741aaddabe3e248ee6acc5e8a80f04005/uvloop-0.22.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0530a5fbad9c9e4ee3f2b33b148c6a64d47bbad8000ea63704fa8260f4cf728e", size = 4366890, upload-time = "2025-10-16T22:16:40.547Z" }, + { url = "https://files.pythonhosted.org/packages/d2/53/8369e5219a5855869bcee5f4d317f6da0e2c669aecf0ef7d371e3d084449/uvloop-0.22.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:bc5ef13bbc10b5335792360623cc378d52d7e62c2de64660616478c32cd0598e", size = 4119472, upload-time = "2025-10-16T22:16:41.694Z" }, + { url = "https://files.pythonhosted.org/packages/f8/ba/d69adbe699b768f6b29a5eec7b47dd610bd17a69de51b251126a801369ea/uvloop-0.22.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:1f38ec5e3f18c8a10ded09742f7fb8de0108796eb673f30ce7762ce1b8550cad", size = 4239051, upload-time = "2025-10-16T22:16:43.224Z" }, + { url = "https://files.pythonhosted.org/packages/90/cd/b62bdeaa429758aee8de8b00ac0dd26593a9de93d302bff3d21439e9791d/uvloop-0.22.1-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:3879b88423ec7e97cd4eba2a443aa26ed4e59b45e6b76aabf13fe2f27023a142", size = 1362067, upload-time = "2025-10-16T22:16:44.503Z" }, + { url = "https://files.pythonhosted.org/packages/0d/f8/a132124dfda0777e489ca86732e85e69afcd1ff7686647000050ba670689/uvloop-0.22.1-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:4baa86acedf1d62115c1dc6ad1e17134476688f08c6efd8a2ab076e815665c74", size = 752423, upload-time = "2025-10-16T22:16:45.968Z" }, + { url = "https://files.pythonhosted.org/packages/a3/94/94af78c156f88da4b3a733773ad5ba0b164393e357cc4bd0ab2e2677a7d6/uvloop-0.22.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:297c27d8003520596236bdb2335e6b3f649480bd09e00d1e3a99144b691d2a35", size = 4272437, upload-time = "2025-10-16T22:16:47.451Z" }, + { url = 
"https://files.pythonhosted.org/packages/b5/35/60249e9fd07b32c665192cec7af29e06c7cd96fa1d08b84f012a56a0b38e/uvloop-0.22.1-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c1955d5a1dd43198244d47664a5858082a3239766a839b2102a269aaff7a4e25", size = 4292101, upload-time = "2025-10-16T22:16:49.318Z" }, + { url = "https://files.pythonhosted.org/packages/02/62/67d382dfcb25d0a98ce73c11ed1a6fba5037a1a1d533dcbb7cab033a2636/uvloop-0.22.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:b31dc2fccbd42adc73bc4e7cdbae4fc5086cf378979e53ca5d0301838c5682c6", size = 4114158, upload-time = "2025-10-16T22:16:50.517Z" }, + { url = "https://files.pythonhosted.org/packages/f0/7a/f1171b4a882a5d13c8b7576f348acfe6074d72eaf52cccef752f748d4a9f/uvloop-0.22.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:93f617675b2d03af4e72a5333ef89450dfaa5321303ede6e67ba9c9d26878079", size = 4177360, upload-time = "2025-10-16T22:16:52.646Z" }, + { url = "https://files.pythonhosted.org/packages/79/7b/b01414f31546caf0919da80ad57cbfe24c56b151d12af68cee1b04922ca8/uvloop-0.22.1-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:37554f70528f60cad66945b885eb01f1bb514f132d92b6eeed1c90fd54ed6289", size = 1454790, upload-time = "2025-10-16T22:16:54.355Z" }, + { url = "https://files.pythonhosted.org/packages/d4/31/0bb232318dd838cad3fa8fb0c68c8b40e1145b32025581975e18b11fab40/uvloop-0.22.1-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:b76324e2dc033a0b2f435f33eb88ff9913c156ef78e153fb210e03c13da746b3", size = 796783, upload-time = "2025-10-16T22:16:55.906Z" }, + { url = "https://files.pythonhosted.org/packages/42/38/c9b09f3271a7a723a5de69f8e237ab8e7803183131bc57c890db0b6bb872/uvloop-0.22.1-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:badb4d8e58ee08dad957002027830d5c3b06aea446a6a3744483c2b3b745345c", size = 4647548, upload-time = "2025-10-16T22:16:57.008Z" }, + { url = 
"https://files.pythonhosted.org/packages/c1/37/945b4ca0ac27e3dc4952642d4c900edd030b3da6c9634875af6e13ae80e5/uvloop-0.22.1-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b91328c72635f6f9e0282e4a57da7470c7350ab1c9f48546c0f2866205349d21", size = 4467065, upload-time = "2025-10-16T22:16:58.206Z" }, + { url = "https://files.pythonhosted.org/packages/97/cc/48d232f33d60e2e2e0b42f4e73455b146b76ebe216487e862700457fbf3c/uvloop-0.22.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:daf620c2995d193449393d6c62131b3fbd40a63bf7b307a1527856ace637fe88", size = 4328384, upload-time = "2025-10-16T22:16:59.36Z" }, + { url = "https://files.pythonhosted.org/packages/e4/16/c1fd27e9549f3c4baf1dc9c20c456cd2f822dbf8de9f463824b0c0357e06/uvloop-0.22.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:6cde23eeda1a25c75b2e07d39970f3374105d5eafbaab2a4482be82f272d5a5e", size = 4296730, upload-time = "2025-10-16T22:17:00.744Z" }, +] + +[[package]] +name = "watchfiles" +version = "1.2.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/cd/41/5e1a4bb12aac5f1493fa1bdc11154eca3b258ca4eba65d39c473fe19d8e9/watchfiles-1.2.0.tar.gz", hash = "sha256:c995fba777f1ea992f090f9236e9284cf7a5d1a0130dd5a3d82c598cacd76838", size = 108252, upload-time = "2026-05-18T04:32:04.251Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b8/2f/e42c992d2afda3108ea1c02acecc991b9f31d05c14adc2a7cee9ee211fc4/watchfiles-1.2.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:bc13eb17538be00c874699dc0abe4ee2bc8d50bb1166a6b9e175ef3fd7eb8f26", size = 400115, upload-time = "2026-05-18T04:32:02.06Z" }, + { url = "https://files.pythonhosted.org/packages/5f/8f/6af2ea19065c91d8b0ea3516fdfc8c0d349f407e8e9fbf4e5a17360de8ad/watchfiles-1.2.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2d95ddc1eb6914154253d239089900813f6a767e174b8e6a50e7fdacb7e4236c", 
size = 393659, upload-time = "2026-05-18T04:30:50.951Z" }, + { url = "https://files.pythonhosted.org/packages/13/01/b32a967c56fb3e3e5be3db52c3d3b87fa4513aa367d8ed1ad96d42952e5f/watchfiles-1.2.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8f70d8b291ef6e88d19b1f297a6905ddb978888d9272b0d05e6f53309856bcfc", size = 453207, upload-time = "2026-05-18T04:31:04.231Z" }, + { url = "https://files.pythonhosted.org/packages/04/98/97557a812180338cb1abd32e1cffcc4588f59b5f23e0cb006b2ba95ba64a/watchfiles-1.2.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:56d8641cf834c2836922899105bd3ce3d0dfc69291d52edf0b4d0436829b34c0", size = 459273, upload-time = "2026-05-18T04:31:50.377Z" }, + { url = "https://files.pythonhosted.org/packages/e8/a8/b4b08dcb7653b8087c6586f7ce649505900e866bbcfe40dc9587af02e686/watchfiles-1.2.0-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2581a94056e55d7d0a31a823ea92bf73749c489ca2285bfdc0fbe6b2bb49d50c", size = 489927, upload-time = "2026-05-18T04:31:42.485Z" }, + { url = "https://files.pythonhosted.org/packages/50/94/3dceea03545d2e5ddfd839f0ddd5e1cecbf1697b5a428d5ba11cef6af95d/watchfiles-1.2.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:41bc1199f7523b3f82843c88cbb979180c949caef0342cf90968f178e5d49b01", size = 570476, upload-time = "2026-05-18T04:31:03.071Z" }, + { url = "https://files.pythonhosted.org/packages/cc/f2/d39a5450c3532092b91f81d274360e613c2371bc874a89c7a1a3c5e8d138/watchfiles-1.2.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7571e4464cb6e434958f867f7f730b8ab0b75e3f8e5eac0499168486ab3c33a8", size = 465650, upload-time = "2026-05-18T04:30:12.701Z" }, + { url = "https://files.pythonhosted.org/packages/22/24/ed72f68cbc1333ca9b9f2200aa048bb6658ae41709bc1caad4310f4bdffd/watchfiles-1.2.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:e53a384f76b631c3ae5334ce6a52f0baa3a911eb94a4eac7f160079868b716d5", size = 456398, upload-time = "2026-05-18T04:30:13.784Z" }, + { url = "https://files.pythonhosted.org/packages/0d/64/982ef4a4e5bab5b6e5b6becc8cd5e732f6130a78b855f0abec6439a9a135/watchfiles-1.2.0-cp312-cp312-manylinux_2_31_riscv64.whl", hash = "sha256:d20029a60a71a052a24c4db7673bc4de39ab89adbaccbfb5d67987c5d73f424d", size = 465140, upload-time = "2026-05-18T04:31:52.111Z" }, + { url = "https://files.pythonhosted.org/packages/a0/0c/95282abf4ed680b6096010bcfc30c5fa7a041fc5aa5a2ad17a2cc6c75bba/watchfiles-1.2.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:2cb93af48550faf1cea04c303107c8b75833de7013e57ce27d3b8d21d8d0f58c", size = 630259, upload-time = "2026-05-18T04:31:25.676Z" }, + { url = "https://files.pythonhosted.org/packages/30/45/607c1de1530c4bdcf2cf1d1ecc2505ddba5d96bd43ba9f2b0e79876f850f/watchfiles-1.2.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:2995c176de7692b86a2e4c58d9ec718f753150a979cb4a754e2b4ffa38e70906", size = 659859, upload-time = "2026-05-18T04:30:24.333Z" }, + { url = "https://files.pythonhosted.org/packages/fa/08/d9e2e0f9e8e6791d33aefc694ad7eefa7f901f63caff84a81ded38692f9c/watchfiles-1.2.0-cp312-cp312-win32.whl", hash = "sha256:7a2cffd17d27d2ecbb310c2b1d8174f222a5495b1a721894afa88ec11e25b898", size = 275480, upload-time = "2026-05-18T04:30:31.307Z" }, + { url = "https://files.pythonhosted.org/packages/1c/e6/9d42569c0102645cc8cea5d8c7d8a1e9d4ada2cb7f05f75e554b8aa2202a/watchfiles-1.2.0-cp312-cp312-win_amd64.whl", hash = "sha256:f155b3a1b2a5fc89cdc70d47ee5d54e3b75e88efa34982028a35daef9ba00379", size = 288718, upload-time = "2026-05-18T04:32:10.745Z" }, + { url = "https://files.pythonhosted.org/packages/0a/26/88e0dc6ee3898169d7fa22bb6a69cabf2502d2ee25cb8c876d1262d204f8/watchfiles-1.2.0-cp312-cp312-win_arm64.whl", hash = "sha256:8fa585ede612ee9f9e91b18bebf9ba11b9ae29a4e3a0d0cf6fca3e382133f0d5", size = 281026, upload-time = "2026-05-18T04:30:22.23Z" }, + { url = 
"https://files.pythonhosted.org/packages/d1/4d/70a7feced9f87e2ff26dba42667290f41694fc64646c67261fbb8cab5d5c/watchfiles-1.2.0-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:01ea8d66f0693b9b60a6541c8d10263091ca9a9060d242f3c1f3143f9aad2c98", size = 399730, upload-time = "2026-05-18T04:31:38.162Z" }, + { url = "https://files.pythonhosted.org/packages/31/3a/0da302f2307aee316922806ebd5726c542cbd787c938271cf14a074c7daf/watchfiles-1.2.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:7ba0480b9a74af058f43b337e937a451e109295c420916d68ad24e3dc02f5e44", size = 392842, upload-time = "2026-05-18T04:30:27.051Z" }, + { url = "https://files.pythonhosted.org/packages/db/ef/d5bdb705c224dbc256aa0c1ec47bf4e61ec52558f2afb44a71a1fe4d7015/watchfiles-1.2.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4f34e26a19f91f710c08e0183429f0d1d15df734e6bc78c31e77b9ea9c433658", size = 452989, upload-time = "2026-05-18T04:31:11.945Z" }, + { url = "https://files.pythonhosted.org/packages/71/29/5495f2c1661949ef7a35e4d71111d129cfe7606414a26887a919d0a55406/watchfiles-1.2.0-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:b4e77f6a55f858504069abd35d336a637555c09bca453dde1ee1e5ada8a6a1fb", size = 458978, upload-time = "2026-05-18T04:30:52.606Z" }, + { url = "https://files.pythonhosted.org/packages/d5/8c/7f9c07c433811c2fffd93e13fdfb7135de9aab5f2ae41be08960fa0047dc/watchfiles-1.2.0-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0cb4d80e212f116474a545c21c912b445f16bb0cef9e6a73a498164223e14e2f", size = 490248, upload-time = "2026-05-18T04:31:36.003Z" }, + { url = "https://files.pythonhosted.org/packages/3c/11/d93632febc52fbc21be90231bb7c17fd5387f46c9076fd40a5f9c2ae6910/watchfiles-1.2.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b974946a10af379d425e2eef5b62f5c6ebeaccf91d45eaad6f5b27ecd4f91aa0", size = 571847, upload-time = "2026-05-18T04:31:10.862Z" }, + { url = 
"https://files.pythonhosted.org/packages/55/b4/383173e73aabb07ad1d9c7aa859d95437ac46a6d6a1e11005facda0c9d19/watchfiles-1.2.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:86bc13c25a8d1fcd70b51d0ce7c9b65e90de5666fcbfd3e34957cc73ee19aeb5", size = 465974, upload-time = "2026-05-18T04:30:17.006Z" }, + { url = "https://files.pythonhosted.org/packages/a7/6c/89b1a230a78f57c52dd8893adb1f92f94411721b6ec12596c56d98c74356/watchfiles-1.2.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ca148d73dea36c9763aaa351e4d7a51780ec1584217c45276f4fe8239c768b71", size = 454782, upload-time = "2026-05-18T04:30:35.656Z" }, + { url = "https://files.pythonhosted.org/packages/24/62/1732118367cfff0a9fce3bf62ff4bfded09ef5df21d9d446b858b3f70a96/watchfiles-1.2.0-cp313-cp313-manylinux_2_31_riscv64.whl", hash = "sha256:c525543d91961c6955b2636b308569e84a1d1c5f5f2932041ab9ef46422f43e3", size = 465182, upload-time = "2026-05-18T04:30:20.846Z" }, + { url = "https://files.pythonhosted.org/packages/28/96/716f7e5f51339bf22963f3345f9f27d7f3b30e2eadc597e257c881dd3c53/watchfiles-1.2.0-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:a204794696ffb8f9b10fba6f7cb5216d42f3b2b71860ccac6b6e42f5f10973b0", size = 629841, upload-time = "2026-05-18T04:31:05.397Z" }, + { url = "https://files.pythonhosted.org/packages/4c/fe/c40783950fd771ccf66ab3ec2722d188a9af1c7f96c6e811f36e40c6e03f/watchfiles-1.2.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:10d86db20695afe7997ac9e1717637d6714a8d0220458c33f3d2061f54cec427", size = 658028, upload-time = "2026-05-18T04:31:48.22Z" }, + { url = "https://files.pythonhosted.org/packages/71/72/4508db1856d1d87fcbb3b63f4839bab1b5682cb0e8d224d122263c09654a/watchfiles-1.2.0-cp313-cp313-win32.whl", hash = "sha256:eb283ee99e21ad6443c8cdb06ac5b34b1308c329cbdf03fa02b445363714c799", size = 275183, upload-time = "2026-05-18T04:30:59.57Z" }, + { url = 
"https://files.pythonhosted.org/packages/f9/36/14b76ca57652e5cc5fd1c11f32a261292c08a0d19a00351013c2549cbfb2/watchfiles-1.2.0-cp313-cp313-win_amd64.whl", hash = "sha256:a0f27f01bee51861392bb6b7c4fdb290b27d1eb194e9e28788d68102a0e898d9", size = 288059, upload-time = "2026-05-18T04:32:07.937Z" }, + { url = "https://files.pythonhosted.org/packages/1b/8d/0a85e395398d8d20fadfe5c5d32c726eee17a519e78fb356f2cf7531bffe/watchfiles-1.2.0-cp313-cp313-win_arm64.whl", hash = "sha256:3651aa7058595e9cfb75d35dd5ada2bf9f48a5b8a0f3562821d3e210c507e077", size = 280186, upload-time = "2026-05-18T04:31:54.484Z" }, + { url = "https://files.pythonhosted.org/packages/37/68/36db056f1fdcc5f07302f56e631774d6835bcd6fa3ace402304621d5f9e5/watchfiles-1.2.0-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:faea288b6f0ab1902ef08f4ca6de005dccf856c4e0c4f21b8c5fce02d90a1b08", size = 399031, upload-time = "2026-05-18T04:30:44.576Z" }, + { url = "https://files.pythonhosted.org/packages/c1/64/01a9d6f66a82a5c101ce939274106cc72759d62427e153f01edd2b9f87c2/watchfiles-1.2.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:01859b11fd9fbca670f4d5da00fbac282cfea9bd67a2125d8b2833a3b5617ea9", size = 391205, upload-time = "2026-05-18T04:30:25.413Z" }, + { url = "https://files.pythonhosted.org/packages/84/2c/0a44fe058cb4bb7b8ede6b6670698bbb7c0400740e378d00022189b7b31d/watchfiles-1.2.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fff610d7bb2256a317bb1e96f0d7862c7aa8076733ee5df0fd41bbe76a24a4f4", size = 451892, upload-time = "2026-05-18T04:32:14.005Z" }, + { url = "https://files.pythonhosted.org/packages/67/a1/351e0d56cd35e6488b5c8b4fb11a809a5bc923e8fe8fed9faf8920be0c89/watchfiles-1.2.0-cp313-cp313t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:b141a4891c995a039cd89e9a49e62df1dc8a559a5d1a6e4c7106d16c12777a55", size = 458867, upload-time = "2026-05-18T04:31:22.279Z" }, + { url = 
"https://files.pythonhosted.org/packages/d5/7d/9d09605187f1b838998624049fcf8bf47b73c1a3b76901fcac1782f62277/watchfiles-1.2.0-cp313-cp313t-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f22943b7770483f6ea0721c6b11d022947a98eb0acae14694de034f4d0d38925", size = 490217, upload-time = "2026-05-18T04:31:43.657Z" }, + { url = "https://files.pythonhosted.org/packages/60/5d/a17a16eccb182f04188cd308ec24b1a71a9b5c4e7098269cf35d9fa56d02/watchfiles-1.2.0-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1bc6195825b7dcd217968bb1f801a60fd4c16e8eeab5bedc7fe917d7d5995ab4", size = 571458, upload-time = "2026-05-18T04:32:11.875Z" }, + { url = "https://files.pythonhosted.org/packages/d3/3d/4dd457062083ab1938e5dfd45032eb425cee2ac817287ca8ff4356183e5d/watchfiles-1.2.0-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d4a4b147f5dca2a5d325a06a832fb43f345751adfbc63204aec30e0d9ca965a2", size = 464707, upload-time = "2026-05-18T04:30:43.492Z" }, + { url = "https://files.pythonhosted.org/packages/c6/71/ea8c57b128f5383de74d0c7d2d9c57ad7c9a65a930c451bd25d524b295b7/watchfiles-1.2.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4543579a9bdb0c9560039b4ffddbdb39545707659fbc430ce4c10f3f68d557f9", size = 454663, upload-time = "2026-05-18T04:30:16.061Z" }, + { url = "https://files.pythonhosted.org/packages/53/fd/2e812bf938406d7db351f0703ddd3fc6c061cf30d96153a77bc79a943a44/watchfiles-1.2.0-cp313-cp313t-manylinux_2_31_riscv64.whl", hash = "sha256:20aa0e708b920bde876a4aa82dc7dd6ebea228a63a67cda6632c2fc87b787efa", size = 463537, upload-time = "2026-05-18T04:31:44.9Z" }, + { url = "https://files.pythonhosted.org/packages/86/56/d17a7f1dd1bc3035f1072694a551301272f1739c2d8e319c927cb9e29b38/watchfiles-1.2.0-cp313-cp313t-musllinux_1_1_aarch64.whl", hash = "sha256:d413349d565dab74297f2a63e84a097936be69bf8f3b3801f27f380e32040f44", size = 629194, upload-time = "2026-05-18T04:31:14.141Z" }, + { url = 
"https://files.pythonhosted.org/packages/be/06/f1ff66bf5cae50aa4062779a0ecd0bbaf15e466195719074078947d9a17d/watchfiles-1.2.0-cp313-cp313t-musllinux_1_1_x86_64.whl", hash = "sha256:f28b2725eb8cce327b9b3ab02415c853011dc55c95832fe90de6bc56f5315f72", size = 656194, upload-time = "2026-05-18T04:31:47.14Z" }, + { url = "https://files.pythonhosted.org/packages/e7/54/a9c7ea9a82a4ac65e7004c0a03920b5cdd2f9c3b678757d9cd425aa51d53/watchfiles-1.2.0-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:b8c8358484d5fa12ef34f05b7f4168eaf1932f408725ff6d023c33ec17bd79d4", size = 400205, upload-time = "2026-05-18T04:32:05.153Z" }, + { url = "https://files.pythonhosted.org/packages/aa/5d/c9ab3534374a4a67450696905d6ef16a04405448b8dc52bd752ae50423d4/watchfiles-1.2.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:9f04b092229ad2c50126dd3c922c8822e51e605993764a33058d4a791ab42281", size = 392508, upload-time = "2026-05-18T04:30:54.849Z" }, + { url = "https://files.pythonhosted.org/packages/26/ca/1ad30103535cf0cecd7b993e8d50edc5351b1820e38f2d22e3df58962feb/watchfiles-1.2.0-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7a7ce236284f002a156f70add88efe5c70879cccbb658be0822c54b1306fc09d", size = 452448, upload-time = "2026-05-18T04:30:53.727Z" }, + { url = "https://files.pythonhosted.org/packages/37/a1/ceee2cdf2afbd715fa07758d39c9859513eae411b23196f7fd039e5feedd/watchfiles-1.2.0-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:b9909cc2b48468b575eefa944919e1fe8a36c5849d5c7c168f80a8c1db69398e", size = 459605, upload-time = "2026-05-18T04:30:23.312Z" }, + { url = "https://files.pythonhosted.org/packages/e8/f6/421e30fd1cb3907a84ed92ab3f1983e37ba2dca015e9a894a048418417a2/watchfiles-1.2.0-cp314-cp314-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0a37faaed405c67e28e6be45a1fa4f206ef5a2860f27c237db9fa30704c38242", size = 490757, upload-time = "2026-05-18T04:30:47.358Z" }, + { url = 
"https://files.pythonhosted.org/packages/41/b0/55ed1b97ed08be7bba6f9a541cac15f2a858e1d74d2b07b6da70a82aab00/watchfiles-1.2.0-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9649193aa27bd9ff2e80ff29bfaa93085496c7a3a377592823cc58b77ee88add", size = 568672, upload-time = "2026-05-18T04:30:38.915Z" }, + { url = "https://files.pythonhosted.org/packages/d1/cf/d8ae8a80dd7bafab395ea7681c10237311bbf34d37704a8c744e7cf31fc7/watchfiles-1.2.0-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4e4ff8e37f99cf1da89e255e07c9c4b37c214038c4283707bdec308cb1b0ea1f", size = 464197, upload-time = "2026-05-18T04:30:09.914Z" }, + { url = "https://files.pythonhosted.org/packages/7c/8a/3076c496ca8dafe0e8cd03fcebdfc47be4b1174b4e5b24ff6e396e6b3af2/watchfiles-1.2.0-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:054dc20fd2e3132b4c3883b4a00d72fd6e1f56fdaf89fccd12e8057d74cd74d7", size = 453181, upload-time = "2026-05-18T04:30:14.829Z" }, + { url = "https://files.pythonhosted.org/packages/e5/10/9745e17c98e7b8a86454df0a3c7b5686bd650383f1e9f26e4ebcbd6cc0c0/watchfiles-1.2.0-cp314-cp314-manylinux_2_31_riscv64.whl", hash = "sha256:e140ed30ebde76796b686e67c182cff10ea2fbab186fafd1560f74bb5a473a6e", size = 465109, upload-time = "2026-05-18T04:30:28.123Z" }, + { url = "https://files.pythonhosted.org/packages/8f/95/8ef4a95481d3e0cb52d62a06fa6e972e81424be2d9698b91a2fecca9904c/watchfiles-1.2.0-cp314-cp314-musllinux_1_1_aarch64.whl", hash = "sha256:bb7e52ecf68ba46d22df23467b87cffeb2146908aa523ebfe803019618cfda06", size = 630653, upload-time = "2026-05-18T04:31:49.304Z" }, + { url = "https://files.pythonhosted.org/packages/fd/e4/3b3bf36b0f829b50c6ebcb8d031583863c59f923d6a6af3d485e470d0fac/watchfiles-1.2.0-cp314-cp314-musllinux_1_1_x86_64.whl", hash = "sha256:23282a321c8baf9b3a3c4afff673f9fe65eb7fdc2338d765ccad9d3d1916a5ba", size = 657838, upload-time = "2026-05-18T04:31:06.497Z" }, + { url = 
"https://files.pythonhosted.org/packages/21/b1/6cbbb50c1f3002ab568777d44aa21206dfb8807a840990c4037523b51812/watchfiles-1.2.0-cp314-cp314-win32.whl", hash = "sha256:c0db965c5f79aa49fe672d297cf1febc5ad149b658594944f49a54a2b96270a7", size = 275108, upload-time = "2026-05-18T04:30:06.891Z" }, + { url = "https://files.pythonhosted.org/packages/92/45/190ce6db8dcb4536682cf75d3889ff1a27182a58cb519d343cb6d9ea63d8/watchfiles-1.2.0-cp314-cp314-win_amd64.whl", hash = "sha256:71283b39fd17e5408eb123bd37aeecfd9d54c81fc184421943208aadb879d103", size = 288441, upload-time = "2026-05-18T04:32:12.901Z" }, + { url = "https://files.pythonhosted.org/packages/74/0d/3eae1c2313ab08378431d907c3f8095ecca00f3eda33111cf4f0f2591799/watchfiles-1.2.0-cp314-cp314-win_arm64.whl", hash = "sha256:c5c19526f4e54a00f2666a6c0e9e40d582c09e865055ea7378bf0009aab857b3", size = 280684, upload-time = "2026-05-18T04:31:26.902Z" }, + { url = "https://files.pythonhosted.org/packages/b1/75/fb64e6c25d6b5ca636d03df34ffb1c6e9873303e76d27967e045f8df088f/watchfiles-1.2.0-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:d73a585accffa5ae39c17264c36ec3166d2fad7000c780f5ef83b2722afb9dd2", size = 398857, upload-time = "2026-05-18T04:32:17.108Z" }, + { url = "https://files.pythonhosted.org/packages/73/4e/9f7adf01754cbf81843722ccfec169d8f26c69778281a302855cecd2ee08/watchfiles-1.2.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:ae99b14c5f21e026e0e9d96f40e07d8570ebee6cafd9d8fc318354606daa7a28", size = 392413, upload-time = "2026-05-18T04:31:07.911Z" }, + { url = "https://files.pythonhosted.org/packages/47/c8/bec626bcc2d69f44b9acb24ce7d60ed7b16b73628eea747fcbd169d8edda/watchfiles-1.2.0-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4429f3b105524a10b72c3a819b091c495d2811d419c1e1e8df773a5a5974f831", size = 452409, upload-time = "2026-05-18T04:31:20.142Z" }, + { url = 
"https://files.pythonhosted.org/packages/00/b7/b6362068e81e7c556d155a34c35d40ac3ef42d747b06d7f6e5bf58e359c2/watchfiles-1.2.0-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:43d818978d06062d9b22c4fab2ebe44cf5213d42dc8e62bda8c2760cfa2eeb33", size = 458827, upload-time = "2026-05-18T04:32:06.219Z" }, + { url = "https://files.pythonhosted.org/packages/67/f8/9a813fa42afb1e0b4625e75f0479826644d3ee8dc287e093799bc01f390c/watchfiles-1.2.0-cp314-cp314t-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b9f732dc58b2dbe69e464ccf8fff7a03b0dd0be439da4c0720d3558527d3d6b4", size = 490104, upload-time = "2026-05-18T04:31:56.034Z" }, + { url = "https://files.pythonhosted.org/packages/2f/bf/27dfb6094ca4c9aad21298b5525b6c53cb36121ee454331d05161e58d130/watchfiles-1.2.0-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8f200104103feb097de4cab8fe4f5dd18a2026934c7dea98c55a2f5fd6d5a33b", size = 571360, upload-time = "2026-05-18T04:31:57.133Z" }, + { url = "https://files.pythonhosted.org/packages/fb/39/44a096d67270ea93df91d33877dbe91fbda3aa4f8ec2edf799d93eda8736/watchfiles-1.2.0-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:63ac26eefbf4af1741247d6fb68b11c49a25b2f7413fbd318a83a12aaa9cf666", size = 464644, upload-time = "2026-05-18T04:30:57.33Z" }, + { url = "https://files.pythonhosted.org/packages/0e/80/c7472203bad6268e3ef1ad260739704847898938ad7ea8b63a5131f46b50/watchfiles-1.2.0-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0c4997d4e4a55f0d02b6cde327322daf3a0400e5df6c6b15948994bf72497925", size = 454771, upload-time = "2026-05-18T04:30:48.736Z" }, + { url = "https://files.pythonhosted.org/packages/51/cf/3b10b268b4b7f0fc26e9debb5eef1998b515887840f444cd3ec80c688755/watchfiles-1.2.0-cp314-cp314t-manylinux_2_31_riscv64.whl", hash = "sha256:4c887eba18b7945ac73067a8b4a66f21cd46c2539b2bc68588f7be6c7eb6d26b", size = 463494, upload-time = "2026-05-18T04:31:33.826Z" }, + { 
url = "https://files.pythonhosted.org/packages/3d/3e/a4302545cd589262a0dc7d140e86f7688eba3f9c72776c27f7e23b8864c4/watchfiles-1.2.0-cp314-cp314t-musllinux_1_1_aarch64.whl", hash = "sha256:3416ff151bb6b5a8d8d11664974fbef4d9305b9b2957839ab5a270468fd8df30", size = 629383, upload-time = "2026-05-18T04:31:15.596Z" }, + { url = "https://files.pythonhosted.org/packages/db/99/d5649df0a9a410d45b7c882304d0b790903ac9b6e8f2cfd12114e0c6b9f2/watchfiles-1.2.0-cp314-cp314t-musllinux_1_1_x86_64.whl", hash = "sha256:0e831a271c035d89789cffc386b6aa1375f39f1cd25eb7ca0997e4970d152fc5", size = 656093, upload-time = "2026-05-18T04:31:58.707Z" }, + { url = "https://files.pythonhosted.org/packages/92/b9/362702539275019a54dd2e94511b31a9b89c5f9e6a21966de7eb692549fc/watchfiles-1.2.0-cp315-cp315-macosx_10_12_x86_64.whl", hash = "sha256:37a6721cdf3f65dbb13aa9503510ccb4451603ac837e44d265d7992a597e1374", size = 400109, upload-time = "2026-05-18T04:31:16.879Z" }, + { url = "https://files.pythonhosted.org/packages/8f/75/71d5ba62db781e5587bded1d944c675374bc4aa37ff33d5018d98e8b6538/watchfiles-1.2.0-cp315-cp315-macosx_11_0_arm64.whl", hash = "sha256:2b37d10b5a63bd4d87e18472d80fa525bd670586fae62e5dd580452764879b65", size = 392167, upload-time = "2026-05-18T04:31:28.058Z" }, + { url = "https://files.pythonhosted.org/packages/3c/01/c66dd95d0423fe30d31820e2d1d5bda773764131bbb6ac0cb1cf303ac328/watchfiles-1.2.0-cp315-cp315-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0a105bc2283f67e8fbec74253ec2d94925de92ed72c0393f1206bf326b7b7b69", size = 452372, upload-time = "2026-05-18T04:31:00.836Z" }, + { url = "https://files.pythonhosted.org/packages/91/15/2fe99557e72f85627c6a8eed50d889e8d101623e060a22ad75b875cb932d/watchfiles-1.2.0-cp315-cp315-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5327989a465505f05cfe06f04fa9d0c2fd5432bb243e10e6f012b1bdca3c8579", size = 459596, upload-time = "2026-05-18T04:31:34.96Z" }, + { url = 
"https://files.pythonhosted.org/packages/ed/23/d4acfa0023367428ed48351b3b9b267893037b6cadae55620c61c24bcfd4/watchfiles-1.2.0-cp315-cp315-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ecb47f183a8025b2aa18b546725c3657e542112ae9c0613a2af79b4fa8d04ad7", size = 490869, upload-time = "2026-05-18T04:31:59.923Z" }, + { url = "https://files.pythonhosted.org/packages/a4/5f/3164cbdce06c9fb95c4f7b9e2f9760b5e2797af43a9ecc317ef42a23a278/watchfiles-1.2.0-cp315-cp315-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8520a4ab0e37f770afc34459c4f8f7019e153f9124dc101c15538365875d1ab2", size = 571641, upload-time = "2026-05-18T04:32:00.948Z" }, + { url = "https://files.pythonhosted.org/packages/41/e6/85d3731c55e65cd7690f3f803d24c139588aaf863e4bf2148fe7a7fa1a19/watchfiles-1.2.0-cp315-cp315-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:71cd71740ed2c15211ebb237ced4e39a1cdf6f80566e5fe95428da1626f4fde6", size = 464444, upload-time = "2026-05-18T04:30:34.298Z" }, + { url = "https://files.pythonhosted.org/packages/f4/7d/562641012b8b09872742c3b8adf9629ec479fd78f8d68ae4a0c13da8add6/watchfiles-1.2.0-cp315-cp315-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f88af53d6ddaf72179ef613ddc905e6f4785f712b49b80b3bef9f3525e6194b4", size = 453593, upload-time = "2026-05-18T04:31:23.464Z" }, + { url = "https://files.pythonhosted.org/packages/56/fe/cb8ef3d6f929d14158fdaaad9925985b7310abc9384dcd4d82dd0016fb59/watchfiles-1.2.0-cp315-cp315-manylinux_2_31_riscv64.whl", hash = "sha256:cee9d5efd929efdac5f7e58f72b3376f676b64050a91c5b99a7094c5b2317488", size = 465096, upload-time = "2026-05-18T04:31:30.384Z" }, + { url = "https://files.pythonhosted.org/packages/25/91/80908e835e100527a9267147b08c0eee1fa6ab0ffec15edc04d1d44885f7/watchfiles-1.2.0-cp315-cp315-musllinux_1_1_aarch64.whl", hash = "sha256:b718bf356bbc15e559bd8ef41782b573b8ae0e3f177ab244b440568d7ea02cfb", size = 630638, upload-time = "2026-05-18T04:30:49.89Z" }, + { url = 
"https://files.pythonhosted.org/packages/46/4b/95ab2f256bb4af3cb2eb23b9317bda984ee6e0f11733a5c004a6c95b06e3/watchfiles-1.2.0-cp315-cp315-musllinux_1_1_x86_64.whl", hash = "sha256:922c0e019fe68b3ae392965a766b02a71ba1168c932cebc3733cd52c5fe5b377", size = 657684, upload-time = "2026-05-18T04:31:32.027Z" }, +] + +[[package]] +name = "websockets" +version = "16.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/04/24/4b2031d72e840ce4c1ccb255f693b15c334757fc50023e4db9537080b8c4/websockets-16.0.tar.gz", hash = "sha256:5f6261a5e56e8d5c42a4497b364ea24d94d9563e8fbd44e78ac40879c60179b5", size = 179346, upload-time = "2026-01-10T09:23:47.181Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/84/7b/bac442e6b96c9d25092695578dda82403c77936104b5682307bd4deb1ad4/websockets-16.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:71c989cbf3254fbd5e84d3bff31e4da39c43f884e64f2551d14bb3c186230f00", size = 177365, upload-time = "2026-01-10T09:22:46.787Z" }, + { url = "https://files.pythonhosted.org/packages/b0/fe/136ccece61bd690d9c1f715baaeefd953bb2360134de73519d5df19d29ca/websockets-16.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:8b6e209ffee39ff1b6d0fa7bfef6de950c60dfb91b8fcead17da4ee539121a79", size = 175038, upload-time = "2026-01-10T09:22:47.999Z" }, + { url = "https://files.pythonhosted.org/packages/40/1e/9771421ac2286eaab95b8575b0cb701ae3663abf8b5e1f64f1fd90d0a673/websockets-16.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:86890e837d61574c92a97496d590968b23c2ef0aeb8a9bc9421d174cd378ae39", size = 175328, upload-time = "2026-01-10T09:22:49.809Z" }, + { url = "https://files.pythonhosted.org/packages/18/29/71729b4671f21e1eaa5d6573031ab810ad2936c8175f03f97f3ff164c802/websockets-16.0-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:9b5aca38b67492ef518a8ab76851862488a478602229112c4b0d58d63a7a4d5c", size = 184915, upload-time = 
"2026-01-10T09:22:51.071Z" }, + { url = "https://files.pythonhosted.org/packages/97/bb/21c36b7dbbafc85d2d480cd65df02a1dc93bf76d97147605a8e27ff9409d/websockets-16.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e0334872c0a37b606418ac52f6ab9cfd17317ac26365f7f65e203e2d0d0d359f", size = 186152, upload-time = "2026-01-10T09:22:52.224Z" }, + { url = "https://files.pythonhosted.org/packages/4a/34/9bf8df0c0cf88fa7bfe36678dc7b02970c9a7d5e065a3099292db87b1be2/websockets-16.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:a0b31e0b424cc6b5a04b8838bbaec1688834b2383256688cf47eb97412531da1", size = 185583, upload-time = "2026-01-10T09:22:53.443Z" }, + { url = "https://files.pythonhosted.org/packages/47/88/4dd516068e1a3d6ab3c7c183288404cd424a9a02d585efbac226cb61ff2d/websockets-16.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:485c49116d0af10ac698623c513c1cc01c9446c058a4e61e3bf6c19dff7335a2", size = 184880, upload-time = "2026-01-10T09:22:55.033Z" }, + { url = "https://files.pythonhosted.org/packages/91/d6/7d4553ad4bf1c0421e1ebd4b18de5d9098383b5caa1d937b63df8d04b565/websockets-16.0-cp312-cp312-win32.whl", hash = "sha256:eaded469f5e5b7294e2bdca0ab06becb6756ea86894a47806456089298813c89", size = 178261, upload-time = "2026-01-10T09:22:56.251Z" }, + { url = "https://files.pythonhosted.org/packages/c3/f0/f3a17365441ed1c27f850a80b2bc680a0fa9505d733fe152fdf5e98c1c0b/websockets-16.0-cp312-cp312-win_amd64.whl", hash = "sha256:5569417dc80977fc8c2d43a86f78e0a5a22fee17565d78621b6bb264a115d4ea", size = 178693, upload-time = "2026-01-10T09:22:57.478Z" }, + { url = "https://files.pythonhosted.org/packages/cc/9c/baa8456050d1c1b08dd0ec7346026668cbc6f145ab4e314d707bb845bf0d/websockets-16.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:878b336ac47938b474c8f982ac2f7266a540adc3fa4ad74ae96fea9823a02cc9", size = 177364, upload-time = "2026-01-10T09:22:59.333Z" }, + { url = 
"https://files.pythonhosted.org/packages/7e/0c/8811fc53e9bcff68fe7de2bcbe75116a8d959ac699a3200f4847a8925210/websockets-16.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:52a0fec0e6c8d9a784c2c78276a48a2bdf099e4ccc2a4cad53b27718dbfd0230", size = 175039, upload-time = "2026-01-10T09:23:01.171Z" }, + { url = "https://files.pythonhosted.org/packages/aa/82/39a5f910cb99ec0b59e482971238c845af9220d3ab9fa76dd9162cda9d62/websockets-16.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:e6578ed5b6981005df1860a56e3617f14a6c307e6a71b4fff8c48fdc50f3ed2c", size = 175323, upload-time = "2026-01-10T09:23:02.341Z" }, + { url = "https://files.pythonhosted.org/packages/bd/28/0a25ee5342eb5d5f297d992a77e56892ecb65e7854c7898fb7d35e9b33bd/websockets-16.0-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:95724e638f0f9c350bb1c2b0a7ad0e83d9cc0c9259f3ea94e40d7b02a2179ae5", size = 184975, upload-time = "2026-01-10T09:23:03.756Z" }, + { url = "https://files.pythonhosted.org/packages/f9/66/27ea52741752f5107c2e41fda05e8395a682a1e11c4e592a809a90c6a506/websockets-16.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c0204dc62a89dc9d50d682412c10b3542d748260d743500a85c13cd1ee4bde82", size = 186203, upload-time = "2026-01-10T09:23:05.01Z" }, + { url = "https://files.pythonhosted.org/packages/37/e5/8e32857371406a757816a2b471939d51c463509be73fa538216ea52b792a/websockets-16.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:52ac480f44d32970d66763115edea932f1c5b1312de36df06d6b219f6741eed8", size = 185653, upload-time = "2026-01-10T09:23:06.301Z" }, + { url = "https://files.pythonhosted.org/packages/9b/67/f926bac29882894669368dc73f4da900fcdf47955d0a0185d60103df5737/websockets-16.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6e5a82b677f8f6f59e8dfc34ec06ca6b5b48bc4fcda346acd093694cc2c24d8f", size = 184920, upload-time = "2026-01-10T09:23:07.492Z" }, + { url = 
"https://files.pythonhosted.org/packages/3c/a1/3d6ccdcd125b0a42a311bcd15a7f705d688f73b2a22d8cf1c0875d35d34a/websockets-16.0-cp313-cp313-win32.whl", hash = "sha256:abf050a199613f64c886ea10f38b47770a65154dc37181bfaff70c160f45315a", size = 178255, upload-time = "2026-01-10T09:23:09.245Z" }, + { url = "https://files.pythonhosted.org/packages/6b/ae/90366304d7c2ce80f9b826096a9e9048b4bb760e44d3b873bb272cba696b/websockets-16.0-cp313-cp313-win_amd64.whl", hash = "sha256:3425ac5cf448801335d6fdc7ae1eb22072055417a96cc6b31b3861f455fbc156", size = 178689, upload-time = "2026-01-10T09:23:10.483Z" }, + { url = "https://files.pythonhosted.org/packages/f3/1d/e88022630271f5bd349ed82417136281931e558d628dd52c4d8621b4a0b2/websockets-16.0-cp314-cp314-macosx_10_15_universal2.whl", hash = "sha256:8cc451a50f2aee53042ac52d2d053d08bf89bcb31ae799cb4487587661c038a0", size = 177406, upload-time = "2026-01-10T09:23:12.178Z" }, + { url = "https://files.pythonhosted.org/packages/f2/78/e63be1bf0724eeb4616efb1ae1c9044f7c3953b7957799abb5915bffd38e/websockets-16.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:daa3b6ff70a9241cf6c7fc9e949d41232d9d7d26fd3522b1ad2b4d62487e9904", size = 175085, upload-time = "2026-01-10T09:23:13.511Z" }, + { url = "https://files.pythonhosted.org/packages/bb/f4/d3c9220d818ee955ae390cf319a7c7a467beceb24f05ee7aaaa2414345ba/websockets-16.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:fd3cb4adb94a2a6e2b7c0d8d05cb94e6f1c81a0cf9dc2694fb65c7e8d94c42e4", size = 175328, upload-time = "2026-01-10T09:23:14.727Z" }, + { url = "https://files.pythonhosted.org/packages/63/bc/d3e208028de777087e6fb2b122051a6ff7bbcca0d6df9d9c2bf1dd869ae9/websockets-16.0-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:781caf5e8eee67f663126490c2f96f40906594cb86b408a703630f95550a8c3e", size = 185044, upload-time = "2026-01-10T09:23:15.939Z" }, + { url = 
"https://files.pythonhosted.org/packages/ad/6e/9a0927ac24bd33a0a9af834d89e0abc7cfd8e13bed17a86407a66773cc0e/websockets-16.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:caab51a72c51973ca21fa8a18bd8165e1a0183f1ac7066a182ff27107b71e1a4", size = 186279, upload-time = "2026-01-10T09:23:17.148Z" }, + { url = "https://files.pythonhosted.org/packages/b9/ca/bf1c68440d7a868180e11be653c85959502efd3a709323230314fda6e0b3/websockets-16.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:19c4dc84098e523fd63711e563077d39e90ec6702aff4b5d9e344a60cb3c0cb1", size = 185711, upload-time = "2026-01-10T09:23:18.372Z" }, + { url = "https://files.pythonhosted.org/packages/c4/f8/fdc34643a989561f217bb477cbc47a3a07212cbda91c0e4389c43c296ebf/websockets-16.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:a5e18a238a2b2249c9a9235466b90e96ae4795672598a58772dd806edc7ac6d3", size = 184982, upload-time = "2026-01-10T09:23:19.652Z" }, + { url = "https://files.pythonhosted.org/packages/dd/d1/574fa27e233764dbac9c52730d63fcf2823b16f0856b3329fc6268d6ae4f/websockets-16.0-cp314-cp314-win32.whl", hash = "sha256:a069d734c4a043182729edd3e9f247c3b2a4035415a9172fd0f1b71658a320a8", size = 177915, upload-time = "2026-01-10T09:23:21.458Z" }, + { url = "https://files.pythonhosted.org/packages/8a/f1/ae6b937bf3126b5134ce1f482365fde31a357c784ac51852978768b5eff4/websockets-16.0-cp314-cp314-win_amd64.whl", hash = "sha256:c0ee0e63f23914732c6d7e0cce24915c48f3f1512ec1d079ed01fc629dab269d", size = 178381, upload-time = "2026-01-10T09:23:22.715Z" }, + { url = "https://files.pythonhosted.org/packages/06/9b/f791d1db48403e1f0a27577a6beb37afae94254a8c6f08be4a23e4930bc0/websockets-16.0-cp314-cp314t-macosx_10_15_universal2.whl", hash = "sha256:a35539cacc3febb22b8f4d4a99cc79b104226a756aa7400adc722e83b0d03244", size = 177737, upload-time = "2026-01-10T09:23:24.523Z" }, + { url = 
"https://files.pythonhosted.org/packages/bd/40/53ad02341fa33b3ce489023f635367a4ac98b73570102ad2cdd770dacc9a/websockets-16.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:b784ca5de850f4ce93ec85d3269d24d4c82f22b7212023c974c401d4980ebc5e", size = 175268, upload-time = "2026-01-10T09:23:25.781Z" }, + { url = "https://files.pythonhosted.org/packages/74/9b/6158d4e459b984f949dcbbb0c5d270154c7618e11c01029b9bbd1bb4c4f9/websockets-16.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:569d01a4e7fba956c5ae4fc988f0d4e187900f5497ce46339c996dbf24f17641", size = 175486, upload-time = "2026-01-10T09:23:27.033Z" }, + { url = "https://files.pythonhosted.org/packages/e5/2d/7583b30208b639c8090206f95073646c2c9ffd66f44df967981a64f849ad/websockets-16.0-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:50f23cdd8343b984957e4077839841146f67a3d31ab0d00e6b824e74c5b2f6e8", size = 185331, upload-time = "2026-01-10T09:23:28.259Z" }, + { url = "https://files.pythonhosted.org/packages/45/b0/cce3784eb519b7b5ad680d14b9673a31ab8dcb7aad8b64d81709d2430aa8/websockets-16.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:152284a83a00c59b759697b7f9e9cddf4e3c7861dd0d964b472b70f78f89e80e", size = 186501, upload-time = "2026-01-10T09:23:29.449Z" }, + { url = "https://files.pythonhosted.org/packages/19/60/b8ebe4c7e89fb5f6cdf080623c9d92789a53636950f7abacfc33fe2b3135/websockets-16.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:bc59589ab64b0022385f429b94697348a6a234e8ce22544e3681b2e9331b5944", size = 186062, upload-time = "2026-01-10T09:23:31.368Z" }, + { url = "https://files.pythonhosted.org/packages/88/a8/a080593f89b0138b6cba1b28f8df5673b5506f72879322288b031337c0b8/websockets-16.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:32da954ffa2814258030e5a57bc73a3635463238e797c7375dc8091327434206", size = 185356, upload-time = "2026-01-10T09:23:32.627Z" }, + { url = 
"https://files.pythonhosted.org/packages/c2/b6/b9afed2afadddaf5ebb2afa801abf4b0868f42f8539bfe4b071b5266c9fe/websockets-16.0-cp314-cp314t-win32.whl", hash = "sha256:5a4b4cc550cb665dd8a47f868c8d04c8230f857363ad3c9caf7a0c3bf8c61ca6", size = 178085, upload-time = "2026-01-10T09:23:33.816Z" }, + { url = "https://files.pythonhosted.org/packages/9f/3e/28135a24e384493fa804216b79a6a6759a38cc4ff59118787b9fb693df93/websockets-16.0-cp314-cp314t-win_amd64.whl", hash = "sha256:b14dc141ed6d2dde437cddb216004bcac6a1df0935d79656387bd41632ba0bbd", size = 178531, upload-time = "2026-01-10T09:23:35.016Z" }, + { url = "https://files.pythonhosted.org/packages/6f/28/258ebab549c2bf3e64d2b0217b973467394a9cea8c42f70418ca2c5d0d2e/websockets-16.0-py3-none-any.whl", hash = "sha256:1637db62fad1dc833276dded54215f2c7fa46912301a24bd94d45d46a011ceec", size = 171598, upload-time = "2026-01-10T09:23:45.395Z" }, +] diff --git a/src/api/extraction/application/chat_turn_service.py b/src/api/extraction/application/chat_turn_service.py index 64081bf9d..7df0d0aaa 100644 --- a/src/api/extraction/application/chat_turn_service.py +++ b/src/api/extraction/application/chat_turn_service.py @@ -17,6 +17,7 @@ from extraction.ports.chat_agent import IExtractionChatAgent from extraction.ports.ingestion_readiness import IIngestionReadinessReader from extraction.ports.runtime import IStickySessionRuntimeManager +from extraction.ports.sticky_session_bootstrap import IStickySessionBootstrapBuilder class ExtractionChatTurnService: @@ -30,16 +31,19 @@ def __init__( ingestion_readiness_reader: IIngestionReadinessReader, sticky_runtime_manager: IStickySessionRuntimeManager, chat_agent: IExtractionChatAgent, + bootstrap_builder: IStickySessionBootstrapBuilder, ) -> None: self._session_service = session_service self._skill_resolution_service = skill_resolution_service self._ingestion_readiness_reader = ingestion_readiness_reader self._sticky_runtime_manager = sticky_runtime_manager self._chat_agent = chat_agent + 
self._bootstrap_builder = bootstrap_builder async def stream_chat_turn( self, *, + tenant_id: str, user_id: str, knowledge_graph_id: str, mode: ExtractionSessionMode, @@ -77,26 +81,6 @@ async def stream_chat_turn( "graph_management_ui_mode": ui_mode.value, } - lease = self._sticky_runtime_manager.get_or_start_runtime( - session_id=session.id, - user_id=user_id, - knowledge_graph_id=knowledge_graph_id, - mode=mode.value, - ) - session.runtime_context["sticky_runtime"] = { - "container_id": lease.container_id, - "status": lease.status, - "expires_at": lease.expires_at.isoformat(), - } - - yield { - "type": "thinking", - "recent": [ - "Contacting Graph Management Assistant…", - f"Sticky container {lease.container_id[:8]} active", - ], - } - readiness = await self._ingestion_readiness_reader.read_for_knowledge_graph( knowledge_graph_id=knowledge_graph_id, ) @@ -133,6 +117,34 @@ async def stream_chat_turn( yield {"type": "done", "ok": True, "reply": assistant_reply, "wait": True} return + bootstrap = await self._bootstrap_builder.build( + tenant_id=tenant_id, + knowledge_graph_id=knowledge_graph_id, + session_id=session.id, + include_job_packages=gate.phase != SessionJobPackagePhase.NOT_REQUIRED, + ) + lease = self._sticky_runtime_manager.get_or_start_runtime( + session_id=session.id, + user_id=user_id, + knowledge_graph_id=knowledge_graph_id, + mode=mode.value, + bootstrap=bootstrap, + ) + session.runtime_context["sticky_runtime"] = { + "container_id": lease.container_id, + "status": lease.status, + "expires_at": lease.expires_at.isoformat(), + "runtime_base_url": lease.runtime_base_url, + } + + yield { + "type": "thinking", + "recent": [ + "Contacting Graph Management Assistant…", + f"Sticky container {lease.container_id[:8]} active", + ], + } + session.runtime_context["job_package"]["phase"] = SessionJobPackagePhase.READY.value thinking_lines: list[str] = [] assistant_reply: str | None = None diff --git a/src/api/extraction/dependencies.py 
b/src/api/extraction/dependencies.py index b2aaf1a09..f1125096f 100644 --- a/src/api/extraction/dependencies.py +++ b/src/api/extraction/dependencies.py @@ -1,6 +1,7 @@ """FastAPI dependencies for Extraction services.""" from functools import lru_cache +from pathlib import Path from typing import Annotated from fastapi import Depends @@ -11,17 +12,24 @@ ExtractionChatTurnService, ExtractionSkillResolutionService, ) -from extraction.infrastructure.deterministic_chat_agent import DeterministicExtractionChatAgent +from extraction.infrastructure.prepared_job_package_reader import SqlPreparedJobPackageReader from extraction.infrastructure.ingestion_readiness_reader import SqlIngestionReadinessReader from extraction.infrastructure.repositories import ( ExtractionAgentSessionRepository, ExtractionSessionRunMetricsReader, ExtractionSkillOverrideRepository, ) +from extraction.infrastructure.sticky_session_bootstrap_builder import StickySessionBootstrapBuilder +from extraction.infrastructure.sticky_session_workdir_materializer import ( + StickySessionWorkdirMaterializer, +) from extraction.infrastructure.workload_runtime_factory import ( create_ephemeral_extraction_worker_launcher, + create_extraction_chat_agent, create_sticky_session_runtime_manager, + get_workload_credential_issuer, ) +from extraction.infrastructure.workload_runtime_settings import get_extraction_workload_runtime_settings from extraction.ports.runtime import ( IEphemeralExtractionWorkerLauncher, IStickySessionRuntimeManager, @@ -66,6 +74,7 @@ def get_extraction_chat_turn_service( ], ) -> ExtractionChatTurnService: """Get ExtractionChatTurnService instance.""" + runtime_settings = get_extraction_workload_runtime_settings() skill_resolution_service = ExtractionSkillResolutionService( override_repository=ExtractionSkillOverrideRepository() ) @@ -75,10 +84,19 @@ def get_extraction_chat_turn_service( run_metrics_reader=ExtractionSessionRunMetricsReader(session=session), 
sticky_runtime_manager=sticky_runtime_manager, ) + bootstrap_builder = StickySessionBootstrapBuilder( + credential_issuer=get_workload_credential_issuer(), + prepared_job_package_reader=SqlPreparedJobPackageReader(session=session), + workdir_materializer=StickySessionWorkdirMaterializer( + job_package_work_dir=Path(runtime_settings.job_package_work_dir), + ), + runtime_settings=runtime_settings, + ) return ExtractionChatTurnService( session_service=session_service, skill_resolution_service=skill_resolution_service, ingestion_readiness_reader=SqlIngestionReadinessReader(session=session), sticky_runtime_manager=sticky_runtime_manager, - chat_agent=DeterministicExtractionChatAgent(), + chat_agent=create_extraction_chat_agent(runtime_settings), + bootstrap_builder=bootstrap_builder, ) diff --git a/src/api/extraction/infrastructure/container_workload_runtime.py b/src/api/extraction/infrastructure/container_workload_runtime.py index 95ccc1f6a..53b638281 100644 --- a/src/api/extraction/infrastructure/container_workload_runtime.py +++ b/src/api/extraction/infrastructure/container_workload_runtime.py @@ -14,6 +14,7 @@ IEphemeralExtractionWorkerLauncher, IStickySessionRuntimeManager, ScopedWorkloadCredentials, + StickySessionRuntimeBootstrap, StickySessionRuntimeLease, ) from shared_kernel.container_runtime.ports import ContainerRunSpec, IContainerRuntime @@ -37,11 +38,19 @@ def __init__( sticky_image: str, sticky_command: tuple[str, ...], session_ttl: timedelta = timedelta(minutes=30), + container_network: str | None = None, + sticky_service_port: int = 8787, + container_skills_mount: str = "/app/skills", + container_work_mount: str = "/workspace", ) -> None: self._container_runtime = container_runtime self._sticky_image = sticky_image self._sticky_command = sticky_command self._session_ttl = session_ttl + self._container_network = container_network + self._sticky_service_port = sticky_service_port + self._container_skills_mount = container_skills_mount + 
self._container_work_mount = container_work_mount self._leases: dict[str, StickySessionRuntimeLease] = {} def get_or_start_runtime( @@ -51,6 +60,7 @@ def get_or_start_runtime( user_id: str, knowledge_graph_id: str, mode: str, + bootstrap: StickySessionRuntimeBootstrap | None = None, ) -> StickySessionRuntimeLease: now = datetime.now(UTC) existing = self._leases.get(session_id) @@ -77,6 +87,7 @@ def get_or_start_runtime( knowledge_graph_id=knowledge_graph_id, mode=mode, now=now, + bootstrap=bootstrap, ) self._leases[session_id] = lease return lease @@ -88,6 +99,7 @@ def reset_runtime( user_id: str, knowledge_graph_id: str, mode: str, + bootstrap: StickySessionRuntimeBootstrap | None = None, ) -> StickySessionRuntimeLease: existing = self._leases.pop(session_id, None) if existing is not None: @@ -97,6 +109,7 @@ def reset_runtime( user_id=user_id, knowledge_graph_id=knowledge_graph_id, mode=mode, + bootstrap=bootstrap, ) def cleanup_expired(self, *, now: datetime) -> list[str]: @@ -120,12 +133,49 @@ def _start_runtime( knowledge_graph_id: str, mode: str, now: datetime, + bootstrap: StickySessionRuntimeBootstrap | None, ) -> StickySessionRuntimeLease: container_name = _sanitize_container_name("kartograph-sticky-", session_id) + env: dict[str, str] = { + "KARTOGRAPH_SESSION_ID": session_id, + "KARTOGRAPH_KNOWLEDGE_GRAPH_ID": knowledge_graph_id, + "KARTOGRAPH_USER_ID": user_id, + "KARTOGRAPH_SESSION_MODE": mode, + "KARTOGRAPH_SKILLS_DIR": self._container_skills_mount, + "KARTOGRAPH_WORKSPACE_DIR": self._container_work_mount, + } + binds: list[str] = [] + if bootstrap is not None: + required_scopes = { + f"tenant:{bootstrap.tenant_id}", + f"knowledge_graph:{knowledge_graph_id}", + "workload:chat", + } + if not required_scopes.issubset(set(bootstrap.credentials.scopes)): + raise ValueError("sticky session credentials scope is invalid") + if bootstrap.credentials.expires_at <= datetime.now(UTC): + raise ValueError("sticky session credentials are expired") + env.update( + { 
+ "KARTOGRAPH_WORKLOAD_TOKEN": bootstrap.credentials.token, + "KARTOGRAPH_TENANT_ID": bootstrap.tenant_id, + "KARTOGRAPH_API_BASE_URL": bootstrap.api_base_url, + } + ) + binds.extend( + [ + f"{bootstrap.host_skills_dir}:{self._container_skills_mount}:ro", + f"{bootstrap.host_session_work_dir}:{self._container_work_mount}:ro", + ] + ) + launched = self._container_runtime.run( ContainerRunSpec( image=self._sticky_image, name=container_name, + env=env, + binds=tuple(binds), + network=self._container_network, labels={ "kartograph.runtime.kind": "sticky", "kartograph.session_id": session_id, @@ -136,6 +186,7 @@ def _start_runtime( command=self._sticky_command, ) ) + runtime_base_url = f"http://{container_name}:{self._sticky_service_port}" return StickySessionRuntimeLease( session_id=session_id, container_id=launched.container_id, @@ -145,6 +196,7 @@ def _start_runtime( status="active", last_activity_at=now, expires_at=now + self._session_ttl, + runtime_base_url=runtime_base_url, ) def _terminate_container(self, container_id: str) -> None: diff --git a/src/api/extraction/infrastructure/prepared_job_package_reader.py b/src/api/extraction/infrastructure/prepared_job_package_reader.py new file mode 100644 index 000000000..b99684894 --- /dev/null +++ b/src/api/extraction/infrastructure/prepared_job_package_reader.py @@ -0,0 +1,37 @@ +"""SQL reader for latest prepared JobPackage identifiers without importing Management.""" + +from __future__ import annotations + +from sqlalchemy import text +from sqlalchemy.ext.asyncio import AsyncSession + + +class SqlPreparedJobPackageReader: + """Reads latest prepared JobPackage ids from outbox events for one knowledge graph.""" + + def __init__(self, *, session: AsyncSession) -> None: + self._session = session + + async def list_latest_for_knowledge_graph( + self, *, knowledge_graph_id: str + ) -> tuple[str, ...]: + result = await self._session.execute( + text( + """ + SELECT DISTINCT ON (payload->>'data_source_id') + 
payload->>'job_package_id' AS job_package_id + FROM outbox + WHERE event_type IN ('IngestionPrepared', 'JobPackageProduced') + AND payload->>'knowledge_graph_id' = :knowledge_graph_id + AND payload->>'job_package_id' IS NOT NULL + ORDER BY payload->>'data_source_id', occurred_at DESC + """ + ), + {"knowledge_graph_id": knowledge_graph_id}, + ) + package_ids = tuple( + str(row.job_package_id) + for row in result + if row.job_package_id is not None and str(row.job_package_id).strip() + ) + return package_ids diff --git a/src/api/extraction/infrastructure/remote_sticky_container_chat_agent.py b/src/api/extraction/infrastructure/remote_sticky_container_chat_agent.py new file mode 100644 index 000000000..f20b5079c --- /dev/null +++ b/src/api/extraction/infrastructure/remote_sticky_container_chat_agent.py @@ -0,0 +1,78 @@ +"""HTTP client that streams chat turns from a sticky session agent runtime container.""" + +from __future__ import annotations + +import json +from collections.abc import AsyncIterator +from typing import Any + +import httpx + +from extraction.domain.entities.agent_session import ExtractionAgentSession +from extraction.domain.value_objects import GraphManagementUiMode + + +class RemoteStickyContainerChatAgent: + """Delegates conversational turns to the sticky session Claude agent runtime.""" + + def __init__(self, *, request_timeout_seconds: float = 120.0) -> None: + self._request_timeout_seconds = request_timeout_seconds + + async def stream_turn( + self, + *, + session: ExtractionAgentSession, + user_message: str, + ui_mode: GraphManagementUiMode, + ) -> AsyncIterator[dict[str, Any]]: + sticky_runtime = session.runtime_context.get("sticky_runtime", {}) + runtime_base_url = sticky_runtime.get("runtime_base_url") + if not isinstance(runtime_base_url, str) or not runtime_base_url.strip(): + yield { + "type": "done", + "ok": False, + "error": { + "code": "RUNTIME_UNAVAILABLE", + "message": "Sticky session runtime endpoint is unavailable.", + }, + } + 
return + + payload = { + "message": user_message, + "ui_mode": ui_mode.value, + "agent_configuration": session.runtime_context.get("agent_configuration", {}), + "message_history": session.message_history[-20:], + } + url = f"{runtime_base_url.rstrip('/')}/v1/turn" + + try: + async with httpx.AsyncClient(timeout=self._request_timeout_seconds) as client: + async with client.stream("POST", url, json=payload) as response: + if response.status_code >= 400: + body = await response.aread() + detail = body.decode("utf-8", errors="replace") + yield { + "type": "done", + "ok": False, + "error": { + "code": "RUNTIME_HTTP_ERROR", + "message": detail or f"Agent runtime returned {response.status_code}", + }, + } + return + + async for line in response.aiter_lines(): + trimmed = line.strip() + if not trimmed: + continue + yield json.loads(trimmed) + except httpx.HTTPError as exc: + yield { + "type": "done", + "ok": False, + "error": { + "code": "RUNTIME_TRANSPORT_ERROR", + "message": str(exc), + }, + } diff --git a/src/api/extraction/infrastructure/sticky_session_bootstrap_builder.py b/src/api/extraction/infrastructure/sticky_session_bootstrap_builder.py new file mode 100644 index 000000000..1777db9e4 --- /dev/null +++ b/src/api/extraction/infrastructure/sticky_session_bootstrap_builder.py @@ -0,0 +1,64 @@ +"""Build sticky session runtime bootstrap payloads for container launch.""" + +from __future__ import annotations + +from extraction.infrastructure.sticky_session_workdir_materializer import ( + StickySessionWorkdirMaterializer, +) +from extraction.infrastructure.workload_runtime import ScopedWorkloadCredentialIssuer +from extraction.infrastructure.workload_runtime_settings import ( + ExtractionWorkloadRuntimeSettings, + get_extraction_workload_runtime_settings, +) +from extraction.ports.prepared_job_packages import IPreparedJobPackageReader +from extraction.ports.runtime import StickySessionRuntimeBootstrap + + +class StickySessionBootstrapBuilder: + """Prepare host workdirs 
and credentials for sticky session containers.""" + + def __init__( + self, + *, + credential_issuer: ScopedWorkloadCredentialIssuer, + prepared_job_package_reader: IPreparedJobPackageReader, + workdir_materializer: StickySessionWorkdirMaterializer, + runtime_settings: ExtractionWorkloadRuntimeSettings | None = None, + ) -> None: + self._credential_issuer = credential_issuer + self._prepared_job_package_reader = prepared_job_package_reader + self._workdir_materializer = workdir_materializer + self._runtime_settings = runtime_settings or get_extraction_workload_runtime_settings() + + async def build( + self, + *, + tenant_id: str, + knowledge_graph_id: str, + session_id: str, + include_job_packages: bool, + ) -> StickySessionRuntimeBootstrap | None: + if self._runtime_settings.backend != "container": + return None + + package_ids: tuple[str, ...] = () + if include_job_packages: + package_ids = await self._prepared_job_package_reader.list_latest_for_knowledge_graph( + knowledge_graph_id=knowledge_graph_id, + ) + host_session_work_dir = self._workdir_materializer.prepare( + session_id=session_id, + knowledge_graph_id=knowledge_graph_id, + job_package_ids=package_ids, + ) + credentials = self._credential_issuer.issue_for_sticky_session( + tenant_id=tenant_id, + knowledge_graph_id=knowledge_graph_id, + ) + return StickySessionRuntimeBootstrap( + tenant_id=tenant_id, + credentials=credentials, + host_session_work_dir=str(host_session_work_dir), + host_skills_dir=self._runtime_settings.skills_dir, + api_base_url=self._runtime_settings.api_base_url, + ) \ No newline at end of file diff --git a/src/api/extraction/infrastructure/sticky_session_workdir_materializer.py b/src/api/extraction/infrastructure/sticky_session_workdir_materializer.py new file mode 100644 index 000000000..1fe5b5db3 --- /dev/null +++ b/src/api/extraction/infrastructure/sticky_session_workdir_materializer.py @@ -0,0 +1,67 @@ +"""Prepare sticky session work directories with JobPackage materialization.""" 
+ +from __future__ import annotations + +from pathlib import Path +import shutil +import zipfile + +from shared_kernel.job_package.path_safety import validate_zip_entry_name +from shared_kernel.job_package.reader import JobPackageReader +from shared_kernel.job_package.value_objects import JobPackageId + + +class StickySessionWorkdirMaterializer: + """Materialize JobPackage archives into a session-scoped work directory.""" + + def __init__(self, *, job_package_work_dir: Path) -> None: + self._job_package_work_dir = job_package_work_dir + + def prepare( + self, + *, + session_id: str, + knowledge_graph_id: str, + job_package_ids: tuple[str, ...] = (), + ) -> Path: + """Create or refresh the host work directory for one sticky session.""" + session_root = self._job_package_work_dir / "sticky-sessions" / session_id + if session_root.exists(): + shutil.rmtree(session_root) + ingestion_context_dir = session_root / "ingestion-context" + repository_files_dir = session_root / "repository-files" + ingestion_context_dir.mkdir(parents=True, exist_ok=True) + repository_files_dir.mkdir(parents=True, exist_ok=True) + + discovered = job_package_ids or self._discover_job_package_ids() + for package_id in discovered: + archive_path = self._job_package_work_dir / JobPackageId(value=package_id).archive_name() + if not archive_path.exists(): + continue + package_dir = ingestion_context_dir / package_id + package_dir.mkdir(parents=True, exist_ok=True) + with zipfile.ZipFile(archive_path) as archive: + for entry_name in archive.namelist(): + validate_zip_entry_name(entry_name) + archive.extract(entry_name, path=package_dir) + + reader = JobPackageReader(archive_path) + for change in reader.iter_changeset(): + if change.content_ref is None or not change.path: + continue + validate_zip_entry_name(change.path) + output_path = repository_files_dir / package_id / change.path + output_path.parent.mkdir(parents=True, exist_ok=True) + 
output_path.write_bytes(reader.read_content(change.content_ref)) + + marker = session_root / "knowledge-graph-id" + marker.write_text(knowledge_graph_id, encoding="utf-8") + return session_root + + def _discover_job_package_ids(self) -> tuple[str, ...]: + package_ids: list[str] = [] + for archive in sorted(self._job_package_work_dir.glob("job-package-*.zip")): + stem = archive.stem.removeprefix("job-package-") + if stem: + package_ids.append(stem) + return tuple(package_ids) diff --git a/src/api/extraction/infrastructure/workload_runtime.py b/src/api/extraction/infrastructure/workload_runtime.py index 4f50940be..6af2f5655 100644 --- a/src/api/extraction/infrastructure/workload_runtime.py +++ b/src/api/extraction/infrastructure/workload_runtime.py @@ -13,6 +13,7 @@ IEphemeralExtractionWorkerLauncher, IStickySessionRuntimeManager, ScopedWorkloadCredentials, + StickySessionRuntimeBootstrap, StickySessionRuntimeLease, ) @@ -31,6 +32,7 @@ def get_or_start_runtime( user_id: str, knowledge_graph_id: str, mode: str, + bootstrap: StickySessionRuntimeBootstrap | None = None, ) -> StickySessionRuntimeLease: now = datetime.now(UTC) existing = self._leases.get(session_id) @@ -53,6 +55,7 @@ def get_or_start_runtime( status="active", last_activity_at=now, expires_at=now + self._session_ttl, + runtime_base_url="memory://sticky-runtime", ) self._leases[session_id] = lease return lease @@ -64,6 +67,7 @@ def reset_runtime( user_id: str, knowledge_graph_id: str, mode: str, + bootstrap: StickySessionRuntimeBootstrap | None = None, ) -> StickySessionRuntimeLease: self._leases.pop(session_id, None) return self.get_or_start_runtime( @@ -87,22 +91,53 @@ def cleanup_expired(self, *, now: datetime) -> list[str]: class ScopedWorkloadCredentialIssuer: - """Issues short-lived tenant/KG scoped credentials for extraction workers.""" + """Issues short-lived tenant/KG scoped credentials for extraction workloads.""" def __init__(self, *, default_ttl: timedelta = timedelta(minutes=15)) -> None: 
self._default_ttl = default_ttl + self._issued: dict[str, ScopedWorkloadCredentials] = {} - def issue(self, *, tenant_id: str, knowledge_graph_id: str) -> ScopedWorkloadCredentials: + def issue( + self, + *, + tenant_id: str, + knowledge_graph_id: str, + extra_scopes: tuple[str, ...] = (), + ) -> ScopedWorkloadCredentials: now = datetime.now(UTC) - return ScopedWorkloadCredentials( + scopes = ( + f"tenant:{tenant_id}", + f"knowledge_graph:{knowledge_graph_id}", + "workload:extraction", + *extra_scopes, + ) + credentials = ScopedWorkloadCredentials( token=str(ULID()), expires_at=now + self._default_ttl, - scopes=( - f"tenant:{tenant_id}", - f"knowledge_graph:{knowledge_graph_id}", - "workload:extraction", - ), + scopes=scopes, ) + self._issued[credentials.token] = credentials + return credentials + + def issue_for_sticky_session( + self, *, tenant_id: str, knowledge_graph_id: str + ) -> ScopedWorkloadCredentials: + """Issue chat-scoped credentials for sticky session agent containers.""" + return self.issue( + tenant_id=tenant_id, + knowledge_graph_id=knowledge_graph_id, + extra_scopes=("workload:chat",), + ) + + def verify(self, token: str) -> ScopedWorkloadCredentials | None: + """Return credentials when token is known and not expired.""" + credentials = self._issued.get(token) + if credentials is None: + return None + if credentials.expires_at <= datetime.now(UTC): + self._issued.pop(token, None) + return None + return credentials class InMemoryEphemeralExtractionWorkerLauncher(IEphemeralExtractionWorkerLauncher): diff --git a/src/api/extraction/infrastructure/workload_runtime_factory.py b/src/api/extraction/infrastructure/workload_runtime_factory.py index 176102028..1b62a3b76 100644 --- a/src/api/extraction/infrastructure/workload_runtime_factory.py +++ b/src/api/extraction/infrastructure/workload_runtime_factory.py @@ -3,19 +3,26 @@ from __future__ import annotations from datetime import timedelta +from functools import lru_cache from 
extraction.infrastructure.container_workload_runtime import ( ContainerEphemeralExtractionWorkerLauncher, ContainerStickySessionRuntimeManager, ) +from extraction.infrastructure.deterministic_chat_agent import DeterministicExtractionChatAgent +from extraction.infrastructure.remote_sticky_container_chat_agent import ( + RemoteStickyContainerChatAgent, +) from extraction.infrastructure.workload_runtime import ( InMemoryEphemeralExtractionWorkerLauncher, InMemoryStickySessionRuntimeManager, + ScopedWorkloadCredentialIssuer, ) from extraction.infrastructure.workload_runtime_settings import ( ExtractionWorkloadRuntimeSettings, get_extraction_workload_runtime_settings, ) +from extraction.ports.chat_agent import IExtractionChatAgent from extraction.ports.runtime import ( IEphemeralExtractionWorkerLauncher, IStickySessionRuntimeManager, @@ -23,6 +30,25 @@ from shared_kernel.container_runtime.factory import create_container_runtime +@lru_cache +def get_workload_credential_issuer() -> ScopedWorkloadCredentialIssuer: + """Return shared workload credential issuer for runtime containers.""" + settings = get_extraction_workload_runtime_settings() + return ScopedWorkloadCredentialIssuer( + default_ttl=timedelta(minutes=settings.session_ttl_minutes) + ) + + +def create_extraction_chat_agent( + settings: ExtractionWorkloadRuntimeSettings | None = None, +) -> IExtractionChatAgent: + """Build chat agent implementation for configured runtime backend.""" + resolved = settings or get_extraction_workload_runtime_settings() + if resolved.backend == "container": + return RemoteStickyContainerChatAgent() + return DeterministicExtractionChatAgent() + + def create_sticky_session_runtime_manager( settings: ExtractionWorkloadRuntimeSettings | None = None, ) -> IStickySessionRuntimeManager: @@ -39,6 +65,10 @@ def create_sticky_session_runtime_manager( sticky_image=resolved.sticky_image, sticky_command=resolved.sticky_command, session_ttl=timedelta(minutes=resolved.session_ttl_minutes), + 
container_network=resolved.container_network, + sticky_service_port=resolved.sticky_service_port, + container_skills_mount=resolved.container_skills_mount, + container_work_mount=resolved.container_work_mount, ) diff --git a/src/api/extraction/infrastructure/workload_runtime_settings.py b/src/api/extraction/infrastructure/workload_runtime_settings.py index 8a99c07d7..d55b57ba6 100644 --- a/src/api/extraction/infrastructure/workload_runtime_settings.py +++ b/src/api/extraction/infrastructure/workload_runtime_settings.py @@ -21,11 +21,18 @@ class ExtractionWorkloadRuntimeSettings(BaseSettings): backend: Literal["memory", "container"] = Field(default="memory") container_engine: Literal["auto", "docker", "podman"] = Field(default="auto") - sticky_image: str = Field(default="docker.io/library/busybox:1.36") + container_network: str | None = Field(default=None) + sticky_image: str = Field(default="kartograph-agent-runtime:dev") worker_image: str = Field(default="docker.io/library/busybox:1.36") - sticky_command: tuple[str, ...] = Field(default=("sleep", "3600")) + sticky_command: tuple[str, ...] = Field(default=("python", "-m", "kartograph_agent_runtime")) worker_command: tuple[str, ...] 
= Field(default=("sleep", "3600")) + sticky_service_port: int = Field(default=8787, ge=1024, le=65535) + container_skills_mount: str = Field(default="/app/skills") + container_work_mount: str = Field(default="/workspace") session_ttl_minutes: int = Field(default=30, ge=1, le=24 * 60) + job_package_work_dir: str = Field(default="/tmp/kartograph/job_packages") + skills_dir: str = Field(default="/app/skills") + api_base_url: str = Field(default="http://api:8000") @field_validator("sticky_command", "worker_command", mode="before") @classmethod diff --git a/src/api/extraction/ports/prepared_job_packages.py b/src/api/extraction/ports/prepared_job_packages.py new file mode 100644 index 000000000..124b6768f --- /dev/null +++ b/src/api/extraction/ports/prepared_job_packages.py @@ -0,0 +1,15 @@ +"""Port for reading prepared JobPackage identifiers for sticky session materialization.""" + +from __future__ import annotations + +from typing import Protocol + + +class IPreparedJobPackageReader(Protocol): + """Read latest prepared JobPackage ids for one knowledge graph.""" + + async def list_latest_for_knowledge_graph( + self, *, knowledge_graph_id: str + ) -> tuple[str, ...]: + """Return latest JobPackage ids per data source for the knowledge graph.""" + ... 
diff --git a/src/api/extraction/ports/runtime.py b/src/api/extraction/ports/runtime.py index b446abeba..b24ccae88 100644 --- a/src/api/extraction/ports/runtime.py +++ b/src/api/extraction/ports/runtime.py @@ -19,6 +19,18 @@ class StickySessionRuntimeLease: status: str last_activity_at: datetime expires_at: datetime + runtime_base_url: str | None = None + + +@dataclass(frozen=True) +class StickySessionRuntimeBootstrap: + """Host paths and credentials used when starting a sticky session container.""" + + tenant_id: str + credentials: ScopedWorkloadCredentials + host_session_work_dir: str + host_skills_dir: str + api_base_url: str @dataclass(frozen=True) @@ -70,6 +82,7 @@ def get_or_start_runtime( user_id: str, knowledge_graph_id: str, mode: str, + bootstrap: StickySessionRuntimeBootstrap | None = None, ) -> StickySessionRuntimeLease: """Return current runtime lease or start a new sticky runtime.""" ... @@ -81,6 +94,7 @@ def reset_runtime( user_id: str, knowledge_graph_id: str, mode: str, + bootstrap: StickySessionRuntimeBootstrap | None = None, ) -> StickySessionRuntimeLease: """Terminate existing runtime for session and start a clean one.""" ... 
diff --git a/src/api/extraction/ports/sticky_session_bootstrap.py b/src/api/extraction/ports/sticky_session_bootstrap.py new file mode 100644 index 000000000..35fc4b0b6 --- /dev/null +++ b/src/api/extraction/ports/sticky_session_bootstrap.py @@ -0,0 +1,22 @@ +"""Port for building sticky session runtime bootstrap payloads.""" + +from __future__ import annotations + +from typing import Protocol + +from extraction.ports.runtime import StickySessionRuntimeBootstrap + + +class IStickySessionBootstrapBuilder(Protocol): + """Prepare host paths and credentials for sticky session containers.""" + + async def build( + self, + *, + tenant_id: str, + knowledge_graph_id: str, + session_id: str, + include_job_packages: bool, + ) -> StickySessionRuntimeBootstrap | None: + """Return bootstrap payload when container runtime is enabled.""" + ... diff --git a/src/api/extraction/ports/workload_graph.py b/src/api/extraction/ports/workload_graph.py new file mode 100644 index 000000000..744a565e4 --- /dev/null +++ b/src/api/extraction/ports/workload_graph.py @@ -0,0 +1,31 @@ +"""Port for graph reads performed by extraction workload runtimes.""" + +from __future__ import annotations + +from dataclasses import dataclass +from typing import Protocol + + +@dataclass(frozen=True) +class WorkloadGraphNode: + """Graph node returned to sticky session agent tools.""" + + id: str + entity_type: str + slug: str | None + properties: dict + + +class IWorkloadGraphReader(Protocol): + """Read-only graph access scoped to a workload token context.""" + + async def search_by_slug( + self, + *, + tenant_id: str, + knowledge_graph_id: str, + slug: str, + entity_type: str | None = None, + ) -> list[WorkloadGraphNode]: + """Search nodes by slug within one knowledge graph.""" + ... 
diff --git a/src/api/extraction/presentation/__init__.py b/src/api/extraction/presentation/__init__.py index 62603fc2b..aa7246a4e 100644 --- a/src/api/extraction/presentation/__init__.py +++ b/src/api/extraction/presentation/__init__.py @@ -6,10 +6,11 @@ from fastapi import APIRouter -from extraction.presentation import routes +from extraction.presentation import routes, workload_routes router = APIRouter(prefix="/extraction", tags=["extraction"]) router.include_router(routes.router) +router.include_router(workload_routes.router) __all__ = ["router"] diff --git a/src/api/extraction/presentation/routes.py b/src/api/extraction/presentation/routes.py index 7ccae2a17..815352479 100644 --- a/src/api/extraction/presentation/routes.py +++ b/src/api/extraction/presentation/routes.py @@ -182,6 +182,7 @@ async def stream_chat_turn( async def event_stream(): async for event in service.stream_chat_turn( user_id=current_user.user_id.value, + tenant_id=current_user.tenant_id.value, knowledge_graph_id=knowledge_graph_id, mode=mode, ui_mode=request.graph_management_ui_mode, diff --git a/src/api/extraction/presentation/workload_auth.py b/src/api/extraction/presentation/workload_auth.py new file mode 100644 index 000000000..c35a719b4 --- /dev/null +++ b/src/api/extraction/presentation/workload_auth.py @@ -0,0 +1,69 @@ +"""FastAPI dependency helpers for extraction workload token authentication.""" + +from __future__ import annotations + +from dataclasses import dataclass +from datetime import UTC, datetime +from typing import Annotated + +from fastapi import Depends, Header, HTTPException, status + +from extraction.infrastructure.workload_runtime import ScopedWorkloadCredentialIssuer +from extraction.ports.runtime import ScopedWorkloadCredentials +from infrastructure.extraction_workload.dependencies import ( + get_extraction_workload_credential_issuer, +) + + +@dataclass(frozen=True) +class WorkloadAuthContext: + """Authenticated workload context derived from a runtime token.""" + + 
credentials: ScopedWorkloadCredentials + tenant_id: str + knowledge_graph_id: str + + +def get_workload_auth_context( + workload_token: Annotated[str | None, Header(alias="X-Workload-Token")] = None, + issuer: Annotated[ + ScopedWorkloadCredentialIssuer, Depends(get_extraction_workload_credential_issuer) + ] = ..., +) -> WorkloadAuthContext: + """Validate a sticky-session or worker runtime token.""" + if not workload_token: + raise HTTPException( + status_code=status.HTTP_401_UNAUTHORIZED, + detail="Missing X-Workload-Token header", + ) + + credentials = issuer.verify(workload_token) + if credentials is None or credentials.expires_at <= datetime.now(UTC): + raise HTTPException( + status_code=status.HTTP_401_UNAUTHORIZED, + detail="Invalid or expired workload token", + ) + + tenant_scope = next( + (scope.removeprefix("tenant:") for scope in credentials.scopes if scope.startswith("tenant:")), + None, + ) + kg_scope = next( + ( + scope.removeprefix("knowledge_graph:") + for scope in credentials.scopes + if scope.startswith("knowledge_graph:") + ), + None, + ) + if not tenant_scope or not kg_scope: + raise HTTPException( + status_code=status.HTTP_403_FORBIDDEN, + detail="Workload token is missing tenant or knowledge graph scope", + ) + + return WorkloadAuthContext( + credentials=credentials, + tenant_id=tenant_scope, + knowledge_graph_id=kg_scope, + ) diff --git a/src/api/extraction/presentation/workload_routes.py b/src/api/extraction/presentation/workload_routes.py new file mode 100644 index 000000000..e95fd1f51 --- /dev/null +++ b/src/api/extraction/presentation/workload_routes.py @@ -0,0 +1,99 @@ +"""HTTP routes for extraction workload runtimes (graph read + mutation emitters).""" + +from __future__ import annotations + +from typing import Annotated + +from fastapi import APIRouter, Depends, HTTPException, Query, status +from pydantic import BaseModel, Field + +from extraction.ports.workload_graph import IWorkloadGraphReader +from 
extraction.presentation.workload_auth import ( + WorkloadAuthContext, + get_workload_auth_context, +) +from infrastructure.extraction_workload.dependencies import get_workload_graph_reader + +router = APIRouter(prefix="/workloads", tags=["extraction-workloads"]) + + +class WorkloadGraphSearchResponse(BaseModel): + """Graph read response for sticky session agent tools.""" + + nodes: list[dict] + count: int + + +class WorkloadMutationProposalRequest(BaseModel): + """Mutation emitter payload from sticky session agent tools.""" + + operation: str = Field(min_length=1) + summary: str = Field(min_length=1) + payload: dict = Field(default_factory=dict) + + +class WorkloadMutationProposalResponse(BaseModel): + """Acknowledgement for a proposed mutation (not yet applied).""" + + accepted: bool + proposal_id: str + message: str + + +@router.get( + "/graph/search-by-slug", + response_model=WorkloadGraphSearchResponse, +) +async def workload_search_graph_by_slug( + slug: Annotated[str, Query(min_length=1)], + entity_type: Annotated[str | None, Query()] = None, + auth: Annotated[WorkloadAuthContext, Depends(get_workload_auth_context)] = ..., + reader: Annotated[IWorkloadGraphReader, Depends(get_workload_graph_reader)] = ..., +) -> WorkloadGraphSearchResponse: + if "workload:chat" not in auth.credentials.scopes: + raise HTTPException( + status_code=status.HTTP_403_FORBIDDEN, + detail="Workload token is not authorized for chat graph reads", + ) + + nodes = await reader.search_by_slug( + tenant_id=auth.tenant_id, + knowledge_graph_id=auth.knowledge_graph_id, + slug=slug, + entity_type=entity_type, + ) + serialized = [ + { + "id": node.id, + "entity_type": node.entity_type, + "slug": node.slug, + "properties": node.properties, + } + for node in nodes + ] + return WorkloadGraphSearchResponse(nodes=serialized, count=len(serialized)) + + +@router.post( + "/mutations/propose", + response_model=WorkloadMutationProposalResponse, +) +async def workload_propose_mutation( + request: 
WorkloadMutationProposalRequest, + auth: Annotated[WorkloadAuthContext, Depends(get_workload_auth_context)] = ..., +) -> WorkloadMutationProposalResponse: + if "workload:chat" not in auth.credentials.scopes: + raise HTTPException( + status_code=status.HTTP_403_FORBIDDEN, + detail="Workload token is not authorized for chat mutation proposals", + ) + + proposal_id = f"proposal-{request.operation}-{auth.knowledge_graph_id}" + return WorkloadMutationProposalResponse( + accepted=True, + proposal_id=proposal_id, + message=( + "Mutation proposal recorded for audit. Apply via mutation log pipeline " + "in a follow-up change." + ), + ) diff --git a/src/api/infrastructure/extraction_workload/dependencies.py b/src/api/infrastructure/extraction_workload/dependencies.py new file mode 100644 index 000000000..a74594e5b --- /dev/null +++ b/src/api/infrastructure/extraction_workload/dependencies.py @@ -0,0 +1,31 @@ +"""Dependencies for extraction workload HTTP endpoints.""" + +from __future__ import annotations + +from functools import lru_cache +from typing import Annotated + +from fastapi import Depends + +from extraction.infrastructure.workload_runtime import ScopedWorkloadCredentialIssuer +from extraction.infrastructure.workload_runtime_factory import get_workload_credential_issuer +from extraction.ports.workload_graph import IWorkloadGraphReader +from infrastructure.database.connection_pool import ConnectionPool +from infrastructure.dependencies import get_age_connection_pool +from infrastructure.extraction_workload.graph_reader import GraphWorkloadGraphReader +from infrastructure.settings import get_database_settings + + +@lru_cache +def _cached_workload_credential_issuer() -> ScopedWorkloadCredentialIssuer: + return get_workload_credential_issuer() + + +def get_extraction_workload_credential_issuer() -> ScopedWorkloadCredentialIssuer: + return _cached_workload_credential_issuer() + + +def get_workload_graph_reader( + pool: Annotated[ConnectionPool, 
Depends(get_age_connection_pool)], +) -> IWorkloadGraphReader: + return GraphWorkloadGraphReader(pool=pool, settings=get_database_settings()) diff --git a/src/api/infrastructure/extraction_workload/graph_reader.py b/src/api/infrastructure/extraction_workload/graph_reader.py new file mode 100644 index 000000000..6ff902aea --- /dev/null +++ b/src/api/infrastructure/extraction_workload/graph_reader.py @@ -0,0 +1,61 @@ +"""Graph-backed adapter for extraction workload graph reads.""" + +from __future__ import annotations + +from graph.application.observability import DefaultGraphServiceProbe +from graph.application.services import GraphQueryService +from graph.infrastructure.age_client import AgeGraphClient +from graph.infrastructure.graph_repository import GraphExtractionReadOnlyRepository +from infrastructure.database.connection import ConnectionFactory +from infrastructure.database.connection_pool import ConnectionPool +from infrastructure.settings import DatabaseSettings + +from extraction.ports.workload_graph import IWorkloadGraphReader, WorkloadGraphNode + + +class GraphWorkloadGraphReader(IWorkloadGraphReader): + """Uses Graph bounded context services via infrastructure composition root.""" + + def __init__( + self, + *, + pool: ConnectionPool, + settings: DatabaseSettings, + ) -> None: + self._pool = pool + self._settings = settings + + async def search_by_slug( + self, + *, + tenant_id: str, + knowledge_graph_id: str, + slug: str, + entity_type: str | None = None, + ) -> list[WorkloadGraphNode]: + graph_name = f"tenant_{tenant_id}" + factory = ConnectionFactory(self._settings, pool=self._pool) + client = AgeGraphClient(self._settings, connection_factory=factory, graph_name=graph_name) + client.connect() + try: + repository = GraphExtractionReadOnlyRepository( + client=client, + graph_id=knowledge_graph_id, + ) + service = GraphQueryService(repository=repository, probe=DefaultGraphServiceProbe()) + nodes = service.search_by_slug( + slug=slug, + 
node_type=entity_type, + knowledge_graph_id=knowledge_graph_id, + ) + return [ + WorkloadGraphNode( + id=node.id, + entity_type=node.label, + slug=node.properties.get("slug"), + properties=node.properties, + ) + for node in nodes + ] + finally: + client.disconnect() diff --git a/src/api/main.py b/src/api/main.py index da073481e..dc3d05eb1 100644 --- a/src/api/main.py +++ b/src/api/main.py @@ -340,9 +340,9 @@ async def handle(self, event_type: str, payload: dict[str, Any]) -> None: from extraction.infrastructure.runtime_context_builder import ( FilesystemExtractionRuntimeContextBuilder, ) - from extraction.infrastructure.workload_runtime import ScopedWorkloadCredentialIssuer from extraction.infrastructure.workload_runtime_factory import ( create_ephemeral_extraction_worker_launcher, + get_workload_credential_issuer, ) from management.domain.value_objects import KnowledgeGraphId from management.infrastructure.repositories.knowledge_graph_repository import ( @@ -360,9 +360,7 @@ async def handle(self, event_type: str, payload: dict[str, Any]) -> None: extraction_service=self._extraction_service, outbox=outbox, runtime_context_builder=runtime_context_builder, - credential_issuer=ScopedWorkloadCredentialIssuer( - default_ttl=timedelta(minutes=15) - ), + credential_issuer=get_workload_credential_issuer(), worker_launcher=create_ephemeral_extraction_worker_launcher(), ) diff --git a/src/api/shared_kernel/container_runtime/cli_runtime.py b/src/api/shared_kernel/container_runtime/cli_runtime.py index 7eba19956..29ee2e817 100644 --- a/src/api/shared_kernel/container_runtime/cli_runtime.py +++ b/src/api/shared_kernel/container_runtime/cli_runtime.py @@ -32,6 +32,10 @@ def run(self, spec: ContainerRunSpec) -> ContainerRunResult: command.extend(["--label", f"{key}={value}"]) for key, value in sorted(spec.env.items()): command.extend(["--env", f"{key}={value}"]) + for bind in spec.binds: + command.extend(["--volume", bind]) + if spec.network is not None: + 
command.extend(["--network", spec.network]) command.append(spec.image) if spec.command: command.extend(spec.command) diff --git a/src/api/shared_kernel/container_runtime/ports.py b/src/api/shared_kernel/container_runtime/ports.py index 1870e8923..3824eb4bb 100644 --- a/src/api/shared_kernel/container_runtime/ports.py +++ b/src/api/shared_kernel/container_runtime/ports.py @@ -19,6 +19,8 @@ class ContainerRunSpec: env: dict[str, str] = field(default_factory=dict) labels: dict[str, str] = field(default_factory=dict) command: tuple[str, ...] | None = None + binds: tuple[str, ...] = field(default_factory=tuple) + network: str | None = None detach: bool = True remove_on_exit: bool = False diff --git a/src/api/tests/unit/extraction/application/test_chat_turn_service.py b/src/api/tests/unit/extraction/application/test_chat_turn_service.py index a807b62a2..7c6a01748 100644 --- a/src/api/tests/unit/extraction/application/test_chat_turn_service.py +++ b/src/api/tests/unit/extraction/application/test_chat_turn_service.py @@ -79,6 +79,11 @@ async def resolve_for_graph_management_turn(self, **kwargs): )() +class _StaticBootstrapBuilder: + async def build(self, **kwargs): + return None + + @pytest.mark.asyncio async def test_stream_chat_turn_persists_assistant_reply() -> None: repo = _InMemoryAgentSessionRepository() @@ -92,11 +97,13 @@ async def test_stream_chat_turn_persists_assistant_reply() -> None: ), sticky_runtime_manager=sticky, chat_agent=DeterministicExtractionChatAgent(), + bootstrap_builder=_StaticBootstrapBuilder(), ) events = [ event async for event in service.stream_chat_turn( + tenant_id="tenant-1", user_id="user-1", knowledge_graph_id="kg-1", mode=ExtractionSessionMode.SCHEMA_BOOTSTRAP, @@ -127,11 +134,13 @@ async def test_stream_chat_turn_wait_when_job_package_unprepared() -> None: ), sticky_runtime_manager=sticky, chat_agent=DeterministicExtractionChatAgent(), + bootstrap_builder=_StaticBootstrapBuilder(), ) events = [ event async for event in 
service.stream_chat_turn( + tenant_id="tenant-1", user_id="user-1", knowledge_graph_id="kg-1", mode=ExtractionSessionMode.EXTRACTION_OPERATIONS, diff --git a/src/api/tests/unit/extraction/infrastructure/test_sticky_session_container_bootstrap.py b/src/api/tests/unit/extraction/infrastructure/test_sticky_session_container_bootstrap.py new file mode 100644 index 000000000..8a297b999 --- /dev/null +++ b/src/api/tests/unit/extraction/infrastructure/test_sticky_session_container_bootstrap.py @@ -0,0 +1,49 @@ +"""Unit tests for container sticky runtime bootstrap wiring.""" + +from __future__ import annotations + +from datetime import UTC, datetime, timedelta +from unittest.mock import MagicMock + +from extraction.infrastructure.container_workload_runtime import ( + ContainerStickySessionRuntimeManager, +) +from extraction.infrastructure.workload_runtime import ScopedWorkloadCredentialIssuer +from extraction.ports.runtime import StickySessionRuntimeBootstrap +from shared_kernel.container_runtime.ports import ContainerRunResult, ContainerRunSpec + + +def test_start_runtime_mounts_skills_workspace_and_injects_token() -> None: + runtime = MagicMock() + runtime.run.return_value = ContainerRunResult(container_id="container-1", name="name-1") + manager = ContainerStickySessionRuntimeManager( + container_runtime=runtime, + sticky_image="kartograph-agent-runtime:dev", + sticky_command=("python", "-m", "kartograph_agent_runtime"), + session_ttl=timedelta(minutes=30), + container_network="kartograph_kartograph", + ) + issuer = ScopedWorkloadCredentialIssuer(default_ttl=timedelta(minutes=10)) + credentials = issuer.issue_for_sticky_session(tenant_id="tenant-1", knowledge_graph_id="kg-1") + bootstrap = StickySessionRuntimeBootstrap( + tenant_id="tenant-1", + credentials=credentials, + host_session_work_dir="/tmp/session-work", + host_skills_dir="/tmp/skills", + api_base_url="http://api:8000", + ) + + lease = manager.get_or_start_runtime( + session_id="session-1", + user_id="user-1", 
+ knowledge_graph_id="kg-1", + mode="schema_bootstrap", + bootstrap=bootstrap, + ) + + spec: ContainerRunSpec = runtime.run.call_args.args[0] + assert spec.network == "kartograph_kartograph" + assert spec.env["KARTOGRAPH_WORKLOAD_TOKEN"] == credentials.token + assert "/tmp/skills:/app/skills:ro" in spec.binds + assert "/tmp/session-work:/workspace:ro" in spec.binds + assert lease.runtime_base_url.startswith("http://kartograph-sticky-") diff --git a/src/api/tests/unit/extraction/infrastructure/test_sticky_session_workdir_materializer.py b/src/api/tests/unit/extraction/infrastructure/test_sticky_session_workdir_materializer.py new file mode 100644 index 000000000..e6373bb60 --- /dev/null +++ b/src/api/tests/unit/extraction/infrastructure/test_sticky_session_workdir_materializer.py @@ -0,0 +1,59 @@ +"""Unit tests for sticky session workdir materialization.""" + +from __future__ import annotations + +from pathlib import Path + +from shared_kernel.job_package.builder import JobPackageBuilder +from shared_kernel.job_package.value_objects import ( + AdapterCheckpoint, + ChangeOperation, + ChangesetEntry, + ContentRef, + JobPackageId, + SyncMode, +) + +from extraction.infrastructure.sticky_session_workdir_materializer import ( + StickySessionWorkdirMaterializer, +) + + +def _build_package(work_dir: Path, package_id: str) -> None: + content_bytes = b"# hello\n" + content_ref = ContentRef.from_bytes(content_bytes) + builder = JobPackageBuilder( + data_source_id="ds-1", + knowledge_graph_id="kg-1", + sync_mode=SyncMode.FULL_REFRESH, + package_id=JobPackageId(value=package_id), + ) + ref = builder.add_content(content_bytes) + builder.add_changeset_entry( + ChangesetEntry( + operation=ChangeOperation.ADD, + id="file-1", + type="io.kartograph.change.file", + path="README.md", + content_ref=ref, + content_type="text/markdown", + metadata={}, + ) + ) + builder.set_checkpoint(AdapterCheckpoint(schema_version="1.0.0", data={})) + builder.build(work_dir) + + +def 
test_materializer_extracts_job_package_into_session_workspace(tmp_path: Path) -> None: + package_id = "01JTESTPACK0000000000000000" + _build_package(tmp_path, package_id) + materializer = StickySessionWorkdirMaterializer(job_package_work_dir=tmp_path) + + session_root = materializer.prepare( + session_id="session-1", + knowledge_graph_id="kg-1", + job_package_ids=(package_id,), + ) + + repo_file = session_root / "repository-files" / package_id / "README.md" + assert repo_file.read_text(encoding="utf-8") == "# hello\n" diff --git a/src/api/tests/unit/extraction/infrastructure/test_workload_credential_issuer.py b/src/api/tests/unit/extraction/infrastructure/test_workload_credential_issuer.py new file mode 100644 index 000000000..4a72d633f --- /dev/null +++ b/src/api/tests/unit/extraction/infrastructure/test_workload_credential_issuer.py @@ -0,0 +1,23 @@ +"""Unit tests for scoped workload credential issuer.""" + +from __future__ import annotations + +from datetime import timedelta + +from extraction.infrastructure.workload_runtime import ScopedWorkloadCredentialIssuer + + +def test_issue_for_sticky_session_includes_chat_scope() -> None: + issuer = ScopedWorkloadCredentialIssuer(default_ttl=timedelta(minutes=5)) + credentials = issuer.issue_for_sticky_session( + tenant_id="tenant-1", + knowledge_graph_id="kg-1", + ) + + assert "workload:chat" in credentials.scopes + assert issuer.verify(credentials.token) == credentials + + +def test_verify_rejects_unknown_token() -> None: + issuer = ScopedWorkloadCredentialIssuer(default_ttl=timedelta(minutes=5)) + assert issuer.verify("missing-token") is None diff --git a/src/api/tests/unit/shared_kernel/container_runtime/test_cli_runtime.py b/src/api/tests/unit/shared_kernel/container_runtime/test_cli_runtime.py index 4e6d4c199..c161ad7d1 100644 --- a/src/api/tests/unit/shared_kernel/container_runtime/test_cli_runtime.py +++ b/src/api/tests/unit/shared_kernel/container_runtime/test_cli_runtime.py @@ -11,6 +11,34 @@ class 
TestCliContainerRuntime: + def test_run_launches_detached_container_with_labels_env_and_binds(self) -> None: + runtime = CliContainerRuntime(binary="docker") + + with patch("shared_kernel.container_runtime.cli_runtime.subprocess.run") as run: + run.return_value = MagicMock(returncode=0, stdout="abc123\n", stderr="") + + result = runtime.run( + ContainerRunSpec( + image="busybox:1.36", + name="kartograph-sticky-session-1", + env={"KARTOGRAPH_WORKLOAD_TOKEN": "secret"}, + labels={ + "kartograph.runtime.kind": "sticky", + "kartograph.session_id": "session-1", + }, + binds=("/host/skills:/app/skills:ro",), + network="kartograph_kartograph", + command=("sleep", "3600"), + ) + ) + + assert result.container_id == "abc123" + command = run.call_args.args[0] + assert "--volume" in command + assert "/host/skills:/app/skills:ro" in command + assert "--network" in command + assert "kartograph_kartograph" in command + def test_run_launches_detached_container_with_labels_and_env(self) -> None: runtime = CliContainerRuntime(binary="docker") From e7479668f3beda7a5a33dda35ef8be861f46f75f Mon Sep 17 00:00:00 2001 From: aredenba-rh <aredenba@redhat.com> Date: Fri, 29 May 2026 01:47:05 -0400 Subject: [PATCH 064/153] feat(extraction): use Vertex AI and proactive runtime warmup Align sticky Claude Agent SDK containers with k-extract Vertex auth and warm the graph-management assistant on UI entry with streamed readiness progress. 
Co-authored-by: Cursor <cursoragent@cursor.com> --- compose.dev.yaml | 5 + .../kartograph_agent_runtime/executor.py | 30 +- .../kartograph_agent_runtime/settings.py | 12 + .../kartograph_agent_runtime/vertex.py | 33 +++ src/agent-runtime/tests/test_executor.py | 7 +- .../application/chat_turn_service.py | 124 ++++----- .../sticky_session_runtime_service.py | 259 ++++++++++++++++++ src/api/extraction/dependencies.py | 15 +- .../container_workload_runtime.py | 20 ++ .../infrastructure/sticky_runtime_health.py | 47 ++++ .../infrastructure/vertex_runtime_env.py | 38 +++ .../workload_runtime_factory.py | 4 + .../workload_runtime_settings.py | 33 ++- .../extraction/ports/sticky_runtime_health.py | 19 ++ .../ports/sticky_session_runtime.py | 36 +++ src/api/extraction/presentation/models.py | 6 + src/api/extraction/presentation/routes.py | 31 +++ .../application/test_chat_turn_service.py | 71 +++-- .../infrastructure/test_vertex_runtime_env.py | 42 +++ .../pages/knowledge-graphs/[kgId]/manage.vue | 97 ++++++- .../app/tests/kg-extraction-chat.test.ts | 48 +++- .../knowledge-graph-manage-workspace.test.ts | 2 + src/dev-ui/app/utils/kgExtractionChat.ts | 90 ++++-- 23 files changed, 928 insertions(+), 141 deletions(-) create mode 100644 src/agent-runtime/kartograph_agent_runtime/vertex.py create mode 100644 src/api/extraction/application/sticky_session_runtime_service.py create mode 100644 src/api/extraction/infrastructure/sticky_runtime_health.py create mode 100644 src/api/extraction/infrastructure/vertex_runtime_env.py create mode 100644 src/api/extraction/ports/sticky_runtime_health.py create mode 100644 src/api/extraction/ports/sticky_session_runtime.py create mode 100644 src/api/tests/unit/extraction/infrastructure/test_vertex_runtime_env.py diff --git a/compose.dev.yaml b/compose.dev.yaml index 51fb575c0..480388cdc 100644 --- a/compose.dev.yaml +++ b/compose.dev.yaml @@ -19,6 +19,11 @@ services: KARTOGRAPH_EXTRACTION_RUNTIME_API_BASE_URL: http://api:8000 
KARTOGRAPH_EXTRACTION_RUNTIME_JOB_PACKAGE_WORK_DIR: /tmp/kartograph/job_packages KARTOGRAPH_EXTRACTION_RUNTIME_SKILLS_DIR: /app/skills + # Vertex AI for Claude Agent SDK (mirror k-extract; set in host .env or shell) + CLAUDE_CODE_USE_VERTEX: ${CLAUDE_CODE_USE_VERTEX:-} + ANTHROPIC_VERTEX_PROJECT_ID: ${ANTHROPIC_VERTEX_PROJECT_ID:-} + CLOUD_ML_REGION: ${CLOUD_ML_REGION:-us-east5} + KARTOGRAPH_GCLOUD_CONFIG_MOUNT: ${KARTOGRAPH_GCLOUD_CONFIG_MOUNT:-} volumes: # Mount the entire app directory (minus venv) for hot-reload - ./src/api:/app:z diff --git a/src/agent-runtime/kartograph_agent_runtime/executor.py b/src/agent-runtime/kartograph_agent_runtime/executor.py index 935cf44e7..e55b1d58b 100644 --- a/src/agent-runtime/kartograph_agent_runtime/executor.py +++ b/src/agent-runtime/kartograph_agent_runtime/executor.py @@ -8,6 +8,7 @@ from kartograph_agent_runtime.settings import AgentRuntimeSettings from kartograph_agent_runtime.tools import RuntimeTooling +from kartograph_agent_runtime.vertex import build_claude_agent_env def _build_system_prompt(agent_configuration: dict[str, Any]) -> str: @@ -20,6 +21,16 @@ def _build_system_prompt(agent_configuration: dict[str, Any]) -> str: return "\n\n".join(sections) or "You are the Graph Management Assistant." 
+def _apply_model_env(settings: AgentRuntimeSettings) -> str: + for key, value in build_claude_agent_env(settings).items(): + os.environ[key] = value + if settings.vertex_enabled(): + return "Vertex AI" + if settings.anthropic_api_key.strip(): + return "Anthropic API" + return "unconfigured" + + async def stream_turn_events( *, settings: AgentRuntimeSettings, @@ -28,22 +39,25 @@ async def stream_turn_events( agent_configuration: dict[str, Any], message_history: list[dict[str, Any]], ) -> AsyncIterator[dict[str, Any]]: + auth_mode = _apply_model_env(settings) yield { "type": "thinking", "recent": [ "Starting Claude Agent SDK runtime…", + f"Model backend: {auth_mode}", f"Applying {ui_mode} skill overlay", f"Workspace mounted at {settings.workspace_dir}", ], } - if settings.anthropic_api_key: + if settings.model_configured(): async for event in _stream_with_claude_sdk( settings=settings, message=message, ui_mode=ui_mode, agent_configuration=agent_configuration, message_history=message_history, + auth_mode=auth_mode, ): yield event return @@ -54,8 +68,9 @@ async def stream_turn_events( f"**Graph Management Assistant ({ui_mode})**\n\n" f"I received your message with skills: {skill_keys}.\n\n" f"> {message.strip()}\n\n" - "Claude Agent SDK is configured for this container. Set `ANTHROPIC_API_KEY` " - "to enable live model execution. Graph and mutation tools are wired via " + "Configure Vertex AI (`CLAUDE_CODE_USE_VERTEX=1`, `ANTHROPIC_VERTEX_PROJECT_ID`, " + "`CLOUD_ML_REGION`) or `ANTHROPIC_API_KEY` to enable live model execution. " + "Graph and mutation tools are wired via " f"`{settings.api_base_url}` using the injected workload token." 
) if message.lower().startswith("search graph:"): @@ -75,10 +90,10 @@ async def _stream_with_claude_sdk( ui_mode: str, agent_configuration: dict[str, Any], message_history: list[dict[str, Any]], + auth_mode: str, ) -> AsyncIterator[dict[str, Any]]: from claude_agent_sdk import ClaudeAgentOptions, query - os.environ.setdefault("ANTHROPIC_API_KEY", settings.anthropic_api_key) system_prompt = _build_system_prompt(agent_configuration) history_lines = [ f"{entry.get('role', 'unknown')}: {entry.get('content', '')}" @@ -92,14 +107,17 @@ async def _stream_with_claude_sdk( yield { "type": "thinking", "recent": [ - "Claude Agent SDK query started…", + f"Claude Agent SDK query started ({auth_mode})…", f"Mode overlay: {ui_mode}", "Tools: graph read enclave, mutation emitter", ], } chunks: list[str] = [] - options = ClaudeAgentOptions(system_prompt=system_prompt) + options = ClaudeAgentOptions( + system_prompt=system_prompt, + env=build_claude_agent_env(settings), + ) async for sdk_message in query(prompt=prompt, options=options): text = getattr(sdk_message, "result", None) or getattr(sdk_message, "content", None) if isinstance(text, str) and text.strip(): diff --git a/src/agent-runtime/kartograph_agent_runtime/settings.py b/src/agent-runtime/kartograph_agent_runtime/settings.py index f37c53ee8..724e190f9 100644 --- a/src/agent-runtime/kartograph_agent_runtime/settings.py +++ b/src/agent-runtime/kartograph_agent_runtime/settings.py @@ -5,6 +5,8 @@ from pydantic import Field from pydantic_settings import BaseSettings, SettingsConfigDict +from kartograph_agent_runtime.vertex import vertex_enabled_from_env + class AgentRuntimeSettings(BaseSettings): """Runtime configuration for sticky session agent containers.""" @@ -21,3 +23,13 @@ class AgentRuntimeSettings(BaseSettings): skills_dir: str = Field(default="/app/skills", alias="KARTOGRAPH_SKILLS_DIR") workspace_dir: str = Field(default="/workspace", alias="KARTOGRAPH_WORKSPACE_DIR") anthropic_api_key: str = Field(default="", 
alias="ANTHROPIC_API_KEY") + vertex_project_id: str = Field(default="", alias="ANTHROPIC_VERTEX_PROJECT_ID") + vertex_region: str = Field(default="us-east5", alias="CLOUD_ML_REGION") + + def vertex_enabled(self) -> bool: + return vertex_enabled_from_env() + + def model_configured(self) -> bool: + if self.vertex_enabled(): + return bool(self.vertex_project_id.strip()) + return bool(self.anthropic_api_key.strip()) diff --git a/src/agent-runtime/kartograph_agent_runtime/vertex.py b/src/agent-runtime/kartograph_agent_runtime/vertex.py new file mode 100644 index 000000000..dd88824c4 --- /dev/null +++ b/src/agent-runtime/kartograph_agent_runtime/vertex.py @@ -0,0 +1,33 @@ +"""Vertex AI helpers for Claude Agent SDK in sticky session containers.""" + +from __future__ import annotations + +import os + + +def is_truthy_env(value: str | None) -> bool: + if not value: + return False + normalized = value.strip().lower() + return normalized in {"1", "true", "yes", "on"} + + +def vertex_enabled_from_env() -> bool: + return is_truthy_env(os.getenv("CLAUDE_CODE_USE_VERTEX")) + + +def build_claude_agent_env(settings) -> dict[str, str]: + """Build Claude Agent SDK env for Vertex or direct Anthropic API.""" + env: dict[str, str] = {} + if settings.vertex_enabled(): + env["CLAUDE_CODE_USE_VERTEX"] = "1" + if settings.vertex_project_id.strip(): + env["ANTHROPIC_VERTEX_PROJECT_ID"] = settings.vertex_project_id.strip() + region = settings.vertex_region.strip() + if region: + env["CLOUD_ML_REGION"] = region + env["VERTEXAI_LOCATION"] = region + return env + if settings.anthropic_api_key.strip(): + env["ANTHROPIC_API_KEY"] = settings.anthropic_api_key.strip() + return env diff --git a/src/agent-runtime/tests/test_executor.py b/src/agent-runtime/tests/test_executor.py index d754d2599..0aa472fd0 100644 --- a/src/agent-runtime/tests/test_executor.py +++ b/src/agent-runtime/tests/test_executor.py @@ -9,7 +9,12 @@ @pytest.mark.asyncio -async def 
test_stream_turn_events_without_api_key_returns_done_reply() -> None: +async def test_stream_turn_events_without_api_key_returns_done_reply( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.delenv("CLAUDE_CODE_USE_VERTEX", raising=False) + monkeypatch.delenv("ANTHROPIC_VERTEX_PROJECT_ID", raising=False) + monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False) settings = AgentRuntimeSettings( KARTOGRAPH_WORKLOAD_TOKEN="token", KARTOGRAPH_API_BASE_URL="http://api:8000", diff --git a/src/api/extraction/application/chat_turn_service.py b/src/api/extraction/application/chat_turn_service.py index 7df0d0aaa..812771730 100644 --- a/src/api/extraction/application/chat_turn_service.py +++ b/src/api/extraction/application/chat_turn_service.py @@ -7,17 +7,13 @@ from typing import Any from extraction.application.agent_session_service import ExtractionAgentSessionService -from extraction.application.job_package_gate import resolve_job_package_gate -from extraction.application.skill_resolution_service import ExtractionSkillResolutionService +from extraction.ports.sticky_session_runtime import IStickySessionRuntimeService from extraction.domain.value_objects import ( ExtractionSessionMode, GraphManagementUiMode, SessionJobPackagePhase, ) from extraction.ports.chat_agent import IExtractionChatAgent -from extraction.ports.ingestion_readiness import IIngestionReadinessReader -from extraction.ports.runtime import IStickySessionRuntimeManager -from extraction.ports.sticky_session_bootstrap import IStickySessionBootstrapBuilder class ExtractionChatTurnService: @@ -27,18 +23,30 @@ def __init__( self, *, session_service: ExtractionAgentSessionService, - skill_resolution_service: ExtractionSkillResolutionService, - ingestion_readiness_reader: IIngestionReadinessReader, - sticky_runtime_manager: IStickySessionRuntimeManager, + runtime_service: IStickySessionRuntimeService, chat_agent: IExtractionChatAgent, - bootstrap_builder: IStickySessionBootstrapBuilder, ) -> None: 
self._session_service = session_service - self._skill_resolution_service = skill_resolution_service - self._ingestion_readiness_reader = ingestion_readiness_reader - self._sticky_runtime_manager = sticky_runtime_manager + self._runtime_service = runtime_service self._chat_agent = chat_agent - self._bootstrap_builder = bootstrap_builder + + async def stream_runtime_warmup( + self, + *, + tenant_id: str, + user_id: str, + knowledge_graph_id: str, + mode: ExtractionSessionMode, + ui_mode: GraphManagementUiMode, + ) -> AsyncIterator[dict[str, Any]]: + async for event in self._runtime_service.stream_runtime_warmup( + tenant_id=tenant_id, + user_id=user_id, + knowledge_graph_id=knowledge_graph_id, + mode=mode, + ui_mode=ui_mode, + ): + yield event async def stream_chat_turn( self, @@ -68,44 +76,24 @@ async def stream_chat_turn( mode=mode, ) - resolved_skills = await self._skill_resolution_service.resolve_for_graph_management_turn( + async for event in self._runtime_service.ensure_runtime_for_chat( + tenant_id=tenant_id, + user_id=user_id, knowledge_graph_id=knowledge_graph_id, mode=mode, ui_mode=ui_mode, - ) - session.runtime_context["agent_configuration"] = { - "system_prompt": resolved_skills.system_prompt, - "prompt_hierarchy": list(resolved_skills.prompt_hierarchy), - "guardrails": list(resolved_skills.guardrails), - "skills": dict(resolved_skills.skills), - "graph_management_ui_mode": ui_mode.value, - } - - readiness = await self._ingestion_readiness_reader.read_for_knowledge_graph( - knowledge_graph_id=knowledge_graph_id, - ) - gate = resolve_job_package_gate(ui_mode=ui_mode, readiness=readiness) - session.runtime_context["job_package"] = { - "phase": gate.phase.value, - "data_source_count": readiness.data_source_count, - "prepared_source_count": readiness.prepared_source_count, - } - - session.message_history.append({"role": "user", "content": trimmed}) - session.updated_at = datetime.now(UTC) + session=session, + ): + yield event - if gate.phase == 
SessionJobPackagePhase.AWAITING_PREPARE: - wait_message = gate.wait_message or "Waiting for JobPackage ingestion context." - session.runtime_context["activity_lines"] = [wait_message] - yield { - "type": "wait", - "phase": gate.phase.value, - "message": wait_message, - } - yield { - "type": "thinking", - "recent": ["Waiting for JobPackage ingestion context…", wait_message], - } + job_package_phase = session.runtime_context.get("job_package", {}).get("phase") + if job_package_phase == SessionJobPackagePhase.AWAITING_PREPARE.value: + wait_message = ( + session.runtime_context.get("activity_lines", ["Waiting for JobPackage ingestion context."])[0] + if session.runtime_context.get("activity_lines") + else "Waiting for JobPackage ingestion context." + ) + session.message_history.append({"role": "user", "content": trimmed}) assistant_reply = ( f"**Waiting for ingestion context**\n\n{wait_message}\n\n" "I'll respond with full repository-aware guidance once JobPackage " @@ -117,36 +105,29 @@ async def stream_chat_turn( yield {"type": "done", "ok": True, "reply": assistant_reply, "wait": True} return - bootstrap = await self._bootstrap_builder.build( - tenant_id=tenant_id, - knowledge_graph_id=knowledge_graph_id, - session_id=session.id, - include_job_packages=gate.phase != SessionJobPackagePhase.NOT_REQUIRED, - ) - lease = self._sticky_runtime_manager.get_or_start_runtime( - session_id=session.id, - user_id=user_id, - knowledge_graph_id=knowledge_graph_id, - mode=mode.value, - bootstrap=bootstrap, - ) - session.runtime_context["sticky_runtime"] = { - "container_id": lease.container_id, - "status": lease.status, - "expires_at": lease.expires_at.isoformat(), - "runtime_base_url": lease.runtime_base_url, - } + sticky = session.runtime_context.get("sticky_runtime", {}) + if sticky.get("phase") != "ready": + yield { + "type": "done", + "ok": False, + "error": { + "code": "RUNTIME_NOT_READY", + "message": "Graph Management Assistant runtime is not ready yet.", + }, + } + return 
yield { "type": "thinking", "recent": [ "Contacting Graph Management Assistant…", - f"Sticky container {lease.container_id[:8]} active", + f"Sticky container {str(sticky.get('container_id', ''))[:8]} active", ], } - session.runtime_context["job_package"]["phase"] = SessionJobPackagePhase.READY.value - thinking_lines: list[str] = [] + session.message_history.append({"role": "user", "content": trimmed}) + session.updated_at = datetime.now(UTC) + assistant_reply: str | None = None async for event in self._chat_agent.stream_turn( session=session, @@ -156,8 +137,9 @@ async def stream_chat_turn( if event.get("type") == "thinking": recent = event.get("recent") if isinstance(recent, list): - thinking_lines = [str(line) for line in recent if str(line).strip()] - session.runtime_context["activity_lines"] = thinking_lines + session.runtime_context["activity_lines"] = [ + str(line) for line in recent if str(line).strip() + ] if event.get("type") == "done": if event.get("ok") is True and event.get("reply"): assistant_reply = str(event["reply"]) diff --git a/src/api/extraction/application/sticky_session_runtime_service.py b/src/api/extraction/application/sticky_session_runtime_service.py new file mode 100644 index 000000000..e76604f43 --- /dev/null +++ b/src/api/extraction/application/sticky_session_runtime_service.py @@ -0,0 +1,259 @@ +"""Prepare sticky session containers before graph-management chat turns.""" + +from __future__ import annotations + +from collections.abc import AsyncIterator +from datetime import UTC, datetime +from typing import Any + +from extraction.application.agent_session_service import ExtractionAgentSessionService +from extraction.application.job_package_gate import resolve_job_package_gate +from extraction.application.skill_resolution_service import ExtractionSkillResolutionService +from extraction.domain.entities.agent_session import ExtractionAgentSession +from extraction.domain.value_objects import ( + ExtractionSessionMode, + GraphManagementUiMode, 
+ SessionJobPackagePhase, +) +from extraction.ports.ingestion_readiness import IIngestionReadinessReader +from extraction.ports.runtime import IStickySessionRuntimeManager, StickySessionRuntimeLease +from extraction.ports.sticky_runtime_health import IStickyRuntimeHealthChecker +from extraction.ports.sticky_session_bootstrap import IStickySessionBootstrapBuilder + + +class StickySessionRuntimeService: + """Starts sticky containers and streams transparent readiness progress.""" + + def __init__( + self, + *, + session_service: ExtractionAgentSessionService, + skill_resolution_service: ExtractionSkillResolutionService, + ingestion_readiness_reader: IIngestionReadinessReader, + sticky_runtime_manager: IStickySessionRuntimeManager, + bootstrap_builder: IStickySessionBootstrapBuilder, + health_checker: IStickyRuntimeHealthChecker, + runtime_backend: str, + sticky_health_timeout_seconds: float, + ) -> None: + self._session_service = session_service + self._skill_resolution_service = skill_resolution_service + self._ingestion_readiness_reader = ingestion_readiness_reader + self._sticky_runtime_manager = sticky_runtime_manager + self._bootstrap_builder = bootstrap_builder + self._health_checker = health_checker + self._runtime_backend = runtime_backend + self._sticky_health_timeout_seconds = sticky_health_timeout_seconds + + async def stream_runtime_warmup( + self, + *, + tenant_id: str, + user_id: str, + knowledge_graph_id: str, + mode: ExtractionSessionMode, + ui_mode: GraphManagementUiMode, + ) -> AsyncIterator[dict[str, Any]]: + session = await self._session_service.get_or_create_active_session( + user_id=user_id, + knowledge_graph_id=knowledge_graph_id, + mode=mode, + ) + async for event in self._stream_prepare_runtime( + tenant_id=tenant_id, + user_id=user_id, + knowledge_graph_id=knowledge_graph_id, + mode=mode, + ui_mode=ui_mode, + session=session, + persist_session=True, + emit_terminal=True, + ): + yield event + + async def ensure_runtime_for_chat( + self, + *, + 
tenant_id: str, + user_id: str, + knowledge_graph_id: str, + mode: ExtractionSessionMode, + ui_mode: GraphManagementUiMode, + session: ExtractionAgentSession, + ) -> AsyncIterator[dict[str, Any]]: + sticky = session.runtime_context.get("sticky_runtime", {}) + if ( + isinstance(sticky.get("runtime_base_url"), str) + and sticky.get("phase") == "ready" + and sticky.get("container_id") + ): + return + async for event in self._stream_prepare_runtime( + tenant_id=tenant_id, + user_id=user_id, + knowledge_graph_id=knowledge_graph_id, + mode=mode, + ui_mode=ui_mode, + session=session, + persist_session=True, + emit_terminal=False, + ): + yield event + + async def _stream_prepare_runtime( + self, + *, + tenant_id: str, + user_id: str, + knowledge_graph_id: str, + mode: ExtractionSessionMode, + ui_mode: GraphManagementUiMode, + session: ExtractionAgentSession, + persist_session: bool, + emit_terminal: bool, + ) -> AsyncIterator[dict[str, Any]]: + yield { + "type": "thinking", + "recent": ["Preparing Graph Management Assistant runtime…"], + } + + resolved_skills = await self._skill_resolution_service.resolve_for_graph_management_turn( + knowledge_graph_id=knowledge_graph_id, + mode=mode, + ui_mode=ui_mode, + ) + session.runtime_context["agent_configuration"] = { + "system_prompt": resolved_skills.system_prompt, + "prompt_hierarchy": list(resolved_skills.prompt_hierarchy), + "guardrails": list(resolved_skills.guardrails), + "skills": dict(resolved_skills.skills), + "graph_management_ui_mode": ui_mode.value, + } + + readiness = await self._ingestion_readiness_reader.read_for_knowledge_graph( + knowledge_graph_id=knowledge_graph_id, + ) + gate = resolve_job_package_gate(ui_mode=ui_mode, readiness=readiness) + session.runtime_context["job_package"] = { + "phase": gate.phase.value, + "data_source_count": readiness.data_source_count, + "prepared_source_count": readiness.prepared_source_count, + } + + if gate.phase == SessionJobPackagePhase.AWAITING_PREPARE: + wait_message = 
gate.wait_message or "Waiting for JobPackage ingestion context." + session.runtime_context["activity_lines"] = [wait_message] + session.runtime_context["sticky_runtime"] = { + "phase": "awaiting_job_package", + "status": "waiting", + } + if persist_session: + await self._session_service.save_session(session) + yield {"type": "wait", "phase": gate.phase.value, "message": wait_message} + yield { + "type": "thinking", + "recent": ["Waiting for JobPackage ingestion context…", wait_message], + } + if emit_terminal: + yield { + "type": "done", + "ok": True, + "ready": False, + "wait": True, + "message": wait_message, + } + return + + if self._runtime_backend != "container": + lease = self._sticky_runtime_manager.get_or_start_runtime( + session_id=session.id, + user_id=user_id, + knowledge_graph_id=knowledge_graph_id, + mode=mode.value, + bootstrap=None, + ) + session.runtime_context["sticky_runtime"] = self._lease_context(lease, phase="ready") + if persist_session: + await self._session_service.save_session(session) + yield { + "type": "thinking", + "recent": ["In-memory assistant runtime ready"], + } + yield {"type": "ready", "runtime_base_url": lease.runtime_base_url} + yield {"type": "done", "ok": True, "ready": True} + return + + yield { + "type": "thinking", + "recent": [ + "Preparing Graph Management Assistant runtime…", + "Materializing workspace and skills for sticky container", + ], + } + bootstrap = await self._bootstrap_builder.build( + tenant_id=tenant_id, + knowledge_graph_id=knowledge_graph_id, + session_id=session.id, + include_job_packages=gate.phase != SessionJobPackagePhase.NOT_REQUIRED, + ) + yield { + "type": "thinking", + "recent": [ + "Materializing workspace and skills for sticky container", + "Starting isolated Claude Agent SDK container", + ], + } + lease = self._sticky_runtime_manager.get_or_start_runtime( + session_id=session.id, + user_id=user_id, + knowledge_graph_id=knowledge_graph_id, + mode=mode.value, + bootstrap=bootstrap, + ) + 
session.runtime_context["sticky_runtime"] = self._lease_context(lease, phase="starting") + yield { + "type": "thinking", + "recent": [ + "Starting isolated Claude Agent SDK container", + f"Container {lease.container_id[:8]} launched", + ], + } + + runtime_base_url = lease.runtime_base_url or "" + try: + async for line in self._health_checker.wait_until_healthy( + runtime_base_url=runtime_base_url, + timeout_seconds=self._sticky_health_timeout_seconds, + ): + yield {"type": "thinking", "recent": [line]} + except TimeoutError as exc: + session.runtime_context["sticky_runtime"]["phase"] = "unhealthy" + session.runtime_context["sticky_runtime"]["status"] = "unhealthy" + if persist_session: + await self._session_service.save_session(session) + yield { + "type": "done", + "ok": False, + "ready": False, + "error": {"code": "RUNTIME_UNHEALTHY", "message": str(exc)}, + } + return + + session.runtime_context["sticky_runtime"] = self._lease_context(lease, phase="ready") + session.runtime_context.pop("activity_lines", None) + session.updated_at = datetime.now(UTC) + if persist_session: + await self._session_service.save_session(session) + + yield {"type": "ready", "runtime_base_url": runtime_base_url} + yield {"type": "done", "ok": True, "ready": True} + + @staticmethod + def _lease_context(lease: StickySessionRuntimeLease, *, phase: str) -> dict[str, Any]: + return { + "container_id": lease.container_id, + "status": lease.status, + "expires_at": lease.expires_at.isoformat(), + "runtime_base_url": lease.runtime_base_url, + "phase": phase, + } diff --git a/src/api/extraction/dependencies.py b/src/api/extraction/dependencies.py index f1125096f..d81916c98 100644 --- a/src/api/extraction/dependencies.py +++ b/src/api/extraction/dependencies.py @@ -12,8 +12,10 @@ ExtractionChatTurnService, ExtractionSkillResolutionService, ) -from extraction.infrastructure.prepared_job_package_reader import SqlPreparedJobPackageReader +from extraction.application.sticky_session_runtime_service 
import StickySessionRuntimeService +from extraction.infrastructure.sticky_runtime_health import StickyRuntimeHealthChecker from extraction.infrastructure.ingestion_readiness_reader import SqlIngestionReadinessReader +from extraction.infrastructure.prepared_job_package_reader import SqlPreparedJobPackageReader from extraction.infrastructure.repositories import ( ExtractionAgentSessionRepository, ExtractionSessionRunMetricsReader, @@ -92,11 +94,18 @@ def get_extraction_chat_turn_service( ), runtime_settings=runtime_settings, ) - return ExtractionChatTurnService( + runtime_service = StickySessionRuntimeService( session_service=session_service, skill_resolution_service=skill_resolution_service, ingestion_readiness_reader=SqlIngestionReadinessReader(session=session), sticky_runtime_manager=sticky_runtime_manager, - chat_agent=create_extraction_chat_agent(runtime_settings), bootstrap_builder=bootstrap_builder, + health_checker=StickyRuntimeHealthChecker(), + runtime_backend=runtime_settings.backend, + sticky_health_timeout_seconds=runtime_settings.sticky_health_timeout_seconds, + ) + return ExtractionChatTurnService( + session_service=session_service, + runtime_service=runtime_service, + chat_agent=create_extraction_chat_agent(runtime_settings), ) diff --git a/src/api/extraction/infrastructure/container_workload_runtime.py b/src/api/extraction/infrastructure/container_workload_runtime.py index 53b638281..eda814fb9 100644 --- a/src/api/extraction/infrastructure/container_workload_runtime.py +++ b/src/api/extraction/infrastructure/container_workload_runtime.py @@ -8,6 +8,7 @@ from ulid import ULID +from extraction.infrastructure.vertex_runtime_env import build_vertex_container_env from extraction.ports.runtime import ( EphemeralWorkerLaunchRequest, EphemeralWorkerLaunchResult, @@ -42,6 +43,10 @@ def __init__( sticky_service_port: int = 8787, container_skills_mount: str = "/app/skills", container_work_mount: str = "/workspace", + vertex_project_id: str = "", + 
vertex_region: str = "us-east5", + vertex_enabled: bool = False, + gcloud_config_mount: str | None = None, ) -> None: self._container_runtime = container_runtime self._sticky_image = sticky_image @@ -51,6 +56,10 @@ def __init__( self._sticky_service_port = sticky_service_port self._container_skills_mount = container_skills_mount self._container_work_mount = container_work_mount + self._vertex_project_id = vertex_project_id + self._vertex_region = vertex_region + self._vertex_enabled = vertex_enabled + self._gcloud_config_mount = gcloud_config_mount self._leases: dict[str, StickySessionRuntimeLease] = {} def get_or_start_runtime( @@ -169,6 +178,17 @@ def _start_runtime( ] ) + if self._vertex_enabled: + env.update( + build_vertex_container_env( + project_id=self._vertex_project_id, + region=self._vertex_region, + ) + ) + if self._gcloud_config_mount: + binds.append(f"{self._gcloud_config_mount}:/root/.config/gcloud:ro") + env.setdefault("CLOUDSDK_CONFIG", "/root/.config/gcloud") + launched = self._container_runtime.run( ContainerRunSpec( image=self._sticky_image, diff --git a/src/api/extraction/infrastructure/sticky_runtime_health.py b/src/api/extraction/infrastructure/sticky_runtime_health.py new file mode 100644 index 000000000..01f99523b --- /dev/null +++ b/src/api/extraction/infrastructure/sticky_runtime_health.py @@ -0,0 +1,47 @@ +"""Health polling for sticky session agent runtime containers.""" + +from __future__ import annotations + +import asyncio +from collections.abc import AsyncIterator + +import httpx + + +class StickyRuntimeHealthChecker: + """Poll agent runtime /health until the sticky container is ready.""" + + def __init__(self, *, request_timeout_seconds: float = 3.0) -> None: + self._request_timeout_seconds = request_timeout_seconds + + async def wait_until_healthy( + self, + *, + runtime_base_url: str, + timeout_seconds: float = 90.0, + poll_interval_seconds: float = 1.0, + ) -> AsyncIterator[str]: + """Yield human-readable progress lines until 
healthy or timeout.""" + if runtime_base_url.startswith("memory://"): + yield "In-memory assistant runtime ready" + return + + deadline = asyncio.get_event_loop().time() + timeout_seconds + url = f"{runtime_base_url.rstrip('/')}/health" + attempt = 0 + while asyncio.get_event_loop().time() < deadline: + attempt += 1 + yield f"Waiting for assistant container health check (attempt {attempt})…" + try: + async with httpx.AsyncClient(timeout=self._request_timeout_seconds) as client: + response = await client.get(url) + if response.status_code == 200: + yield "Assistant container is healthy" + return + except httpx.HTTPError: + pass + await asyncio.sleep(poll_interval_seconds) + + raise TimeoutError( + f"Sticky session runtime did not become healthy within {int(timeout_seconds)}s" + ) diff --git a/src/api/extraction/infrastructure/vertex_runtime_env.py b/src/api/extraction/infrastructure/vertex_runtime_env.py new file mode 100644 index 000000000..a3738335d --- /dev/null +++ b/src/api/extraction/infrastructure/vertex_runtime_env.py @@ -0,0 +1,38 @@ +"""Vertex AI environment helpers for Claude Agent SDK runtimes.""" + +from __future__ import annotations + +import os + + +def is_truthy_env(value: str | None) -> bool: + if not value: + return False + normalized = value.strip().lower() + return normalized in {"1", "true", "yes", "on"} + + +def vertex_enabled_from_env() -> bool: + return is_truthy_env(os.getenv("CLAUDE_CODE_USE_VERTEX")) + + +def build_vertex_container_env( + *, + project_id: str, + region: str, +) -> dict[str, str]: + """Return env vars for Claude Agent SDK Vertex mode inside sticky containers.""" + env: dict[str, str] = {"CLAUDE_CODE_USE_VERTEX": "1"} + if project_id.strip(): + env["ANTHROPIC_VERTEX_PROJECT_ID"] = project_id.strip() + if region.strip(): + env["CLOUD_ML_REGION"] = region.strip() + env["VERTEXAI_LOCATION"] = region.strip() + return env + + +def claude_model_configured() -> bool: + """Return True when Vertex or direct Anthropic API credentials 
are configured.""" + if vertex_enabled_from_env(): + return bool(os.getenv("ANTHROPIC_VERTEX_PROJECT_ID", "").strip()) + return bool(os.getenv("ANTHROPIC_API_KEY", "").strip()) diff --git a/src/api/extraction/infrastructure/workload_runtime_factory.py b/src/api/extraction/infrastructure/workload_runtime_factory.py index 1b62a3b76..f04a17760 100644 --- a/src/api/extraction/infrastructure/workload_runtime_factory.py +++ b/src/api/extraction/infrastructure/workload_runtime_factory.py @@ -69,6 +69,10 @@ def create_sticky_session_runtime_manager( sticky_service_port=resolved.sticky_service_port, container_skills_mount=resolved.container_skills_mount, container_work_mount=resolved.container_work_mount, + vertex_project_id=resolved.vertex_project_id, + vertex_region=resolved.vertex_region, + vertex_enabled=resolved.vertex_enabled(), + gcloud_config_mount=resolved.gcloud_config_mount, ) diff --git a/src/api/extraction/infrastructure/workload_runtime_settings.py b/src/api/extraction/infrastructure/workload_runtime_settings.py index d55b57ba6..da2541971 100644 --- a/src/api/extraction/infrastructure/workload_runtime_settings.py +++ b/src/api/extraction/infrastructure/workload_runtime_settings.py @@ -2,12 +2,15 @@ from __future__ import annotations +import os from functools import lru_cache from typing import Literal -from pydantic import Field, field_validator +from pydantic import Field, field_validator, model_validator from pydantic_settings import BaseSettings, SettingsConfigDict +from extraction.infrastructure.vertex_runtime_env import vertex_enabled_from_env + class ExtractionWorkloadRuntimeSettings(BaseSettings): """Container and in-memory extraction runtime configuration.""" @@ -33,6 +36,34 @@ class ExtractionWorkloadRuntimeSettings(BaseSettings): job_package_work_dir: str = Field(default="/tmp/kartograph/job_packages") skills_dir: str = Field(default="/app/skills") api_base_url: str = Field(default="http://api:8000") + sticky_health_timeout_seconds: float = 
Field(default=90.0, ge=5.0, le=600.0) + vertex_project_id: str = Field(default="") + vertex_region: str = Field(default="us-east5") + gcloud_config_mount: str | None = Field(default=None) + + def vertex_enabled(self) -> bool: + return vertex_enabled_from_env() + + @model_validator(mode="after") + def _apply_vertex_env_aliases(self) -> "ExtractionWorkloadRuntimeSettings": + if not self.vertex_project_id: + object.__setattr__( + self, + "vertex_project_id", + os.getenv("ANTHROPIC_VERTEX_PROJECT_ID", "").strip(), + ) + if self.vertex_region == "us-east5": + region = ( + os.getenv("CLOUD_ML_REGION", "").strip() + or os.getenv("VERTEXAI_LOCATION", "").strip() + ) + if region: + object.__setattr__(self, "vertex_region", region) + if self.gcloud_config_mount is None: + gcloud = os.getenv("KARTOGRAPH_GCLOUD_CONFIG_MOUNT", "").strip() + if gcloud: + object.__setattr__(self, "gcloud_config_mount", gcloud) + return self @field_validator("sticky_command", "worker_command", mode="before") @classmethod diff --git a/src/api/extraction/ports/sticky_runtime_health.py b/src/api/extraction/ports/sticky_runtime_health.py new file mode 100644 index 000000000..be72eaa0f --- /dev/null +++ b/src/api/extraction/ports/sticky_runtime_health.py @@ -0,0 +1,19 @@ +"""Port for polling sticky session agent runtime health.""" + +from __future__ import annotations + +from collections.abc import AsyncIterator +from typing import Protocol + + +class IStickyRuntimeHealthChecker(Protocol): + """Poll agent runtime /health until the sticky container is ready.""" + + async def wait_until_healthy( + self, + *, + runtime_base_url: str, + timeout_seconds: float = 90.0, + ) -> AsyncIterator[str]: + """Yield human-readable progress lines until healthy or timeout.""" + ... 
diff --git a/src/api/extraction/ports/sticky_session_runtime.py b/src/api/extraction/ports/sticky_session_runtime.py new file mode 100644 index 000000000..1a3c4ba06 --- /dev/null +++ b/src/api/extraction/ports/sticky_session_runtime.py @@ -0,0 +1,36 @@ +"""Port for preparing sticky session containers before graph-management chat.""" + +from __future__ import annotations + +from collections.abc import AsyncIterator +from typing import Any, Protocol + +from extraction.domain.entities.agent_session import ExtractionAgentSession +from extraction.domain.value_objects import ExtractionSessionMode, GraphManagementUiMode + + +class IStickySessionRuntimeService(Protocol): + """Starts sticky containers and streams transparent readiness progress.""" + + async def stream_runtime_warmup( + self, + *, + tenant_id: str, + user_id: str, + knowledge_graph_id: str, + mode: ExtractionSessionMode, + ui_mode: GraphManagementUiMode, + ) -> AsyncIterator[dict[str, Any]]: + ... + + async def ensure_runtime_for_chat( + self, + *, + tenant_id: str, + user_id: str, + knowledge_graph_id: str, + mode: ExtractionSessionMode, + ui_mode: GraphManagementUiMode, + session: ExtractionAgentSession, + ) -> AsyncIterator[dict[str, Any]]: + ... 
diff --git a/src/api/extraction/presentation/models.py b/src/api/extraction/presentation/models.py index e912f19f2..9d57ed426 100644 --- a/src/api/extraction/presentation/models.py +++ b/src/api/extraction/presentation/models.py @@ -137,3 +137,9 @@ class ExtractionChatTurnRequest(BaseModel): message: str = Field(min_length=1) graph_management_ui_mode: GraphManagementUiMode = GraphManagementUiMode.INITIAL_SCHEMA_DESIGN + + +class StickyRuntimeWarmupRequest(BaseModel): + """Request model for proactive sticky runtime warmup.""" + + graph_management_ui_mode: GraphManagementUiMode = GraphManagementUiMode.INITIAL_SCHEMA_DESIGN diff --git a/src/api/extraction/presentation/routes.py b/src/api/extraction/presentation/routes.py index 815352479..fbd922010 100644 --- a/src/api/extraction/presentation/routes.py +++ b/src/api/extraction/presentation/routes.py @@ -22,6 +22,7 @@ ExtractionSessionHistoryResponse, ExtractionSessionListResponse, ExtractionSessionResponse, + StickyRuntimeWarmupRequest, ) from iam.application.value_objects import CurrentUser from iam.dependencies.user import get_current_user @@ -162,6 +163,36 @@ async def clear_chat( return ExtractionSessionResponse.from_domain(session) +@router.post( + "/knowledge-graphs/{knowledge_graph_id}/sessions/{mode}/runtime/warm", +) +async def stream_runtime_warmup( + knowledge_graph_id: str, + mode: ExtractionSessionMode, + request: StickyRuntimeWarmupRequest, + current_user: Annotated[CurrentUser, Depends(get_current_user)], + service: Annotated[ExtractionChatTurnService, Depends(get_extraction_chat_turn_service)], + authz: Annotated[AuthorizationProvider, Depends(get_spicedb_client)], +) -> StreamingResponse: + await _assert_kg_edit_permission( + authz=authz, + current_user=current_user, + knowledge_graph_id=knowledge_graph_id, + ) + + async def event_stream(): + async for event in service.stream_runtime_warmup( + user_id=current_user.user_id.value, + tenant_id=current_user.tenant_id.value, + 
knowledge_graph_id=knowledge_graph_id, + mode=mode, + ui_mode=request.graph_management_ui_mode, + ): + yield json.dumps(event) + "\n" + + return StreamingResponse(event_stream(), media_type="application/x-ndjson") + + @router.post( "/knowledge-graphs/{knowledge_graph_id}/sessions/{mode}/chat", ) diff --git a/src/api/tests/unit/extraction/application/test_chat_turn_service.py b/src/api/tests/unit/extraction/application/test_chat_turn_service.py index 7c6a01748..e990fe1c7 100644 --- a/src/api/tests/unit/extraction/application/test_chat_turn_service.py +++ b/src/api/tests/unit/extraction/application/test_chat_turn_service.py @@ -8,7 +8,7 @@ from extraction.application.agent_session_service import ExtractionAgentSessionService from extraction.application.chat_turn_service import ExtractionChatTurnService -from extraction.application.skill_resolution_service import ExtractionSkillResolutionService +from extraction.application.sticky_session_runtime_service import StickySessionRuntimeService from extraction.domain.entities.agent_session import ExtractionAgentSession from extraction.domain.value_objects import ( ExtractionSessionMode, @@ -84,21 +84,40 @@ async def build(self, **kwargs): return None -@pytest.mark.asyncio -async def test_stream_chat_turn_persists_assistant_reply() -> None: +class _InstantHealthChecker: + async def wait_until_healthy(self, **kwargs): + yield "Assistant container is healthy" + return + + +def _build_chat_turn_service( + *, + readiness: IngestionReadinessSnapshot, +) -> tuple[ExtractionChatTurnService, _InMemoryAgentSessionRepository]: repo = _InMemoryAgentSessionRepository() sticky = InMemoryStickySessionRuntimeManager() session_service = ExtractionAgentSessionService(repository=repo) - service = ExtractionChatTurnService( + runtime_service = StickySessionRuntimeService( session_service=session_service, skill_resolution_service=_StaticSkillResolutionService(), - ingestion_readiness_reader=_StaticIngestionReadinessReader( - 
IngestionReadinessSnapshot(1, 1), - ), + ingestion_readiness_reader=_StaticIngestionReadinessReader(readiness), sticky_runtime_manager=sticky, - chat_agent=DeterministicExtractionChatAgent(), bootstrap_builder=_StaticBootstrapBuilder(), + health_checker=_InstantHealthChecker(), + runtime_backend="memory", + sticky_health_timeout_seconds=5.0, + ) + service = ExtractionChatTurnService( + session_service=session_service, + runtime_service=runtime_service, + chat_agent=DeterministicExtractionChatAgent(), ) + return service, repo + + +@pytest.mark.asyncio +async def test_stream_chat_turn_persists_assistant_reply() -> None: + service, repo = _build_chat_turn_service(readiness=IngestionReadinessSnapshot(1, 1)) events = [ event @@ -123,19 +142,7 @@ async def test_stream_chat_turn_persists_assistant_reply() -> None: @pytest.mark.asyncio async def test_stream_chat_turn_wait_when_job_package_unprepared() -> None: - repo = _InMemoryAgentSessionRepository() - sticky = InMemoryStickySessionRuntimeManager() - session_service = ExtractionAgentSessionService(repository=repo) - service = ExtractionChatTurnService( - session_service=session_service, - skill_resolution_service=_StaticSkillResolutionService(), - ingestion_readiness_reader=_StaticIngestionReadinessReader( - IngestionReadinessSnapshot(2, 0), - ), - sticky_runtime_manager=sticky, - chat_agent=DeterministicExtractionChatAgent(), - bootstrap_builder=_StaticBootstrapBuilder(), - ) + service, repo = _build_chat_turn_service(readiness=IngestionReadinessSnapshot(2, 0)) events = [ event @@ -158,3 +165,25 @@ async def test_stream_chat_turn_wait_when_job_package_unprepared() -> None: ) assert active is not None assert active.runtime_context["job_package"]["phase"] == "awaiting_job_package" + + +@pytest.mark.asyncio +async def test_stream_runtime_warmup_marks_memory_backend_ready() -> None: + service, _repo = _build_chat_turn_service(readiness=IngestionReadinessSnapshot(1, 1)) + + events = [ + event + async for event in 
service.stream_runtime_warmup( + tenant_id="tenant-1", + user_id="user-1", + knowledge_graph_id="kg-1", + mode=ExtractionSessionMode.SCHEMA_BOOTSTRAP, + ui_mode=GraphManagementUiMode.INITIAL_SCHEMA_DESIGN, + ) + ] + + assert any(event.get("type") == "ready" for event in events) + done = events[-1] + assert done["type"] == "done" + assert done["ok"] is True + assert done.get("ready") is True diff --git a/src/api/tests/unit/extraction/infrastructure/test_vertex_runtime_env.py b/src/api/tests/unit/extraction/infrastructure/test_vertex_runtime_env.py new file mode 100644 index 000000000..a5fe91d39 --- /dev/null +++ b/src/api/tests/unit/extraction/infrastructure/test_vertex_runtime_env.py @@ -0,0 +1,42 @@ +"""Unit tests for Vertex runtime environment helpers.""" + +from __future__ import annotations + +import pytest + +from extraction.infrastructure.vertex_runtime_env import ( + build_vertex_container_env, + vertex_enabled_from_env, +) + + +@pytest.mark.parametrize( + ("value", "expected"), + [ + ("1", True), + ("true", True), + ("yes", True), + ("0", False), + ("", False), + (None, False), + ], +) +def test_vertex_enabled_from_env( + monkeypatch: pytest.MonkeyPatch, value: str | None, expected: bool +) -> None: + if value is None: + monkeypatch.delenv("CLAUDE_CODE_USE_VERTEX", raising=False) + else: + monkeypatch.setenv("CLAUDE_CODE_USE_VERTEX", value) + assert vertex_enabled_from_env() is expected + + +def test_build_vertex_container_env_includes_project_and_region() -> None: + env = build_vertex_container_env( + project_id="my-gcp-project", + region="us-central1", + ) + assert env["CLAUDE_CODE_USE_VERTEX"] == "1" + assert env["ANTHROPIC_VERTEX_PROJECT_ID"] == "my-gcp-project" + assert env["CLOUD_ML_REGION"] == "us-central1" + assert env["VERTEXAI_LOCATION"] == "us-central1" diff --git a/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue b/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue index 47e6995d8..e75957954 100644 --- 
a/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue +++ b/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue @@ -96,7 +96,7 @@ import { type MutationLogEntryPreviewPage, type MutationLogRunRecord, } from '@/utils/kgMutationLogs' -import { streamExtractionChatTurn } from '@/utils/kgExtractionChat' +import { streamExtractionChatTurn, streamRuntimeWarmup } from '@/utils/kgExtractionChat' import { useGraphApi } from '@/composables/api/useGraphApi' const runtimeConfig = useRuntimeConfig() @@ -209,6 +209,10 @@ const sessionForbidden = ref(false) const sessionForbiddenReason = ref<string | null>(null) const clearingChat = ref(false) const sendingChat = ref(false) +const runtimeWarming = ref(false) +const runtimeReady = ref(false) +const runtimeWarmupError = ref<string | null>(null) +let runtimeWarmupGeneration = 0 const extractionSession = ref<ExtractionSessionResponse | null>(null) const sessionHistory = ref<ExtractionSessionHistoryItem[]>([]) const draftMessage = ref('') @@ -300,6 +304,8 @@ const graphManagementInputPlaceholder = computed( ) const sessionStatusLabel = computed(() => { + if (runtimeWarming.value) return 'Starting assistant' + if (!runtimeReady.value && runtimeWarmupError.value) return 'Runtime unavailable' if (sessionLoading.value) return 'Loading session' if (clearingChat.value) return 'Resetting chat' if (extractionSession.value?.id) { @@ -308,6 +314,19 @@ const sessionStatusLabel = computed(() => { return 'No active session' }) +const chatInputDisabled = computed( + () => workspaceForbidden.value || runtimeWarming.value || !runtimeReady.value, +) + +const chatInputDisabledReason = computed(() => { + if (workspaceForbidden.value) return workspaceForbiddenReason.value + if (runtimeWarming.value) return 'Starting Graph Management Assistant…' + if (!runtimeReady.value) { + return runtimeWarmupError.value ?? 'Assistant runtime is not ready yet.' 
+ } + return null +}) + const graphManagementRailItems = computed(() => { if (!statusProjection.value) return [] return buildGraphManagementRailItems({ @@ -878,11 +897,64 @@ function syncActivityLinesFromSession() { sessionActivityLines.value = candidate.filter( (line): line is string => typeof line === 'string' && line.trim().length > 0, ) - } else { + } else if (!runtimeWarming.value) { sessionActivityLines.value = [] } } +async function warmupAssistantRuntime() { + if (!kgId.value || activeStep.value !== 'graph-management') return + if (sessionForbidden.value || workspaceForbidden.value) { + runtimeReady.value = false + return + } + + const generation = ++runtimeWarmupGeneration + runtimeWarming.value = true + runtimeReady.value = false + runtimeWarmupError.value = null + sessionActivityLines.value = ['Preparing Graph Management Assistant runtime…'] + + try { + for await (const event of streamRuntimeWarmup({ + apiBaseUrl: String(runtimeConfig.public.apiBaseUrl ?? ''), + accessToken: accessToken.value, + tenantId: currentTenantId.value, + kgId: kgId.value, + sessionMode: sharedSessionMode.value, + uiMode: graphManagementMode.value, + })) { + if (generation !== runtimeWarmupGeneration) return + if (event.type === 'thinking' && Array.isArray(event.recent)) { + sessionActivityLines.value = event.recent.filter(Boolean) + } + if (event.type === 'wait' && event.message) { + sessionActivityLines.value = [event.message] + } + if (event.type === 'ready') { + sessionActivityLines.value = ['Assistant container ready'] + } + if (event.type === 'done') { + if (event.ok !== true) { + throw new Error(event.error?.message ?? 
'Runtime warmup failed') + } + runtimeReady.value = event.ready === true || event.wait === true + } + } + await loadExtractionSession() + } catch (err) { + runtimeWarmupError.value = extractErrorMessage(err) + runtimeReady.value = false + toast.error('Failed to start Graph Management Assistant', { + description: runtimeWarmupError.value, + }) + } finally { + if (generation === runtimeWarmupGeneration) { + runtimeWarming.value = false + } + } +} + async function sendChatMessage(message: string) { if (sessionForbidden.value || !shouldApplyMutationResult(sessionForbidden.value)) { toast.error('Chat unavailable', { @@ -1051,22 +1123,29 @@ watch(tenantVersion, () => { watch( () => statusProjection.value?.workspace_mode, - () => { + async () => { if (activeStep.value === 'graph-management') { syncGraphManagementState() - loadExtractionSession() + await loadExtractionSession() + await warmupAssistantRuntime() } }, ) watch( - () => [activeStep.value, route.query.gm_mode] as const, - () => { + () => [activeStep.value, route.query.gm_mode, sharedSessionMode.value] as const, + async () => { if (activeStep.value === 'graph-management') { syncGraphManagementState() - loadExtractionSession() + await loadExtractionSession() loadSessionHistory() loadGraphManagementDataSources() + await warmupAssistantRuntime() + } else { + runtimeWarmupGeneration += 1 + runtimeWarming.value = false + runtimeReady.value = false + runtimeWarmupError.value = null } }, ) @@ -1675,8 +1754,8 @@ watch(selectedOpsDataSourceId, () => { :activity-lines="sessionActivityLines" :forbidden="sessionForbidden" :forbidden-reason="sessionForbiddenReason" - :input-disabled="workspaceForbidden" - :input-disabled-reason="workspaceForbiddenReason" + :input-disabled="chatInputDisabled" + :input-disabled-reason="chatInputDisabledReason" @refresh="loadExtractionSession" @clear-chat="clearChat" @send-message="sendChatMessage" diff --git a/src/dev-ui/app/tests/kg-extraction-chat.test.ts 
b/src/dev-ui/app/tests/kg-extraction-chat.test.ts index efdfd0a37..ce4cbd96c 100644 --- a/src/dev-ui/app/tests/kg-extraction-chat.test.ts +++ b/src/dev-ui/app/tests/kg-extraction-chat.test.ts @@ -1,5 +1,5 @@ import { describe, expect, it } from 'vitest' -import { streamExtractionChatTurn } from '../utils/kgExtractionChat' +import { streamExtractionChatTurn, streamRuntimeWarmup } from '../utils/kgExtractionChat' describe('kgExtractionChat', () => { it('targets the extraction chat NDJSON endpoint with UI mode in body', async () => { @@ -40,4 +40,50 @@ describe('kgExtractionChat', () => { globalThis.fetch = originalFetch } }) + + it('targets the proactive runtime warmup NDJSON endpoint with UI mode in body', async () => { + const originalFetch = globalThis.fetch + const calls: Array<{ url: string; init?: RequestInit }> = [] + globalThis.fetch = (async (input: RequestInfo | URL, init?: RequestInit) => { + calls.push({ url: String(input), init }) + const body = new ReadableStream({ + start(controller) { + controller.enqueue( + new TextEncoder().encode( + '{"type":"ready","runtime_base_url":"http://runtime:8787"}\n{"type":"done","ok":true,"ready":true}\n', + ), + ) + controller.close() + }, + }) + return new Response(body, { status: 200, headers: { 'Content-Type': 'application/x-ndjson' } }) + }) as typeof fetch + + try { + const events = [] + for await (const event of streamRuntimeWarmup({ + apiBaseUrl: 'http://api.test', + accessToken: 'token', + tenantId: 'tenant-1', + kgId: 'kg-1', + sessionMode: 'schema_bootstrap', + uiMode: 'initial-schema-design', + })) { + events.push(event) + } + + expect(events).toEqual([ + { type: 'ready', runtime_base_url: 'http://runtime:8787' }, + { type: 'done', ok: true, ready: true }, + ]) + expect(calls[0]?.url).toContain( + '/extraction/knowledge-graphs/kg-1/sessions/schema_bootstrap/runtime/warm', + ) + expect(JSON.parse(String(calls[0]?.init?.body))).toEqual({ + graph_management_ui_mode: 'initial-schema-design', + }) + } finally { + 
globalThis.fetch = originalFetch + } + }) }) diff --git a/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts b/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts index aa4ef8086..0a17e3b08 100644 --- a/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts +++ b/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts @@ -553,6 +553,8 @@ describe('KG-MANAGE-017 - chat input keyboard contract', () => { expect(sharedConversationPanelVue).toContain('Shift+Enter for a new line') expect(sharedConversationPanelVue).toContain("emit('sendMessage'") expect(manageWorkspaceVue).toContain('streamExtractionChatTurn') + expect(manageWorkspaceVue).toContain('streamRuntimeWarmup') + expect(manageWorkspaceVue).toContain('warmupAssistantRuntime') expect(manageWorkspaceVue).toContain('@send-message="sendChatMessage"') }) }) diff --git a/src/dev-ui/app/utils/kgExtractionChat.ts b/src/dev-ui/app/utils/kgExtractionChat.ts index 9ed1cd13a..847eb7b20 100644 --- a/src/dev-ui/app/utils/kgExtractionChat.ts +++ b/src/dev-ui/app/utils/kgExtractionChat.ts @@ -1,14 +1,16 @@ -/** Stream graph-management chat turns over NDJSON. */ +/** Stream graph-management chat turns and proactive runtime warmup over NDJSON. 
*/ import type { GraphManagementMode } from '@/utils/kgGraphManagement' export interface ExtractionChatStreamEvent { - type: 'thinking' | 'wait' | 'done' + type: 'thinking' | 'wait' | 'ready' | 'done' recent?: string[] phase?: string message?: string + runtime_base_url?: string ok?: boolean reply?: string | null + ready?: boolean wait?: boolean error?: { code: string; message: string } } @@ -23,35 +25,29 @@ export interface StreamExtractionChatOptions { message: string } -export async function* streamExtractionChatTurn( - options: StreamExtractionChatOptions, -): AsyncGenerator<ExtractionChatStreamEvent> { - const headers: Record<string, string> = { - 'Content-Type': 'application/json', - Accept: 'application/x-ndjson', - } - if (options.accessToken) { - headers.Authorization = `Bearer ${options.accessToken}` - } - if (options.tenantId) { - headers['X-Tenant-ID'] = options.tenantId - } +export interface StreamRuntimeWarmupOptions { + apiBaseUrl: string + accessToken: string | null + tenantId: string | null + kgId: string + sessionMode: 'schema_bootstrap' | 'extraction_operations' + uiMode: GraphManagementMode +} - const response = await fetch( - `${options.apiBaseUrl}/extraction/knowledge-graphs/${encodeURIComponent(options.kgId)}/sessions/${options.sessionMode}/chat`, - { - method: 'POST', - headers, - body: JSON.stringify({ - message: options.message, - graph_management_ui_mode: options.uiMode, - }), - }, - ) +async function* streamNdjsonPost( + url: string, + headers: Record<string, string>, + body: Record<string, unknown>, +): AsyncGenerator<ExtractionChatStreamEvent> { + const response = await fetch(url, { + method: 'POST', + headers, + body: JSON.stringify(body), + }) if (!response.ok) { - const body = await response.text().catch(() => '') - throw new Error(body || `${response.status} ${response.statusText}`) + const text = await response.text().catch(() => '') + throw new Error(text || `${response.status} ${response.statusText}`) } const reader = 
response.body?.getReader() @@ -80,3 +76,41 @@ export async function* streamExtractionChatTurn( yield JSON.parse(tail) as ExtractionChatStreamEvent } } + +function buildExtractionHeaders( + accessToken: string | null, + tenantId: string | null, +): Record<string, string> { + const headers: Record<string, string> = { + 'Content-Type': 'application/json', + Accept: 'application/x-ndjson', + } + if (accessToken) { + headers.Authorization = `Bearer ${accessToken}` + } + if (tenantId) { + headers['X-Tenant-ID'] = tenantId + } + return headers +} + +export async function* streamRuntimeWarmup( + options: StreamRuntimeWarmupOptions, +): AsyncGenerator<ExtractionChatStreamEvent> { + const headers = buildExtractionHeaders(options.accessToken, options.tenantId) + const url = `${options.apiBaseUrl}/extraction/knowledge-graphs/${encodeURIComponent(options.kgId)}/sessions/${options.sessionMode}/runtime/warm` + yield* streamNdjsonPost(url, headers, { + graph_management_ui_mode: options.uiMode, + }) +} + +export async function* streamExtractionChatTurn( + options: StreamExtractionChatOptions, +): AsyncGenerator<ExtractionChatStreamEvent> { + const headers = buildExtractionHeaders(options.accessToken, options.tenantId) + const url = `${options.apiBaseUrl}/extraction/knowledge-graphs/${encodeURIComponent(options.kgId)}/sessions/${options.sessionMode}/chat` + yield* streamNdjsonPost(url, headers, { + message: options.message, + graph_management_ui_mode: options.uiMode, + }) +} From 7c9c7676412f2a5c3818a21a6ff1a129b71e5360 Mon Sep 17 00:00:00 2001 From: aredenba-rh <aredenba@redhat.com> Date: Fri, 29 May 2026 10:40:11 -0400 Subject: [PATCH 065/153] fix(iam): pin stable Keycloak user IDs for dev alice and bob Prevent JIT provisioning conflicts when Keycloak re-imports the realm and Postgres still holds rows keyed by the previous SSO subject. 
Co-authored-by: Cursor <cursoragent@cursor.com> --- keycloak/realm.json | 2 ++ 1 file changed, 2 insertions(+) diff --git a/keycloak/realm.json b/keycloak/realm.json index 85bd32af7..209cdc656 100644 --- a/keycloak/realm.json +++ b/keycloak/realm.json @@ -76,6 +76,7 @@ ], "users": [ { + "id": "91bd9b81-5c1d-4307-8dcd-3b80dcc68894", "username": "alice", "enabled": true, "email": "alice@example.com", @@ -90,6 +91,7 @@ ] }, { + "id": "7ac7083e-42c8-4643-8b2f-052ffc579ea2", "username": "bob", "enabled": true, "email": "bob@example.com", From bbd91e922b7eb62ace438edcb80d512b8b3e758b Mon Sep 17 00:00:00 2001 From: aredenba-rh <aredenba@redhat.com> Date: Fri, 29 May 2026 15:53:51 -0400 Subject: [PATCH 066/153] fix(extraction): enable Vertex chat in sticky containers with host ADC Mount gcloud credentials at /gcloud/config and run sticky containers as the host UID so Claude Agent SDK can reach Vertex AI, while keeping the API root for Docker-out-of-Docker in dev. Co-authored-by: Cursor <cursoragent@cursor.com> --- Makefile | 3 +- compose.dev.yaml | 24 +- src/agent-runtime/Dockerfile | 8 +- .../kartograph_agent_runtime/executor.py | 94 +++++++- .../kartograph_agent_runtime/server.py | 51 +++- .../kartograph_agent_runtime/settings.py | 3 + .../application/chat_turn_service.py | 19 ++ .../sticky_session_runtime_service.py | 60 +++-- src/api/extraction/dependencies.py | 36 ++- .../container_workload_runtime.py | 139 ++++++++++- .../remote_sticky_container_chat_agent.py | 15 +- .../repositories/agent_session_repository.py | 1 + .../infrastructure/workload_runtime.py | 42 ++++ .../workload_runtime_factory.py | 3 + .../workload_runtime_settings.py | 32 ++- src/api/extraction/ports/runtime.py | 24 ++ src/api/extraction/presentation/routes.py | 4 +- .../container_runtime/cli_runtime.py | 19 ++ .../shared_kernel/container_runtime/ports.py | 5 + .../application/test_chat_turn_service.py | 96 ++++++++ .../test_sticky_session_runtime_service.py | 217 ++++++++++++++++++ 
.../test_container_workload_runtime.py | 26 +++ ...test_sticky_session_container_bootstrap.py | 16 +- .../test_workload_runtime_settings.py | 5 + .../extraction/presentation/test_routes.py | 8 +- .../extraction/SharedConversationPanel.vue | 18 +- .../pages/knowledge-graphs/[kgId]/manage.vue | 74 +++++- .../app/tests/kg-extraction-chat.test.ts | 36 +++ .../knowledge-graph-manage-workspace.test.ts | 4 +- src/dev-ui/app/utils/kgExtractionChat.ts | 17 +- 30 files changed, 1019 insertions(+), 80 deletions(-) create mode 100644 src/api/tests/unit/extraction/application/test_sticky_session_runtime_service.py diff --git a/Makefile b/Makefile index bda68a19e..4ff4f7b86 100755 --- a/Makefile +++ b/Makefile @@ -23,8 +23,9 @@ certs: .PHONY: dev dev: certs @echo "🧰 [Development] Starting application containers..." + docker compose -f compose.yaml -f compose.dev.yaml --profile build-only build agent-runtime docker compose -f compose.yaml build - docker compose -f compose.yaml -f compose.dev.yaml --profile ui up -d + HOST_UID=$$(id -u) HOST_GID=$$(id -g) docker compose -f compose.yaml -f compose.dev.yaml --profile ui up -d @echo "Done." 
@echo "----------------------------" @echo "API Root: http://localhost:8000" diff --git a/compose.dev.yaml b/compose.dev.yaml index 480388cdc..e48dc6de8 100644 --- a/compose.dev.yaml +++ b/compose.dev.yaml @@ -8,28 +8,36 @@ services: profiles: ["build-only"] api: - # Run as root in dev to handle host file permissions (any umask) - user: "${UID}:${GID}" + # Root required for Docker-out-of-Docker via mounted /var/run/docker.sock in dev + user: "0:0" environment: UV_CACHE_DIR: /tmp/uv-cache + HOST_UID: ${HOST_UID} + HOST_GID: ${HOST_GID} KARTOGRAPH_EXTRACTION_RUNTIME_BACKEND: container KARTOGRAPH_EXTRACTION_RUNTIME_CONTAINER_ENGINE: auto KARTOGRAPH_EXTRACTION_RUNTIME_CONTAINER_NETWORK: kartograph_kartograph KARTOGRAPH_EXTRACTION_RUNTIME_STICKY_IMAGE: kartograph-agent-runtime:dev KARTOGRAPH_EXTRACTION_RUNTIME_API_BASE_URL: http://api:8000 KARTOGRAPH_EXTRACTION_RUNTIME_JOB_PACKAGE_WORK_DIR: /tmp/kartograph/job_packages - KARTOGRAPH_EXTRACTION_RUNTIME_SKILLS_DIR: /app/skills - # Vertex AI for Claude Agent SDK (mirror k-extract; set in host .env or shell) - CLAUDE_CODE_USE_VERTEX: ${CLAUDE_CODE_USE_VERTEX:-} - ANTHROPIC_VERTEX_PROJECT_ID: ${ANTHROPIC_VERTEX_PROJECT_ID:-} - CLOUD_ML_REGION: ${CLOUD_ML_REGION:-us-east5} - KARTOGRAPH_GCLOUD_CONFIG_MOUNT: ${KARTOGRAPH_GCLOUD_CONFIG_MOUNT:-} + KARTOGRAPH_EXTRACTION_RUNTIME_SKILLS_DIR: ${PWD}/skills + KARTOGRAPH_EXTRACTION_RUNTIME_CONTAINER_RUN_UID: ${HOST_UID} + KARTOGRAPH_EXTRACTION_RUNTIME_CONTAINER_RUN_GID: ${HOST_GID} + # Vertex AI for Claude Agent SDK in sticky assistant containers + CLAUDE_CODE_USE_VERTEX: "1" + ANTHROPIC_VERTEX_PROJECT_ID: itpc-gcp-hcm-pe-eng-claude + CLOUD_ML_REGION: us-east5 + KARTOGRAPH_GCLOUD_CONFIG_MOUNT: ${HOME}/.config/gcloud volumes: # Mount the entire app directory (minus venv) for hot-reload - ./src/api:/app:z - /app/.venv + # Shared with sibling sticky containers launched via the host Docker socket + - /tmp/kartograph/job_packages:/tmp/kartograph/job_packages # Allow API process to launch 
sibling extraction runtime containers locally - /var/run/docker.sock:/var/run/docker.sock + # Docker/Podman CLI from host (required for container runtime backend) + - ${DOCKER_BIN:-/usr/bin/docker}:/usr/bin/docker:ro command: - /bin/bash - -c diff --git a/src/agent-runtime/Dockerfile b/src/agent-runtime/Dockerfile index 54172815e..035c50698 100644 --- a/src/agent-runtime/Dockerfile +++ b/src/agent-runtime/Dockerfile @@ -4,10 +4,10 @@ WORKDIR /runtime COPY --from=ghcr.io/astral-sh/uv:0.9.18 /uv /uvx /bin/ -COPY pyproject.toml /runtime/pyproject.toml +COPY pyproject.toml uv.lock /runtime/ COPY kartograph_agent_runtime /runtime/kartograph_agent_runtime -RUN uv sync --no-dev +RUN uv sync --frozen --no-dev ENV PATH="/runtime/.venv/bin:$PATH" \ PYTHONUNBUFFERED=1 @@ -15,6 +15,6 @@ ENV PATH="/runtime/.venv/bin:$PATH" \ EXPOSE 8787 HEALTHCHECK --interval=15s --timeout=3s --start-period=10s --retries=5 \ - CMD python -c "import urllib.request; urllib.request.urlopen('http://127.0.0.1:8787/health').read()" || exit 1 + CMD /runtime/.venv/bin/python -c "import urllib.request; urllib.request.urlopen('http://127.0.0.1:8787/health').read()" || exit 1 -CMD ["python", "-m", "kartograph_agent_runtime"] +CMD ["/runtime/.venv/bin/python", "-m", "kartograph_agent_runtime"] diff --git a/src/agent-runtime/kartograph_agent_runtime/executor.py b/src/agent-runtime/kartograph_agent_runtime/executor.py index e55b1d58b..b4b54eb40 100644 --- a/src/agent-runtime/kartograph_agent_runtime/executor.py +++ b/src/agent-runtime/kartograph_agent_runtime/executor.py @@ -2,6 +2,7 @@ from __future__ import annotations +import asyncio import os from collections.abc import AsyncIterator from typing import Any @@ -10,6 +11,8 @@ from kartograph_agent_runtime.tools import RuntimeTooling from kartograph_agent_runtime.vertex import build_claude_agent_env +_DEFAULT_TURN_TIMEOUT_SECONDS = 180.0 + def _build_system_prompt(agent_configuration: dict[str, Any]) -> str: system_prompt = 
str(agent_configuration.get("system_prompt") or "").strip() @@ -31,6 +34,41 @@ def _apply_model_env(settings: AgentRuntimeSettings) -> str: return "unconfigured" +def _extract_sdk_reply(message: Any) -> str | None: + result = getattr(message, "result", None) + if isinstance(result, str) and result.strip(): + return result.strip() + + content = getattr(message, "content", None) + if isinstance(content, str) and content.strip(): + return content.strip() + if isinstance(content, list): + parts: list[str] = [] + for block in content: + text = getattr(block, "text", None) + if isinstance(text, str) and text.strip(): + parts.append(text.strip()) + if parts: + return parts[-1] + return None + + +def _build_sdk_env(settings: AgentRuntimeSettings) -> dict[str, str]: + env = build_claude_agent_env(settings) + if settings.gcloud_config_dir.strip(): + env.setdefault("CLOUDSDK_CONFIG", settings.gcloud_config_dir.strip()) + if settings.google_application_credentials.strip(): + env.setdefault( + "GOOGLE_APPLICATION_CREDENTIALS", + settings.google_application_credentials.strip(), + ) + env.setdefault("HOME", settings.home_dir.strip() or "/tmp") + env.setdefault("API_TIMEOUT_MS", "120000") + env.setdefault("CLAUDE_CODE_MAX_RETRIES", "2") + env.setdefault("CLAUDE_ASYNC_AGENT_STALL_TIMEOUT_MS", "120000") + return env + + async def stream_turn_events( *, settings: AgentRuntimeSettings, @@ -38,6 +76,7 @@ async def stream_turn_events( ui_mode: str, agent_configuration: dict[str, Any], message_history: list[dict[str, Any]], + turn_timeout_seconds: float = _DEFAULT_TURN_TIMEOUT_SECONDS, ) -> AsyncIterator[dict[str, Any]]: auth_mode = _apply_model_env(settings) yield { @@ -58,6 +97,7 @@ async def stream_turn_events( agent_configuration=agent_configuration, message_history=message_history, auth_mode=auth_mode, + turn_timeout_seconds=turn_timeout_seconds, ): yield event return @@ -91,6 +131,7 @@ async def _stream_with_claude_sdk( agent_configuration: dict[str, Any], message_history: 
list[dict[str, Any]], auth_mode: str, + turn_timeout_seconds: float, ) -> AsyncIterator[dict[str, Any]]: from claude_agent_sdk import ClaudeAgentOptions, query @@ -113,18 +154,49 @@ async def _stream_with_claude_sdk( ], } - chunks: list[str] = [] + sdk_env = _build_sdk_env(settings) options = ClaudeAgentOptions( system_prompt=system_prompt, - env=build_claude_agent_env(settings), - ) - async for sdk_message in query(prompt=prompt, options=options): - text = getattr(sdk_message, "result", None) or getattr(sdk_message, "content", None) - if isinstance(text, str) and text.strip(): - chunks.append(text.strip()) - - reply = chunks[-1] if chunks else ( - "Claude Agent SDK completed without a textual response. " - "Retry with a more specific graph-management request." + env=sdk_env, + permission_mode="bypassPermissions", + max_turns=8, + setting_sources=[], ) + + reply: str | None = None + try: + async with asyncio.timeout(turn_timeout_seconds): + async for sdk_message in query(prompt=prompt, options=options): + extracted = _extract_sdk_reply(sdk_message) + if extracted: + reply = extracted + except TimeoutError: + yield { + "type": "done", + "ok": False, + "error": { + "code": "AGENT_TURN_TIMEOUT", + "message": ( + f"Claude Agent SDK did not complete within {int(turn_timeout_seconds)}s. " + "Check Vertex credentials and model access for this project." + ), + }, + } + return + except Exception as exc: # noqa: BLE001 + yield { + "type": "done", + "ok": False, + "error": { + "code": "AGENT_TURN_FAILED", + "message": str(exc), + }, + } + return + + if not reply: + reply = ( + "Claude Agent SDK completed without a textual response. " + "Retry with a more specific graph-management request." 
+ ) yield {"type": "done", "ok": True, "reply": reply} diff --git a/src/agent-runtime/kartograph_agent_runtime/server.py b/src/agent-runtime/kartograph_agent_runtime/server.py index 7df4322f5..87b89a70e 100644 --- a/src/agent-runtime/kartograph_agent_runtime/server.py +++ b/src/agent-runtime/kartograph_agent_runtime/server.py @@ -3,6 +3,7 @@ from __future__ import annotations import json +import logging from collections.abc import AsyncIterator from typing import Any @@ -13,6 +14,8 @@ from kartograph_agent_runtime.executor import stream_turn_events from kartograph_agent_runtime.settings import AgentRuntimeSettings +logger = logging.getLogger(__name__) + app = FastAPI(title="Kartograph Agent Runtime", version="0.1.0") settings = AgentRuntimeSettings() @@ -31,14 +34,46 @@ async def health() -> dict[str, str]: @app.post("/v1/turn") async def stream_turn(request: TurnRequest) -> StreamingResponse: + logger.info( + "agent_runtime_turn_started session_id=%s ui_mode=%s message_len=%s", + settings.session_id, + request.ui_mode, + len(request.message), + ) + async def event_stream() -> AsyncIterator[str]: - async for event in stream_turn_events( - settings=settings, - message=request.message, - ui_mode=request.ui_mode, - agent_configuration=request.agent_configuration, - message_history=request.message_history, - ): - yield json.dumps(event) + "\n" + try: + async for event in stream_turn_events( + settings=settings, + message=request.message, + ui_mode=request.ui_mode, + agent_configuration=request.agent_configuration, + message_history=request.message_history, + ): + if event.get("type") == "done": + logger.info( + "agent_runtime_turn_finished session_id=%s ok=%s", + settings.session_id, + event.get("ok"), + ) + yield json.dumps(event) + "\n" + except Exception: + logger.exception( + "agent_runtime_turn_failed session_id=%s", + settings.session_id, + ) + yield ( + json.dumps( + { + "type": "done", + "ok": False, + "error": { + "code": "AGENT_RUNTIME_INTERNAL_ERROR", + 
"message": "Agent runtime failed while processing the turn.", + }, + } + ) + + "\n" + ) return StreamingResponse(event_stream(), media_type="application/x-ndjson") diff --git a/src/agent-runtime/kartograph_agent_runtime/settings.py b/src/agent-runtime/kartograph_agent_runtime/settings.py index 724e190f9..fd8e6048f 100644 --- a/src/agent-runtime/kartograph_agent_runtime/settings.py +++ b/src/agent-runtime/kartograph_agent_runtime/settings.py @@ -25,6 +25,9 @@ class AgentRuntimeSettings(BaseSettings): anthropic_api_key: str = Field(default="", alias="ANTHROPIC_API_KEY") vertex_project_id: str = Field(default="", alias="ANTHROPIC_VERTEX_PROJECT_ID") vertex_region: str = Field(default="us-east5", alias="CLOUD_ML_REGION") + gcloud_config_dir: str = Field(default="", alias="CLOUDSDK_CONFIG") + google_application_credentials: str = Field(default="", alias="GOOGLE_APPLICATION_CREDENTIALS") + home_dir: str = Field(default="/tmp", alias="HOME") def vertex_enabled(self) -> bool: return vertex_enabled_from_env() diff --git a/src/api/extraction/application/chat_turn_service.py b/src/api/extraction/application/chat_turn_service.py index 812771730..8046f47a9 100644 --- a/src/api/extraction/application/chat_turn_service.py +++ b/src/api/extraction/application/chat_turn_service.py @@ -127,8 +127,10 @@ async def stream_chat_turn( session.message_history.append({"role": "user", "content": trimmed}) session.updated_at = datetime.now(UTC) + await self._session_service.save_session(session) assistant_reply: str | None = None + stream_failed = False async for event in self._chat_agent.stream_turn( session=session, user_message=trimmed, @@ -143,6 +145,8 @@ async def stream_chat_turn( if event.get("type") == "done": if event.get("ok") is True and event.get("reply"): assistant_reply = str(event["reply"]) + elif event.get("ok") is not True: + stream_failed = True yield event if assistant_reply: @@ -150,3 +154,18 @@ async def stream_chat_turn( session.updated_at = datetime.now(UTC) 
session.runtime_context.pop("activity_lines", None) await self._session_service.save_session(session) + elif stream_failed: + session.updated_at = datetime.now(UTC) + await self._session_service.save_session(session) + else: + yield { + "type": "done", + "ok": False, + "error": { + "code": "AGENT_STREAM_INCOMPLETE", + "message": ( + "Graph Management Assistant ended the turn without a final response. " + "Check sticky container logs for Vertex or SDK errors." + ), + }, + } diff --git a/src/api/extraction/application/sticky_session_runtime_service.py b/src/api/extraction/application/sticky_session_runtime_service.py index e76604f43..f1ddb6de8 100644 --- a/src/api/extraction/application/sticky_session_runtime_service.py +++ b/src/api/extraction/application/sticky_session_runtime_service.py @@ -2,6 +2,7 @@ from __future__ import annotations +import asyncio from collections.abc import AsyncIterator from datetime import UTC, datetime from typing import Any @@ -19,6 +20,7 @@ from extraction.ports.runtime import IStickySessionRuntimeManager, StickySessionRuntimeLease from extraction.ports.sticky_runtime_health import IStickyRuntimeHealthChecker from extraction.ports.sticky_session_bootstrap import IStickySessionBootstrapBuilder +from shared_kernel.container_runtime.ports import ContainerRuntimeError class StickySessionRuntimeService: @@ -82,12 +84,22 @@ async def ensure_runtime_for_chat( session: ExtractionAgentSession, ) -> AsyncIterator[dict[str, Any]]: sticky = session.runtime_context.get("sticky_runtime", {}) - if ( - isinstance(sticky.get("runtime_base_url"), str) - and sticky.get("phase") == "ready" - and sticky.get("container_id") - ): + container_id = sticky.get("container_id") + persisted_container_id = container_id if isinstance(container_id, str) else None + + lease = await asyncio.to_thread( + self._sticky_runtime_manager.try_resolve_active_lease, + session_id=session.id, + container_id=persisted_container_id, + user_id=user_id, + 
knowledge_graph_id=knowledge_graph_id, + mode=mode.value, + ) + if lease is not None: + session.runtime_context["sticky_runtime"] = self._lease_context(lease, phase="ready") + await self._session_service.save_session(session) return + async for event in self._stream_prepare_runtime( tenant_id=tenant_id, user_id=user_id, @@ -165,7 +177,8 @@ async def _stream_prepare_runtime( return if self._runtime_backend != "container": - lease = self._sticky_runtime_manager.get_or_start_runtime( + lease = await asyncio.to_thread( + self._sticky_runtime_manager.get_or_start_runtime, session_id=session.id, user_id=user_id, knowledge_graph_id=knowledge_graph_id, @@ -203,13 +216,34 @@ async def _stream_prepare_runtime( "Starting isolated Claude Agent SDK container", ], } - lease = self._sticky_runtime_manager.get_or_start_runtime( - session_id=session.id, - user_id=user_id, - knowledge_graph_id=knowledge_graph_id, - mode=mode.value, - bootstrap=bootstrap, - ) + lease: StickySessionRuntimeLease + try: + lease = await asyncio.to_thread( + self._sticky_runtime_manager.get_or_start_runtime, + session_id=session.id, + user_id=user_id, + knowledge_graph_id=knowledge_graph_id, + mode=mode.value, + bootstrap=bootstrap, + ) + except ContainerRuntimeError as exc: + session.runtime_context["sticky_runtime"] = { + "phase": "failed", + "status": "failed", + } + if persist_session: + await self._session_service.save_session(session) + yield { + "type": "done", + "ok": False, + "ready": False, + "error": { + "code": "RUNTIME_START_FAILED", + "message": str(exc), + }, + } + return + session.runtime_context["sticky_runtime"] = self._lease_context(lease, phase="starting") yield { "type": "thinking", diff --git a/src/api/extraction/dependencies.py b/src/api/extraction/dependencies.py index d81916c98..343c8fcd8 100644 --- a/src/api/extraction/dependencies.py +++ b/src/api/extraction/dependencies.py @@ -51,13 +51,11 @@ def get_ephemeral_extraction_worker_launcher() -> IEphemeralExtractionWorkerLaun 
return create_ephemeral_extraction_worker_launcher() -def get_extraction_agent_session_service( - session: Annotated[AsyncSession, Depends(get_write_session)], - sticky_runtime_manager: Annotated[ - IStickySessionRuntimeManager, Depends(get_sticky_session_runtime_manager) - ], +def _build_extraction_agent_session_service( + session: AsyncSession, + *, + sticky_runtime_manager: IStickySessionRuntimeManager | None = None, ) -> ExtractionAgentSessionService: - """Get ExtractionAgentSessionService instance.""" skill_resolution_service = ExtractionSkillResolutionService( override_repository=ExtractionSkillOverrideRepository() ) @@ -69,6 +67,26 @@ def get_extraction_agent_session_service( ) +def get_extraction_agent_session_service( + session: Annotated[AsyncSession, Depends(get_write_session)], +) -> ExtractionAgentSessionService: + """Get ExtractionAgentSessionService for read/create session routes.""" + return _build_extraction_agent_session_service(session) + + +def get_extraction_agent_session_service_with_runtime( + session: Annotated[AsyncSession, Depends(get_write_session)], + sticky_runtime_manager: Annotated[ + IStickySessionRuntimeManager, Depends(get_sticky_session_runtime_manager) + ], +) -> ExtractionAgentSessionService: + """Get ExtractionAgentSessionService for routes that reset sticky containers.""" + return _build_extraction_agent_session_service( + session, + sticky_runtime_manager=sticky_runtime_manager, + ) + + def get_extraction_chat_turn_service( session: Annotated[AsyncSession, Depends(get_write_session)], sticky_runtime_manager: Annotated[ @@ -80,10 +98,8 @@ def get_extraction_chat_turn_service( skill_resolution_service = ExtractionSkillResolutionService( override_repository=ExtractionSkillOverrideRepository() ) - session_service = ExtractionAgentSessionService( - repository=ExtractionAgentSessionRepository(session=session), - skill_resolution_service=skill_resolution_service, - 
run_metrics_reader=ExtractionSessionRunMetricsReader(session=session), + session_service = _build_extraction_agent_session_service( + session, sticky_runtime_manager=sticky_runtime_manager, ) bootstrap_builder = StickySessionBootstrapBuilder( diff --git a/src/api/extraction/infrastructure/container_workload_runtime.py b/src/api/extraction/infrastructure/container_workload_runtime.py index eda814fb9..1a10f80af 100644 --- a/src/api/extraction/infrastructure/container_workload_runtime.py +++ b/src/api/extraction/infrastructure/container_workload_runtime.py @@ -29,6 +29,18 @@ def _sanitize_container_name(prefix: str, identifier: str) -> str: return name[:63].rstrip("-_.") or f"{prefix}runtime" +_GCLOUD_ADC_FILENAME = "application_default_credentials.json" + + +def _gcloud_adc_env(*, container_config_path: str) -> dict[str, str]: + base = container_config_path.rstrip("/") + return { + "CLOUDSDK_CONFIG": base, + "GOOGLE_APPLICATION_CREDENTIALS": f"{base}/{_GCLOUD_ADC_FILENAME}", + "HOME": "/tmp", + } + + class ContainerStickySessionRuntimeManager(IStickySessionRuntimeManager): """Sticky runtime manager backed by real container lifecycle operations.""" @@ -47,6 +59,9 @@ def __init__( vertex_region: str = "us-east5", vertex_enabled: bool = False, gcloud_config_mount: str | None = None, + gcloud_config_container_path: str = "/gcloud/config", + container_run_uid: int | None = None, + container_run_gid: int | None = None, ) -> None: self._container_runtime = container_runtime self._sticky_image = sticky_image @@ -60,6 +75,9 @@ def __init__( self._vertex_region = vertex_region self._vertex_enabled = vertex_enabled self._gcloud_config_mount = gcloud_config_mount + self._gcloud_config_container_path = gcloud_config_container_path + self._container_run_uid = container_run_uid + self._container_run_gid = container_run_gid self._leases: dict[str, StickySessionRuntimeLease] = {} def get_or_start_runtime( @@ -87,6 +105,18 @@ def get_or_start_runtime( self._leases[session_id] = 
refreshed return refreshed + adopted = self._adopt_running_container_if_present( + session_id=session_id, + user_id=user_id, + knowledge_graph_id=knowledge_graph_id, + mode=mode, + now=now, + container_id_hint=None, + ) + if adopted is not None: + self._leases[session_id] = adopted + return adopted + if existing is not None: self._terminate_container(existing.container_id) @@ -134,6 +164,105 @@ def cleanup_expired(self, *, now: datetime) -> list[str]: terminated.append(lease.container_id) return terminated + def try_resolve_active_lease( + self, + *, + session_id: str, + user_id: str = "", + knowledge_graph_id: str = "", + mode: str = "", + container_id: str | None = None, + ) -> StickySessionRuntimeLease | None: + now = datetime.now(UTC) + lease = self._leases.get(session_id) + if ( + lease is not None + and lease.expires_at > now + and self._container_runtime.is_running(lease.container_id) + ): + refreshed = replace( + lease, + last_activity_at=now, + expires_at=now + self._session_ttl, + status="active", + ) + self._leases[session_id] = refreshed + return refreshed + + adopt_user_id = lease.user_id if lease is not None else user_id + adopt_kg_id = lease.knowledge_graph_id if lease is not None else knowledge_graph_id + adopt_mode = lease.mode if lease is not None else mode + hints = [container_id] if container_id else [] + container_name = _sanitize_container_name("kartograph-sticky-", session_id) + named_id = self._container_runtime.container_id_for_name(container_name) + if named_id is not None: + hints.append(named_id) + + for hint in hints: + if not hint or not self._container_runtime.is_running(hint): + continue + adopted = self._adopt_running_container_if_present( + session_id=session_id, + user_id=adopt_user_id, + knowledge_graph_id=adopt_kg_id, + mode=adopt_mode, + now=now, + container_id_hint=hint, + ) + if adopted is not None: + self._leases[session_id] = adopted + return adopted + return None + + def is_runtime_active( + self, + *, + session_id: str, + 
container_id: str | None = None, + user_id: str = "", + knowledge_graph_id: str = "", + mode: str = "", + ) -> bool: + return ( + self.try_resolve_active_lease( + session_id=session_id, + container_id=container_id, + user_id=user_id, + knowledge_graph_id=knowledge_graph_id, + mode=mode, + ) + is not None + ) + + def _adopt_running_container_if_present( + self, + *, + session_id: str, + user_id: str, + knowledge_graph_id: str, + mode: str, + now: datetime, + container_id_hint: str | None, + ) -> StickySessionRuntimeLease | None: + container_name = _sanitize_container_name("kartograph-sticky-", session_id) + container_id = container_id_hint or self._container_runtime.container_id_for_name( + container_name + ) + if container_id is None: + return None + runtime_base_url = f"http://{container_name}:{self._sticky_service_port}" + return StickySessionRuntimeLease( + session_id=session_id, + container_id=container_id, + user_id=user_id, + knowledge_graph_id=knowledge_graph_id, + mode=mode, + status="active", + last_activity_at=now, + expires_at=now + self._session_ttl, + runtime_base_url=runtime_base_url, + ) + def _start_runtime( self, *, @@ -186,8 +315,13 @@ def _start_runtime( ) ) if self._gcloud_config_mount: - binds.append(f"{self._gcloud_config_mount}:/root/.config/gcloud:ro") - env.setdefault("CLOUDSDK_CONFIG", "/root/.config/gcloud") + container_gcloud = self._gcloud_config_container_path.rstrip("/") + binds.append(f"{self._gcloud_config_mount}:{container_gcloud}:ro") + env.update(_gcloud_adc_env(container_config_path=container_gcloud)) + + container_user: str | None = None + if self._container_run_uid is not None and self._container_run_gid is not None: + container_user = f"{self._container_run_uid}:{self._container_run_gid}" launched = self._container_runtime.run( ContainerRunSpec( @@ -196,6 +330,7 @@ def _start_runtime( env=env, binds=tuple(binds), network=self._container_network, + user=container_user, labels={ "kartograph.runtime.kind": "sticky", 
"kartograph.session_id": session_id, diff --git a/src/api/extraction/infrastructure/remote_sticky_container_chat_agent.py b/src/api/extraction/infrastructure/remote_sticky_container_chat_agent.py index f20b5079c..34957bf45 100644 --- a/src/api/extraction/infrastructure/remote_sticky_container_chat_agent.py +++ b/src/api/extraction/infrastructure/remote_sticky_container_chat_agent.py @@ -10,13 +10,21 @@ from extraction.domain.entities.agent_session import ExtractionAgentSession from extraction.domain.value_objects import GraphManagementUiMode +from extraction.infrastructure.workload_runtime_settings import ( + get_extraction_workload_runtime_settings, +) class RemoteStickyContainerChatAgent: """Delegates conversational turns to the sticky session Claude agent runtime.""" - def __init__(self, *, request_timeout_seconds: float = 120.0) -> None: - self._request_timeout_seconds = request_timeout_seconds + def __init__(self, *, request_timeout_seconds: float | None = None) -> None: + settings = get_extraction_workload_runtime_settings() + self._request_timeout_seconds = ( + request_timeout_seconds + if request_timeout_seconds is not None + else settings.sticky_turn_timeout_seconds + 30.0 + ) async def stream_turn( self, @@ -47,7 +55,8 @@ async def stream_turn( url = f"{runtime_base_url.rstrip('/')}/v1/turn" try: - async with httpx.AsyncClient(timeout=self._request_timeout_seconds) as client: + timeout = httpx.Timeout(10.0, read=self._request_timeout_seconds) + async with httpx.AsyncClient(timeout=timeout) as client: async with client.stream("POST", url, json=payload) as response: if response.status_code >= 400: body = await response.aread() diff --git a/src/api/extraction/infrastructure/repositories/agent_session_repository.py b/src/api/extraction/infrastructure/repositories/agent_session_repository.py index 156301e48..01596dc64 100644 --- a/src/api/extraction/infrastructure/repositories/agent_session_repository.py +++ 
b/src/api/extraction/infrastructure/repositories/agent_session_repository.py @@ -42,6 +42,7 @@ async def save(self, session: ExtractionAgentSession) -> None: model.updated_at = session.updated_at model.archived_at = session.archived_at await self._session.flush() + await self._session.commit() async def get_by_id(self, session_id: str) -> ExtractionAgentSession | None: stmt = select(ExtractionAgentSessionModel).where( diff --git a/src/api/extraction/infrastructure/workload_runtime.py b/src/api/extraction/infrastructure/workload_runtime.py index 6af2f5655..7544854f7 100644 --- a/src/api/extraction/infrastructure/workload_runtime.py +++ b/src/api/extraction/infrastructure/workload_runtime.py @@ -89,6 +89,48 @@ def cleanup_expired(self, *, now: datetime) -> list[str]: terminated.append(lease.container_id) return terminated + def try_resolve_active_lease( + self, + *, + session_id: str, + user_id: str = "", + knowledge_graph_id: str = "", + mode: str = "", + container_id: str | None = None, + ) -> StickySessionRuntimeLease | None: + now = datetime.now(UTC) + lease = self._leases.get(session_id) + if lease is not None and lease.expires_at > now: + refreshed = replace( + lease, + last_activity_at=now, + expires_at=now + self._session_ttl, + status="active", + ) + self._leases[session_id] = refreshed + return refreshed + return None + + def is_runtime_active( + self, + *, + session_id: str, + container_id: str | None = None, + user_id: str = "", + knowledge_graph_id: str = "", + mode: str = "", + ) -> bool: + return ( + self.try_resolve_active_lease( + session_id=session_id, + container_id=container_id, + user_id=user_id, + knowledge_graph_id=knowledge_graph_id, + mode=mode, + ) + is not None + ) + class ScopedWorkloadCredentialIssuer: """Issues short-lived tenant/KG scoped credentials for extraction workloads.""" diff --git a/src/api/extraction/infrastructure/workload_runtime_factory.py b/src/api/extraction/infrastructure/workload_runtime_factory.py index 
f04a17760..8642c89f5 100644 --- a/src/api/extraction/infrastructure/workload_runtime_factory.py +++ b/src/api/extraction/infrastructure/workload_runtime_factory.py @@ -73,6 +73,9 @@ def create_sticky_session_runtime_manager( vertex_region=resolved.vertex_region, vertex_enabled=resolved.vertex_enabled(), gcloud_config_mount=resolved.gcloud_config_mount, + gcloud_config_container_path=resolved.gcloud_config_container_path, + container_run_uid=resolved.container_run_uid, + container_run_gid=resolved.container_run_gid, ) diff --git a/src/api/extraction/infrastructure/workload_runtime_settings.py b/src/api/extraction/infrastructure/workload_runtime_settings.py index da2541971..7c1ae8f34 100644 --- a/src/api/extraction/infrastructure/workload_runtime_settings.py +++ b/src/api/extraction/infrastructure/workload_runtime_settings.py @@ -27,7 +27,13 @@ class ExtractionWorkloadRuntimeSettings(BaseSettings): container_network: str | None = Field(default=None) sticky_image: str = Field(default="kartograph-agent-runtime:dev") worker_image: str = Field(default="docker.io/library/busybox:1.36") - sticky_command: tuple[str, ...] = Field(default=("python", "-m", "kartograph_agent_runtime")) + sticky_command: tuple[str, ...] = Field( + default=(), + description=( + "Optional container entrypoint override. Empty uses the image CMD " + "(kartograph-agent-runtime invokes the venv interpreter)." + ), + ) worker_command: tuple[str, ...] 
= Field(default=("sleep", "3600")) sticky_service_port: int = Field(default=8787, ge=1024, le=65535) container_skills_mount: str = Field(default="/app/skills") @@ -37,9 +43,13 @@ class ExtractionWorkloadRuntimeSettings(BaseSettings): skills_dir: str = Field(default="/app/skills") api_base_url: str = Field(default="http://api:8000") sticky_health_timeout_seconds: float = Field(default=90.0, ge=5.0, le=600.0) + sticky_turn_timeout_seconds: float = Field(default=180.0, ge=30.0, le=900.0) vertex_project_id: str = Field(default="") vertex_region: str = Field(default="us-east5") gcloud_config_mount: str | None = Field(default=None) + gcloud_config_container_path: str = Field(default="/gcloud/config") + container_run_uid: int | None = Field(default=None) + container_run_gid: int | None = Field(default=None) def vertex_enabled(self) -> bool: return vertex_enabled_from_env() @@ -63,6 +73,26 @@ def _apply_vertex_env_aliases(self) -> "ExtractionWorkloadRuntimeSettings": gcloud = os.getenv("KARTOGRAPH_GCLOUD_CONFIG_MOUNT", "").strip() if gcloud: object.__setattr__(self, "gcloud_config_mount", gcloud) + if self.container_run_uid is None: + for key in ( + "KARTOGRAPH_EXTRACTION_RUNTIME_CONTAINER_RUN_UID", + "HOST_UID", + "UID", + ): + raw = os.getenv(key, "").strip() + if raw.isdigit(): + object.__setattr__(self, "container_run_uid", int(raw)) + break + if self.container_run_gid is None: + for key in ( + "KARTOGRAPH_EXTRACTION_RUNTIME_CONTAINER_RUN_GID", + "HOST_GID", + "GID", + ): + raw = os.getenv(key, "").strip() + if raw.isdigit(): + object.__setattr__(self, "container_run_gid", int(raw)) + break return self @field_validator("sticky_command", "worker_command", mode="before") diff --git a/src/api/extraction/ports/runtime.py b/src/api/extraction/ports/runtime.py index b24ccae88..5a46b12e8 100644 --- a/src/api/extraction/ports/runtime.py +++ b/src/api/extraction/ports/runtime.py @@ -103,6 +103,30 @@ def cleanup_expired(self, *, now: datetime) -> list[str]: """Terminate and 
remove expired sticky runtimes; return container IDs.""" ... + def try_resolve_active_lease( + self, + *, + session_id: str, + user_id: str = "", + knowledge_graph_id: str = "", + mode: str = "", + container_id: str | None = None, + ) -> StickySessionRuntimeLease | None: + """Return an active lease for the session, adopting a running container if needed.""" + ... + + def is_runtime_active( + self, + *, + session_id: str, + container_id: str | None = None, + user_id: str = "", + knowledge_graph_id: str = "", + mode: str = "", + ) -> bool: + """Return True when the sticky runtime for the session is running.""" + ... + class IEphemeralExtractionWorkerLauncher(Protocol): """Launches short-lived extraction workers with scoped credentials.""" diff --git a/src/api/extraction/presentation/routes.py b/src/api/extraction/presentation/routes.py index fbd922010..4e6dba76f 100644 --- a/src/api/extraction/presentation/routes.py +++ b/src/api/extraction/presentation/routes.py @@ -12,6 +12,7 @@ from extraction.application.chat_turn_service import ExtractionChatTurnService from extraction.dependencies import ( get_extraction_agent_session_service, + get_extraction_agent_session_service_with_runtime, get_extraction_chat_turn_service, ) from extraction.domain.value_objects import ExtractionSessionMode @@ -146,7 +147,8 @@ async def clear_chat( mode: ExtractionSessionMode, current_user: Annotated[CurrentUser, Depends(get_current_user)], service: Annotated[ - ExtractionAgentSessionService, Depends(get_extraction_agent_session_service) + ExtractionAgentSessionService, + Depends(get_extraction_agent_session_service_with_runtime), ], authz: Annotated[AuthorizationProvider, Depends(get_spicedb_client)], ) -> ExtractionSessionResponse: diff --git a/src/api/shared_kernel/container_runtime/cli_runtime.py b/src/api/shared_kernel/container_runtime/cli_runtime.py index 29ee2e817..865ae7d15 100644 --- a/src/api/shared_kernel/container_runtime/cli_runtime.py +++ 
b/src/api/shared_kernel/container_runtime/cli_runtime.py @@ -36,6 +36,8 @@ def run(self, spec: ContainerRunSpec) -> ContainerRunResult: command.extend(["--volume", bind]) if spec.network is not None: command.extend(["--network", spec.network]) + if spec.user is not None: + command.extend(["--user", spec.user]) command.append(spec.image) if spec.command: command.extend(spec.command) @@ -76,6 +78,23 @@ def is_running(self, container_id: str) -> bool: ) return result.stdout.strip().lower() == "true" + def container_id_for_name(self, name: str) -> str | None: + """Return the running container ID for a fixed container name, if any.""" + result = subprocess.run( + [self._binary, "inspect", "-f", "{{.Id}}", name], + capture_output=True, + text=True, + check=False, + ) + if result.returncode != 0: + return None + container_id = result.stdout.strip() + if not container_id: + return None + if not self.is_running(container_id): + return None + return container_id + def _execute(self, command: list[str]) -> str: result = subprocess.run( command, diff --git a/src/api/shared_kernel/container_runtime/ports.py b/src/api/shared_kernel/container_runtime/ports.py index 3824eb4bb..97a464806 100644 --- a/src/api/shared_kernel/container_runtime/ports.py +++ b/src/api/shared_kernel/container_runtime/ports.py @@ -23,6 +23,7 @@ class ContainerRunSpec: network: str | None = None detach: bool = True remove_on_exit: bool = False + user: str | None = None @dataclass(frozen=True) @@ -51,3 +52,7 @@ def remove(self, container_id: str, *, force: bool = False) -> None: def is_running(self, container_id: str) -> bool: """Return True when the container exists and is running.""" ... + + def container_id_for_name(self, name: str) -> str | None: + """Return the running container ID for a fixed container name, if any.""" + ... 
diff --git a/src/api/tests/unit/extraction/application/test_chat_turn_service.py b/src/api/tests/unit/extraction/application/test_chat_turn_service.py index e990fe1c7..77f71c573 100644 --- a/src/api/tests/unit/extraction/application/test_chat_turn_service.py +++ b/src/api/tests/unit/extraction/application/test_chat_turn_service.py @@ -187,3 +187,99 @@ async def test_stream_runtime_warmup_marks_memory_backend_ready() -> None: assert done["type"] == "done" assert done["ok"] is True assert done.get("ready") is True + + +class _FailingChatAgent: + async def stream_turn(self, **kwargs): + yield { + "type": "done", + "ok": False, + "error": {"code": "MODEL_ERROR", "message": "Vertex request failed"}, + } + + +class _IncompleteChatAgent: + async def stream_turn(self, **kwargs): + yield {"type": "thinking", "recent": ["Working…"]} + + +@pytest.mark.asyncio +async def test_stream_chat_turn_emits_error_when_agent_stream_incomplete() -> None: + repo = _InMemoryAgentSessionRepository() + sticky = InMemoryStickySessionRuntimeManager() + session_service = ExtractionAgentSessionService(repository=repo) + runtime_service = StickySessionRuntimeService( + session_service=session_service, + skill_resolution_service=_StaticSkillResolutionService(), + ingestion_readiness_reader=_StaticIngestionReadinessReader( + IngestionReadinessSnapshot(1, 1) + ), + sticky_runtime_manager=sticky, + bootstrap_builder=_StaticBootstrapBuilder(), + health_checker=_InstantHealthChecker(), + runtime_backend="memory", + sticky_health_timeout_seconds=5.0, + ) + service = ExtractionChatTurnService( + session_service=session_service, + runtime_service=runtime_service, + chat_agent=_IncompleteChatAgent(), + ) + + events = [ + event + async for event in service.stream_chat_turn( + tenant_id="tenant-1", + user_id="user-1", + knowledge_graph_id="kg-1", + mode=ExtractionSessionMode.SCHEMA_BOOTSTRAP, + ui_mode=GraphManagementUiMode.INITIAL_SCHEMA_DESIGN, + message="Hello!", + ) + ] + + done = events[-1] + assert 
done["type"] == "done" + assert done["ok"] is False + assert done["error"]["code"] == "AGENT_STREAM_INCOMPLETE" + + +@pytest.mark.asyncio +async def test_stream_chat_turn_persists_user_message_when_agent_fails() -> None: + repo = _InMemoryAgentSessionRepository() + sticky = InMemoryStickySessionRuntimeManager() + session_service = ExtractionAgentSessionService(repository=repo) + runtime_service = StickySessionRuntimeService( + session_service=session_service, + skill_resolution_service=_StaticSkillResolutionService(), + ingestion_readiness_reader=_StaticIngestionReadinessReader( + IngestionReadinessSnapshot(1, 1) + ), + sticky_runtime_manager=sticky, + bootstrap_builder=_StaticBootstrapBuilder(), + health_checker=_InstantHealthChecker(), + runtime_backend="memory", + sticky_health_timeout_seconds=5.0, + ) + service = ExtractionChatTurnService( + session_service=session_service, + runtime_service=runtime_service, + chat_agent=_FailingChatAgent(), + ) + + events = [ + event + async for event in service.stream_chat_turn( + tenant_id="tenant-1", + user_id="user-1", + knowledge_graph_id="kg-1", + mode=ExtractionSessionMode.SCHEMA_BOOTSTRAP, + ui_mode=GraphManagementUiMode.INITIAL_SCHEMA_DESIGN, + message="Hello!", + ) + ] + + assert events[-1]["ok"] is False + active = await repo.find_active_by_scope("user-1", "kg-1", ExtractionSessionMode.SCHEMA_BOOTSTRAP) + assert active is not None + assert active.message_history[-1] == {"role": "user", "content": "Hello!"} diff --git a/src/api/tests/unit/extraction/application/test_sticky_session_runtime_service.py b/src/api/tests/unit/extraction/application/test_sticky_session_runtime_service.py new file mode 100644 index 000000000..337a5919c --- /dev/null +++ b/src/api/tests/unit/extraction/application/test_sticky_session_runtime_service.py @@ -0,0 +1,217 @@ +"""Unit tests for StickySessionRuntimeService.""" + +from __future__ import annotations + +import pytest + +from extraction.application.agent_session_service import 
ExtractionAgentSessionService +from extraction.application.sticky_session_runtime_service import StickySessionRuntimeService +from extraction.domain.value_objects import ( + ExtractionSessionMode, + GraphManagementUiMode, + IngestionReadinessSnapshot, +) +from extraction.infrastructure.workload_runtime import InMemoryStickySessionRuntimeManager +from shared_kernel.container_runtime.ports import ContainerRuntimeError + + +class _InMemoryAgentSessionRepository: + def __init__(self) -> None: + self._sessions = {} + + async def save(self, session) -> None: + from dataclasses import replace + + self._sessions[session.id] = replace(session) + + async def get_by_id(self, session_id: str): + session = self._sessions.get(session_id) + if session is None: + return None + from dataclasses import replace + + return replace(session) + + async def find_active_by_scope(self, user_id: str, knowledge_graph_id: str, mode): + for session in self._sessions.values(): + if ( + session.user_id == user_id + and session.knowledge_graph_id == knowledge_graph_id + and session.mode == mode + and session.archived_at is None + ): + from dataclasses import replace + + return replace(session) + return None + + async def list_by_scope(self, user_id: str, knowledge_graph_id: str, mode=None): + return [] + + +class _StaticSkillResolutionService: + async def resolve_for_graph_management_turn(self, **kwargs): + return type( + "_Resolved", + (), + { + "system_prompt": "system", + "prompt_hierarchy": ("platform",), + "guardrails": ("scope",), + "skills": {}, + }, + )() + + +class _StaticIngestionReadinessReader: + async def read_for_knowledge_graph(self, *, knowledge_graph_id: str): + return IngestionReadinessSnapshot(0, 0) + + +class _StaticBootstrapBuilder: + async def build(self, **kwargs): + return None + + +class _InstantHealthChecker: + async def wait_until_healthy(self, **kwargs): + return + yield # pragma: no cover + + +class _FailingStickyRuntimeManager(InMemoryStickySessionRuntimeManager): + 
def get_or_start_runtime(self, **kwargs): + raise ContainerRuntimeError("docker run failed: image not found") + + +@pytest.mark.asyncio +async def test_stream_runtime_warmup_surfaces_container_start_failure() -> None: + repo = _InMemoryAgentSessionRepository() + session_service = ExtractionAgentSessionService(repository=repo) + service = StickySessionRuntimeService( + session_service=session_service, + skill_resolution_service=_StaticSkillResolutionService(), + ingestion_readiness_reader=_StaticIngestionReadinessReader(), + sticky_runtime_manager=_FailingStickyRuntimeManager(), + bootstrap_builder=_StaticBootstrapBuilder(), + health_checker=_InstantHealthChecker(), + runtime_backend="container", + sticky_health_timeout_seconds=5.0, + ) + + events = [ + event + async for event in service.stream_runtime_warmup( + tenant_id="tenant-1", + user_id="user-1", + knowledge_graph_id="kg-1", + mode=ExtractionSessionMode.SCHEMA_BOOTSTRAP, + ui_mode=GraphManagementUiMode.INITIAL_SCHEMA_DESIGN, + ) + ] + + done = events[-1] + assert done["type"] == "done" + assert done["ok"] is False + assert done["error"]["code"] == "RUNTIME_START_FAILED" + assert "image not found" in done["error"]["message"] + + +class _OnceInactiveStickyRuntimeManager(InMemoryStickySessionRuntimeManager): + def __init__(self) -> None: + super().__init__() + self._checked = False + + def try_resolve_active_lease(self, **kwargs): + if not self._checked: + self._checked = True + return None + return super().try_resolve_active_lease(**kwargs) + + +@pytest.mark.asyncio +async def test_ensure_runtime_for_chat_reprepares_when_persisted_runtime_is_inactive() -> None: + repo = _InMemoryAgentSessionRepository() + session_service = ExtractionAgentSessionService(repository=repo) + sticky = _OnceInactiveStickyRuntimeManager() + service = StickySessionRuntimeService( + session_service=session_service, + skill_resolution_service=_StaticSkillResolutionService(), + ingestion_readiness_reader=_StaticIngestionReadinessReader(), 
+ sticky_runtime_manager=sticky, + bootstrap_builder=_StaticBootstrapBuilder(), + health_checker=_InstantHealthChecker(), + runtime_backend="memory", + sticky_health_timeout_seconds=5.0, + ) + session = await session_service.get_or_create_active_session( + user_id="user-1", + knowledge_graph_id="kg-1", + mode=ExtractionSessionMode.SCHEMA_BOOTSTRAP, + ) + session.runtime_context["sticky_runtime"] = { + "container_id": "dead-container", + "status": "active", + "runtime_base_url": "memory://sticky-runtime", + "phase": "ready", + } + await session_service.save_session(session) + + events = [ + event + async for event in service.ensure_runtime_for_chat( + tenant_id="tenant-1", + user_id="user-1", + knowledge_graph_id="kg-1", + mode=ExtractionSessionMode.SCHEMA_BOOTSTRAP, + ui_mode=GraphManagementUiMode.INITIAL_SCHEMA_DESIGN, + session=session, + ) + ] + + assert any(event.get("type") == "ready" for event in events) + assert session.runtime_context["sticky_runtime"]["container_id"] != "dead-container" + assert sticky.try_resolve_active_lease(session_id=session.id) is not None + + +@pytest.mark.asyncio +async def test_ensure_runtime_for_chat_reuses_running_container_without_reprepare() -> None: + repo = _InMemoryAgentSessionRepository() + session_service = ExtractionAgentSessionService(repository=repo) + sticky = InMemoryStickySessionRuntimeManager() + service = StickySessionRuntimeService( + session_service=session_service, + skill_resolution_service=_StaticSkillResolutionService(), + ingestion_readiness_reader=_StaticIngestionReadinessReader(), + sticky_runtime_manager=sticky, + bootstrap_builder=_StaticBootstrapBuilder(), + health_checker=_InstantHealthChecker(), + runtime_backend="memory", + sticky_health_timeout_seconds=5.0, + ) + session = await session_service.get_or_create_active_session( + user_id="user-1", + knowledge_graph_id="kg-1", + mode=ExtractionSessionMode.SCHEMA_BOOTSTRAP, + ) + sticky.get_or_start_runtime( + session_id=session.id, + user_id="user-1", + 
knowledge_graph_id="kg-1", + mode=ExtractionSessionMode.SCHEMA_BOOTSTRAP.value, + ) + + events = [ + event + async for event in service.ensure_runtime_for_chat( + tenant_id="tenant-1", + user_id="user-1", + knowledge_graph_id="kg-1", + mode=ExtractionSessionMode.SCHEMA_BOOTSTRAP, + ui_mode=GraphManagementUiMode.INITIAL_SCHEMA_DESIGN, + session=session, + ) + ] + + assert events == [] + assert session.runtime_context["sticky_runtime"]["phase"] == "ready" diff --git a/src/api/tests/unit/extraction/infrastructure/test_container_workload_runtime.py b/src/api/tests/unit/extraction/infrastructure/test_container_workload_runtime.py index 1947e8bd1..16c761822 100644 --- a/src/api/tests/unit/extraction/infrastructure/test_container_workload_runtime.py +++ b/src/api/tests/unit/extraction/infrastructure/test_container_workload_runtime.py @@ -20,6 +20,7 @@ class TestContainerStickySessionRuntimeManager: def test_reuses_running_container_for_active_session(self) -> None: runtime = MagicMock() runtime.is_running.return_value = True + runtime.container_id_for_name.return_value = None runtime.run.return_value = ContainerRunResult( container_id="container-1", name="kartograph-sticky-session-1", @@ -47,9 +48,33 @@ def test_reuses_running_container_for_active_session(self) -> None: assert first.container_id == second.container_id == "container-1" runtime.run.assert_called_once() + def test_adopts_running_container_after_process_restart(self) -> None: + runtime = MagicMock() + runtime.is_running.return_value = True + runtime.container_id_for_name.return_value = "container-existing" + manager = ContainerStickySessionRuntimeManager( + container_runtime=runtime, + sticky_image="busybox:1.36", + sticky_command=(), + session_ttl=timedelta(minutes=30), + container_network="kartograph_kartograph", + ) + + lease = manager.try_resolve_active_lease( + session_id="session-1", + user_id="user-1", + knowledge_graph_id="kg-1", + mode="schema_bootstrap", + ) + + assert lease is not None + assert 
lease.container_id == "container-existing" + runtime.run.assert_not_called() + def test_reset_stops_existing_container_and_starts_new_one(self) -> None: runtime = MagicMock() runtime.is_running.return_value = True + runtime.container_id_for_name.return_value = None runtime.run.side_effect = [ ContainerRunResult(container_id="container-1", name="name-1"), ContainerRunResult(container_id="container-2", name="name-2"), @@ -81,6 +106,7 @@ def test_reset_stops_existing_container_and_starts_new_one(self) -> None: def test_cleanup_expired_terminates_and_returns_container_ids(self) -> None: runtime = MagicMock() runtime.is_running.return_value = True + runtime.container_id_for_name.return_value = None runtime.run.return_value = ContainerRunResult( container_id="container-1", name="name-1", diff --git a/src/api/tests/unit/extraction/infrastructure/test_sticky_session_container_bootstrap.py b/src/api/tests/unit/extraction/infrastructure/test_sticky_session_container_bootstrap.py index 8a297b999..75a0c45fb 100644 --- a/src/api/tests/unit/extraction/infrastructure/test_sticky_session_container_bootstrap.py +++ b/src/api/tests/unit/extraction/infrastructure/test_sticky_session_container_bootstrap.py @@ -15,13 +15,19 @@ def test_start_runtime_mounts_skills_workspace_and_injects_token() -> None: runtime = MagicMock() + runtime.is_running.return_value = False + runtime.container_id_for_name.return_value = None runtime.run.return_value = ContainerRunResult(container_id="container-1", name="name-1") manager = ContainerStickySessionRuntimeManager( container_runtime=runtime, sticky_image="kartograph-agent-runtime:dev", - sticky_command=("python", "-m", "kartograph_agent_runtime"), + sticky_command=(), session_ttl=timedelta(minutes=30), container_network="kartograph_kartograph", + gcloud_config_mount="/host/.config/gcloud", + gcloud_config_container_path="/gcloud/config", + container_run_uid=1000, + container_run_gid=1000, ) issuer = 
ScopedWorkloadCredentialIssuer(default_ttl=timedelta(minutes=10)) credentials = issuer.issue_for_sticky_session(tenant_id="tenant-1", knowledge_graph_id="kg-1") @@ -42,8 +48,16 @@ def test_start_runtime_mounts_skills_workspace_and_injects_token() -> None: ) spec: ContainerRunSpec = runtime.run.call_args.args[0] + assert spec.command == () assert spec.network == "kartograph_kartograph" assert spec.env["KARTOGRAPH_WORKLOAD_TOKEN"] == credentials.token assert "/tmp/skills:/app/skills:ro" in spec.binds assert "/tmp/session-work:/workspace:ro" in spec.binds + assert "/host/.config/gcloud:/gcloud/config:ro" in spec.binds + assert spec.env["CLOUDSDK_CONFIG"] == "/gcloud/config" + assert spec.env["GOOGLE_APPLICATION_CREDENTIALS"] == ( + "/gcloud/config/application_default_credentials.json" + ) + assert spec.env["HOME"] == "/tmp" + assert spec.user == "1000:1000" assert lease.runtime_base_url.startswith("http://kartograph-sticky-") diff --git a/src/api/tests/unit/extraction/infrastructure/test_workload_runtime_settings.py b/src/api/tests/unit/extraction/infrastructure/test_workload_runtime_settings.py index a5902bca3..f03834f1e 100644 --- a/src/api/tests/unit/extraction/infrastructure/test_workload_runtime_settings.py +++ b/src/api/tests/unit/extraction/infrastructure/test_workload_runtime_settings.py @@ -8,6 +8,11 @@ class TestExtractionWorkloadRuntimeSettings: + def test_default_sticky_command_uses_image_entrypoint(self) -> None: + settings = ExtractionWorkloadRuntimeSettings() + + assert settings.sticky_command == () + def test_parses_command_strings_into_tuple(self) -> None: settings = ExtractionWorkloadRuntimeSettings( sticky_command="sleep 3600", diff --git a/src/api/tests/unit/extraction/presentation/test_routes.py b/src/api/tests/unit/extraction/presentation/test_routes.py index f82a6f5c1..5b6d479a6 100644 --- a/src/api/tests/unit/extraction/presentation/test_routes.py +++ b/src/api/tests/unit/extraction/presentation/test_routes.py @@ -116,7 +116,10 @@ async def 
read_relationships( @pytest.fixture def extraction_client(): - from extraction.dependencies import get_extraction_agent_session_service + from extraction.dependencies import ( + get_extraction_agent_session_service, + get_extraction_agent_session_service_with_runtime, + ) from extraction.presentation import router from iam.dependencies.user import get_current_user from infrastructure.authorization_dependencies import get_spicedb_client @@ -125,6 +128,9 @@ def extraction_client(): repo = _InMemoryAgentSessionRepository() service = ExtractionAgentSessionService(repository=repo) app.dependency_overrides[get_extraction_agent_session_service] = lambda: service + app.dependency_overrides[get_extraction_agent_session_service_with_runtime] = ( + lambda: service + ) app.dependency_overrides[get_current_user] = lambda: CurrentUser( user_id=UserId(value="user-123"), username="alice", diff --git a/src/dev-ui/app/components/extraction/SharedConversationPanel.vue b/src/dev-ui/app/components/extraction/SharedConversationPanel.vue index 2fb7482ab..dbd43ee16 100644 --- a/src/dev-ui/app/components/extraction/SharedConversationPanel.vue +++ b/src/dev-ui/app/components/extraction/SharedConversationPanel.vue @@ -31,6 +31,7 @@ const props = withDefaults(defineProps<{ loading?: boolean clearing?: boolean sending?: boolean + preparingRuntime?: boolean draftMessage?: string activityLines?: string[] inputPlaceholder?: string @@ -46,6 +47,7 @@ const props = withDefaults(defineProps<{ loading: false, clearing: false, sending: false, + preparingRuntime: false, draftMessage: '', activityLines: () => [], inputPlaceholder: 'Describe what you want to do in this graph management session…', @@ -79,6 +81,16 @@ const chatInputDisabled = computed( () => props.loading || props.clearing || props.sending || props.inputDisabled || props.forbidden, ) +const showRuntimeActivity = computed( + () => props.preparingRuntime || props.sending, +) + +const runtimeActivityTitle = computed(() => + 
props.preparingRuntime && !props.sending + ? 'Starting assistant container…' + : 'Thinking...', +) + const thinkingDisplaySlots = computed(() => { const src = props.activityLines.filter(Boolean) if (src.length === 0) return [''] @@ -296,7 +308,7 @@ onMounted(() => { </div> <div - v-if="sending" + v-if="showRuntimeActivity" class="flex gap-3 text-muted-foreground" aria-live="polite" aria-busy="true" @@ -309,7 +321,7 @@ onMounted(() => { > <div class="mb-2 flex items-center gap-2 text-foreground"> <Loader2 class="size-4 shrink-0 animate-spin text-primary" aria-hidden="true" /> - <span class="font-medium tracking-tight">Thinking...</span> + <span class="font-medium tracking-tight">{{ runtimeActivityTitle }}</span> </div> <ol class="m-0 list-none space-y-2 border-l-2 border-primary/25 pl-3"> <li @@ -335,7 +347,7 @@ onMounted(() => { </div> <p - v-if="messageHistory.length === 0 && !sending" + v-if="messageHistory.length === 0 && !showRuntimeActivity" class="py-8 text-center text-sm text-muted-foreground" > No messages yet. Send a prompt or use validate/transition actions to drive session activity. 
diff --git a/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue b/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue index e75957954..87701447d 100644 --- a/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue +++ b/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue @@ -303,9 +303,28 @@ const graphManagementInputPlaceholder = computed( () => GRAPH_MANAGEMENT_INPUT_PLACEHOLDERS[graphManagementMode.value], ) +const conversationSessionForPanel = computed<ExtractionSessionResponse | null>(() => { + if (!extractionSession.value) return null + if (!runtimeReady.value) { + return { + ...extractionSession.value, + message_history: [], + } + } + return extractionSession.value +}) + const sessionStatusLabel = computed(() => { - if (runtimeWarming.value) return 'Starting assistant' - if (!runtimeReady.value && runtimeWarmupError.value) return 'Runtime unavailable' + const latestActivity = sessionActivityLines.value.filter(Boolean).at(-1) + if (runtimeWarming.value) { + return latestActivity ?? 
'Starting assistant' + } + if (!runtimeReady.value && latestActivity) { + return latestActivity + } + if (!runtimeReady.value && runtimeWarmupError.value) { + return runtimeWarmupError.value + } if (sessionLoading.value) return 'Loading session' if (clearingChat.value) return 'Resetting chat' if (extractionSession.value?.id) { @@ -314,6 +333,16 @@ const sessionStatusLabel = computed(() => { return 'No active session' }) +const showRuntimeWarmupProgress = computed( + () => + runtimeWarming.value + || (!runtimeReady.value && sessionActivityLines.value.some((line) => line.trim().length > 0)), +) + +const conversationPanelLoading = computed( + () => sessionLoading.value && !showRuntimeWarmupProgress.value, +) + const chatInputDisabled = computed( () => workspaceForbidden.value || runtimeWarming.value || !runtimeReady.value, ) @@ -787,6 +816,11 @@ async function loadMutationLogEntryPreviews(offset = 0) { } } +async function refreshGraphManagementSession() { + await loadExtractionSession() + await warmupAssistantRuntime() +} + async function loadExtractionSession() { if (!kgId.value || activeStep.value !== 'graph-management') return sessionLoading.value = true @@ -796,6 +830,15 @@ async function loadExtractionSession() { `/extraction/knowledge-graphs/${kgId.value}/sessions/${sharedSessionMode.value}/active`, ) syncActivityLinesFromSession() + const stickyPhase = extractionSession.value?.runtime_context?.sticky_runtime + if ( + stickyPhase + && typeof stickyPhase === 'object' + && (stickyPhase as { phase?: string }).phase === 'ready' + && !runtimeWarming.value + ) { + runtimeReady.value = true + } sessionForbidden.value = false sessionForbiddenReason.value = null } catch (err) { @@ -891,6 +934,7 @@ function onMutationRunKeydown(event: KeyboardEvent, runId: string) { } function syncActivityLinesFromSession() { + if (runtimeWarming.value || showRuntimeWarmupProgress.value) return const context = extractionSession.value?.runtime_context ?? 
{} const candidate = context.activity_lines ?? context.ndjson_activity_lines ?? context.thinking_lines if (Array.isArray(candidate)) { @@ -936,7 +980,10 @@ async function warmupAssistantRuntime() { } if (event.type === 'done') { if (event.ok !== true) { - throw new Error(event.error?.message ?? 'Runtime warmup failed') + throw new Error( + event.error?.message + ?? 'Runtime warmup failed before the assistant container was ready.', + ) } runtimeReady.value = event.ready === true || event.wait === true } @@ -945,6 +992,11 @@ async function warmupAssistantRuntime() { } catch (err) { runtimeWarmupError.value = extractErrorMessage(err) runtimeReady.value = false + const lines = sessionActivityLines.value.filter(Boolean) + sessionActivityLines.value = [ + ...lines, + `Runtime startup failed: ${runtimeWarmupError.value}`, + ] toast.error('Failed to start Graph Management Assistant', { description: runtimeWarmupError.value, }) @@ -1127,7 +1179,6 @@ watch( if (activeStep.value === 'graph-management') { syncGraphManagementState() await loadExtractionSession() - await warmupAssistantRuntime() } }, ) @@ -1137,9 +1188,11 @@ watch( async () => { if (activeStep.value === 'graph-management') { syncGraphManagementState() - await loadExtractionSession() - loadSessionHistory() - loadGraphManagementDataSources() + await Promise.all([ + loadExtractionSession(), + loadSessionHistory(), + loadGraphManagementDataSources(), + ]) await warmupAssistantRuntime() } else { runtimeWarmupGeneration += 1 @@ -1747,16 +1800,17 @@ watch(selectedOpsDataSourceId, () => { :description="graphManagementChatDescription" :input-placeholder="graphManagementInputPlaceholder" :session-status-label="sessionStatusLabel" - :session="extractionSession" - :loading="sessionLoading" + :session="conversationSessionForPanel" + :loading="conversationPanelLoading" :clearing="clearingChat" :sending="sendingChat" + :preparing-runtime="runtimeWarming" :activity-lines="sessionActivityLines" :forbidden="sessionForbidden" 
:forbidden-reason="sessionForbiddenReason" :input-disabled="chatInputDisabled" :input-disabled-reason="chatInputDisabledReason" - @refresh="loadExtractionSession" + @refresh="refreshGraphManagementSession" @clear-chat="clearChat" @send-message="sendChatMessage" /> diff --git a/src/dev-ui/app/tests/kg-extraction-chat.test.ts b/src/dev-ui/app/tests/kg-extraction-chat.test.ts index ce4cbd96c..e1155a39f 100644 --- a/src/dev-ui/app/tests/kg-extraction-chat.test.ts +++ b/src/dev-ui/app/tests/kg-extraction-chat.test.ts @@ -86,4 +86,40 @@ describe('kgExtractionChat', () => { globalThis.fetch = originalFetch } }) + + it('throws when the NDJSON stream ends without a terminal done event', async () => { + const originalFetch = globalThis.fetch + globalThis.fetch = (async () => { + const body = new ReadableStream({ + start(controller) { + controller.enqueue( + new TextEncoder().encode('{"type":"thinking","recent":["Still working…"]}\n'), + ) + controller.close() + }, + }) + return new Response(body, { status: 200, headers: { 'Content-Type': 'application/x-ndjson' } }) + }) as typeof fetch + + try { + const iterator = streamExtractionChatTurn({ + apiBaseUrl: 'http://api.test', + accessToken: 'token', + tenantId: 'tenant-1', + kgId: 'kg-1', + sessionMode: 'schema_bootstrap', + uiMode: 'initial-schema-design', + message: 'Hello', + }) + + await expect(async () => { + // eslint-disable-next-line @typescript-eslint/no-unused-vars + for await (const _event of iterator) { + // drain stream + } + }).rejects.toThrow('stream ended before completion') + } finally { + globalThis.fetch = originalFetch + } + }) }) diff --git a/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts b/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts index 0a17e3b08..c4c935fc8 100644 --- a/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts +++ b/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts @@ -197,7 +197,7 @@ describe('KG-MANAGE-002 - workspace hub tile set', () 
=> { expect(manageWorkspaceVue).toContain('Relationship Types') expect(manageWorkspaceVue).toContain('Mutation Runs') expect(manageWorkspaceHubTs).toContain('Data sources') - expect(manageWorkspaceHubTs).toContain('Design') + expect(manageWorkspaceHubTs).toContain('Graph Management') expect(manageWorkspaceHubTs).toContain('Mutation logs') expect(manageWorkspaceHubTs).toContain('Maintain') }) @@ -555,6 +555,8 @@ describe('KG-MANAGE-017 - chat input keyboard contract', () => { expect(manageWorkspaceVue).toContain('streamExtractionChatTurn') expect(manageWorkspaceVue).toContain('streamRuntimeWarmup') expect(manageWorkspaceVue).toContain('warmupAssistantRuntime') + expect(manageWorkspaceVue).toContain('preparing-runtime') + expect(manageWorkspaceVue).toContain('conversationSessionForPanel') expect(manageWorkspaceVue).toContain('@send-message="sendChatMessage"') }) }) diff --git a/src/dev-ui/app/utils/kgExtractionChat.ts b/src/dev-ui/app/utils/kgExtractionChat.ts index 847eb7b20..93912c172 100644 --- a/src/dev-ui/app/utils/kgExtractionChat.ts +++ b/src/dev-ui/app/utils/kgExtractionChat.ts @@ -57,6 +57,7 @@ async function* streamNdjsonPost( const decoder = new TextDecoder() let buffer = '' + let sawTerminalDone = false while (true) { const { done, value } = await reader.read() @@ -67,13 +68,25 @@ async function* streamNdjsonPost( for (const line of parts) { const trimmed = line.trim() if (!trimmed) continue - yield JSON.parse(trimmed) as ExtractionChatStreamEvent + const event = JSON.parse(trimmed) as ExtractionChatStreamEvent + if (event.type === 'done') { + sawTerminalDone = true + } + yield event } } const tail = buffer.trim() if (tail) { - yield JSON.parse(tail) as ExtractionChatStreamEvent + const event = JSON.parse(tail) as ExtractionChatStreamEvent + if (event.type === 'done') { + sawTerminalDone = true + } + yield event + } + + if (!sawTerminalDone) { + throw new Error('Graph Management Assistant stream ended before completion.') } } From 
2f0b6f5a6b3d741aee181c826105e5a68da7fc86 Mon Sep 17 00:00:00 2001 From: aredenba-rh <aredenba@redhat.com> Date: Fri, 29 May 2026 15:55:48 -0400 Subject: [PATCH 067/153] refactor(ui): rename workspace hub Design phase to Graph Management Drop the redundant branch tip column from the KG data sources table. Co-authored-by: Cursor <cursoragent@cursor.com> --- .../knowledge-graphs/[kgId]/data-sources/index.vue | 9 +-------- src/dev-ui/app/tests/kg-data-sources-phase1.test.ts | 2 +- src/dev-ui/app/tests/kg-manage-workspace-hub.test.ts | 9 +++++++-- src/dev-ui/app/utils/kgManageWorkspaceHub.ts | 12 ++++++------ 4 files changed, 15 insertions(+), 17 deletions(-) diff --git a/src/dev-ui/app/pages/knowledge-graphs/[kgId]/data-sources/index.vue b/src/dev-ui/app/pages/knowledge-graphs/[kgId]/data-sources/index.vue index f28fe0d80..728feaa65 100644 --- a/src/dev-ui/app/pages/knowledge-graphs/[kgId]/data-sources/index.vue +++ b/src/dev-ui/app/pages/knowledge-graphs/[kgId]/data-sources/index.vue @@ -669,7 +669,7 @@ watch(tenantVersion, async () => { </div> <div v-else class="overflow-x-auto rounded-md border"> - <table class="w-full min-w-[1120px] text-sm"> + <table class="w-full min-w-[1000px] text-sm"> <thead> <tr class="border-b bg-muted/50 text-left"> <th class="px-3 py-2 font-medium">Source</th> @@ -679,7 +679,6 @@ watch(tenantVersion, async () => { <th class="px-3 py-2 font-medium">Last extraction baseline</th> <th class="px-3 py-2 font-medium">Ingested at</th> <th class="px-3 py-2 font-medium">Newest unpulled</th> - <th class="px-3 py-2 font-medium">Branch tip</th> <th class="px-3 py-2 font-medium">Actions</th> </tr> </thead> @@ -779,12 +778,6 @@ watch(tenantVersion, async () => { }} </div> </td> - <td class="px-3 py-2 font-mono text-xs text-muted-foreground"> - <span :title="resolveBranchTipCommit(ds) || ''"> - {{ shortCommitHash(resolveBranchTipCommit(ds)) }} - </span> - <div class="mt-0.5 text-[10px] text-muted-foreground">remote tip</div> - </td> <td class="px-3 py-2"> 
<div class="flex flex-wrap gap-1"> <Button size="sm" variant="ghost" class="h-7 px-2 text-[10px]" @click="openEditConfig(ds)"> diff --git a/src/dev-ui/app/tests/kg-data-sources-phase1.test.ts b/src/dev-ui/app/tests/kg-data-sources-phase1.test.ts index 35768bbf0..a290ed065 100644 --- a/src/dev-ui/app/tests/kg-data-sources-phase1.test.ts +++ b/src/dev-ui/app/tests/kg-data-sources-phase1.test.ts @@ -62,7 +62,7 @@ describe('KG data sources phase1 layout', () => { expect(phase1Vue).toContain('Newest unpulled') expect(phase1Vue).toContain('Last extraction baseline') expect(phase1Vue).toContain('Ingested at') - expect(phase1Vue).toContain('Branch tip') + expect(phase1Vue).not.toContain('Branch tip') expect(phase1Vue).toContain('resolveNewestUnpulledCommit') }) }) diff --git a/src/dev-ui/app/tests/kg-manage-workspace-hub.test.ts b/src/dev-ui/app/tests/kg-manage-workspace-hub.test.ts index 858bafad6..7bf307675 100644 --- a/src/dev-ui/app/tests/kg-manage-workspace-hub.test.ts +++ b/src/dev-ui/app/tests/kg-manage-workspace-hub.test.ts @@ -49,6 +49,11 @@ describe('kgManageWorkspaceHub', () => { expect(tiles.find((tile) => tile.key === 'maintain')?.enabled).toBe(false) }) + it('labels the graph-management hub tile as Graph Management', () => { + const tiles = buildWorkspaceHubTiles(baseInput) + expect(tiles.find((tile) => tile.key === 'graph-management')?.title).toBe('Graph Management') + }) + it('marks sources phase complete when all sources are prepared', () => { const tiles = buildWorkspaceHubTiles({ ...baseInput, @@ -62,7 +67,7 @@ describe('kgManageWorkspaceHub', () => { ...baseInput, dataSourceCount: 2, preparedSourceCount: 2, - }).label).toBe('Design') + }).label).toBe('Graph Management') }) it('builds a primary next-step CTA while sources phase is incomplete', () => { @@ -84,6 +89,6 @@ describe('kgManageWorkspaceHub', () => { ...baseInput, dataSourceCount: 1, preparedSourceCount: 1, - })).toContain('Design') + })).toContain('Graph Management') }) }) diff --git 
a/src/dev-ui/app/utils/kgManageWorkspaceHub.ts b/src/dev-ui/app/utils/kgManageWorkspaceHub.ts index 3566df4ed..5a696a132 100644 --- a/src/dev-ui/app/utils/kgManageWorkspaceHub.ts +++ b/src/dev-ui/app/utils/kgManageWorkspaceHub.ts @@ -60,7 +60,7 @@ export function resolveWorkspaceHubPhaseBadge(input: WorkspaceHubOverview): Work return { label: 'Operations', variant: 'success' } } if (sourcesPhaseComplete(input)) { - return { label: 'Design', variant: 'warning' } + return { label: 'Graph Management', variant: 'warning' } } return { label: 'Data sources', variant: 'secondary' } } @@ -129,11 +129,11 @@ export function buildWorkspaceHubTiles(input: WorkspaceHubOverview): WorkspaceHu { step: 2, key: 'graph-management', - title: 'Design', + title: 'Graph Management', subtitle: designDone ? 'Schema validated · extraction operations available' : sourcesDone - ? 'Design assistant, schema bootstrap, and validation' + ? 'Graph management assistant, schema bootstrap, and validation' : 'Open anytime; prepare data sources to clear later gates', to: resolveStepDestination(input.kgId, 'graph-management'), enabled: true, @@ -167,7 +167,7 @@ export function buildWorkspaceHubTiles(input: WorkspaceHubOverview): WorkspaceHu : 'Incremental graph updates from new commits', to: resolveStepDestination(input.kgId, 'maintain'), enabled: designDone, - lockedReason: designDone ? null : 'Complete design validation before maintenance.', + lockedReason: designDone ? 
null : 'Complete graph management validation before maintenance.', highlight: highlightKey === 'maintain', tone: toneFor(4, maintainCard.status === 'ready' && input.maintenanceReadyCount === 0, designDone, maintainCard.status), linkLabel: linkLabelFor(maintainCard.actionLabel, maintainCard.status === 'ready' && input.maintenanceReadyCount === 0), @@ -254,10 +254,10 @@ export function workspaceHubStepBadgeClass(item: { export function workspaceHubDescription(input: WorkspaceHubOverview): string { if (!sourcesPhaseComplete(input)) { - return 'Finish ingestion under Data sources, then continue through Design. Green tiles mark completed gates; the highlighted tile is your current focus.' + return 'Finish ingestion under Data sources, then continue through Graph Management. Green tiles mark completed gates; the highlighted tile is your current focus.' } if (!designPhaseComplete(input)) { - return 'Use Design for the assistant and schema bootstrap. Green tiles use Revisit; the highlighted tile is your suggested next step.' + return 'Use Graph Management for the assistant and schema bootstrap. Green tiles use Revisit; the highlighted tile is your suggested next step.' } return 'Continue with mutation logs or maintenance, or Revisit any completed step below.' 
} From 9f7a04e7c804a5547eca2d0a3795de5a487bd24a Mon Sep 17 00:00:00 2001 From: aredenba-rh <aredenba@redhat.com> Date: Fri, 29 May 2026 15:56:59 -0400 Subject: [PATCH 068/153] repair env/api.env --- env/api.env | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/env/api.env b/env/api.env index 868ea6bf0..6cdd3da20 100644 --- a/env/api.env +++ b/env/api.env @@ -10,10 +10,7 @@ SPICEDB_ENDPOINT="spicedb:50051" SPICEDB_PRESHARED_KEY="changeme" KARTOGRAPH_CORS_ORIGINS=["http://localhost:3000"] KARTOGRAPH_IAM_BOOTSTRAP_ADMIN_USERNAMES='["alice"]' -KARTOGRAPH_IAM_SINGLE_TENANT_MODE=true -# Generate with uv run python -c "from cryptography.fernet import Fernet; print(Fernet.generate_key().decode())" +KARTOGRAPH_IAM_SINGLE_TENANT_MODE=false KARTOGRAPH_MGMT_ENCRYPTION_KEY="vwN4rUcH-KL-UyJsL8hc6apftRUTovwec6L2M5uF5OE=" -# Extraction runtime defaults to in-memory adapters. Set backend=container and -# mount /var/run/docker.sock (see compose.dev.yaml) for local container execution. KARTOGRAPH_EXTRACTION_RUNTIME_BACKEND=memory KARTOGRAPH_EXTRACTION_RUNTIME_CONTAINER_ENGINE=auto \ No newline at end of file From e6a9305d14dc415b550b485272795b7edea3d50a Mon Sep 17 00:00:00 2001 From: aredenba-rh <aredenba@redhat.com> Date: Fri, 29 May 2026 16:24:35 -0400 Subject: [PATCH 069/153] fix(extraction): materialize JobPackages into sticky assistant workspace Load prepared archives even in schema-design mode, refresh the workspace on chat reuse, point Claude SDK at /workspace, and remove sibling sticky and worker containers during make down. 
Co-authored-by: Cursor <cursoragent@cursor.com> --- Makefile | 3 + .../kartograph_agent_runtime/executor.py | 60 ++++++++++++++++++- src/agent-runtime/tests/test_executor.py | 33 +++++++++- .../sticky_session_materialization.py | 25 ++++++++ .../sticky_session_runtime_service.py | 20 ++++++- .../sticky_session_workdir_materializer.py | 6 +- .../test_sticky_session_materialization.py | 40 +++++++++++++ .../test_sticky_session_runtime_service.py | 29 ++++++++- ...est_sticky_session_workdir_materializer.py | 14 +++++ 9 files changed, 221 insertions(+), 9 deletions(-) create mode 100644 src/api/extraction/application/sticky_session_materialization.py create mode 100644 src/api/tests/unit/extraction/application/test_sticky_session_materialization.py diff --git a/Makefile b/Makefile index 4ff4f7b86..62679cad5 100755 --- a/Makefile +++ b/Makefile @@ -36,6 +36,9 @@ dev: certs .PHONY: down down: docker compose -f compose.yaml -f compose.dev.yaml down + @echo "Stopping Graph Management sticky and worker containers..." 
+ -@docker ps -aq --filter name=kartograph-sticky- | xargs -r docker rm -f + -@docker ps -aq --filter name=kartograph-worker- | xargs -r docker rm -f .PHONY: run diff --git a/src/agent-runtime/kartograph_agent_runtime/executor.py b/src/agent-runtime/kartograph_agent_runtime/executor.py index b4b54eb40..5afea4bd7 100644 --- a/src/agent-runtime/kartograph_agent_runtime/executor.py +++ b/src/agent-runtime/kartograph_agent_runtime/executor.py @@ -14,16 +14,64 @@ _DEFAULT_TURN_TIMEOUT_SECONDS = 180.0 -def _build_system_prompt(agent_configuration: dict[str, Any]) -> str: +def _build_system_prompt( + agent_configuration: dict[str, Any], + *, + workspace_appendix: str = "", +) -> str: system_prompt = str(agent_configuration.get("system_prompt") or "").strip() guardrails = agent_configuration.get("guardrails") or [] skills = agent_configuration.get("skills") or {} skill_lines = "\n".join(f"- {key}: {value}" for key, value in sorted(skills.items())) guardrail_lines = "\n".join(f"- {item}" for item in guardrails if str(item).strip()) - sections = [section for section in (system_prompt, guardrail_lines, skill_lines) if section] + sections = [ + section + for section in (system_prompt, guardrail_lines, skill_lines, workspace_appendix.strip()) + if section + ] return "\n\n".join(sections) or "You are the Graph Management Assistant." +def _build_workspace_prompt_appendix(settings: AgentRuntimeSettings) -> str: + from pathlib import Path + + root = Path(settings.workspace_dir) + repo_root = root / "repository-files" + if not repo_root.is_dir(): + return ( + f"## Session workspace\n" + f"Workspace mount: `{settings.workspace_dir}`\n" + "No prepared JobPackage repository files are materialized yet." + ) + + package_dirs = sorted(path for path in repo_root.iterdir() if path.is_dir()) + if not package_dirs: + return ( + f"## Session workspace\n" + f"Workspace mount: `{settings.workspace_dir}`\n" + "Prepared data sources exist, but repository files have not been extracted yet. 
" + "If the user asks about repository content, explain that ingestion context may " + "need to be re-prepared under Data sources." + ) + + lines = [ + "## Session workspace", + f"Workspace mount: `{settings.workspace_dir}`", + ( + "Prepared repository files live under " + "`repository-files/<job_package_id>/` relative to the workspace mount. " + "Use Read, Grep, and Glob tools against those paths." + ), + ] + for package_dir in package_dirs[:8]: + files = sorted(path for path in package_dir.rglob("*") if path.is_file()) + lines.append(f"- `{package_dir.name}`: {len(files)} file(s)") + for file_path in files[:4]: + rel = file_path.relative_to(package_dir).as_posix() + lines.append(f" - `{rel}`") + return "\n".join(lines) + + def _apply_model_env(settings: AgentRuntimeSettings) -> str: for key, value in build_claude_agent_env(settings).items(): os.environ[key] = value @@ -135,7 +183,10 @@ async def _stream_with_claude_sdk( ) -> AsyncIterator[dict[str, Any]]: from claude_agent_sdk import ClaudeAgentOptions, query - system_prompt = _build_system_prompt(agent_configuration) + system_prompt = _build_system_prompt( + agent_configuration, + workspace_appendix=_build_workspace_prompt_appendix(settings), + ) history_lines = [ f"{entry.get('role', 'unknown')}: {entry.get('content', '')}" for entry in message_history[-6:] @@ -155,12 +206,15 @@ async def _stream_with_claude_sdk( } sdk_env = _build_sdk_env(settings) + workspace_dir = settings.workspace_dir.strip() or "/workspace" options = ClaudeAgentOptions( system_prompt=system_prompt, env=sdk_env, permission_mode="bypassPermissions", max_turns=8, setting_sources=[], + cwd=workspace_dir, + add_dirs=[workspace_dir], ) reply: str | None = None diff --git a/src/agent-runtime/tests/test_executor.py b/src/agent-runtime/tests/test_executor.py index 0aa472fd0..865513b8c 100644 --- a/src/agent-runtime/tests/test_executor.py +++ b/src/agent-runtime/tests/test_executor.py @@ -2,12 +2,43 @@ from __future__ import annotations +from 
pathlib import Path + import pytest -from kartograph_agent_runtime.executor import stream_turn_events +from kartograph_agent_runtime.executor import ( + _build_system_prompt, + _build_workspace_prompt_appendix, + stream_turn_events, +) from kartograph_agent_runtime.settings import AgentRuntimeSettings +def test_build_workspace_prompt_appendix_lists_materialized_repository_files( + tmp_path: Path, +) -> None: + package_root = tmp_path / "repository-files" / "pkg-1" / "pkg" / "api" + package_root.mkdir(parents=True) + (package_root / "adapter_status_types_test.go").write_text("package api\n", encoding="utf-8") + + appendix = _build_workspace_prompt_appendix( + AgentRuntimeSettings(KARTOGRAPH_WORKSPACE_DIR=str(tmp_path)) + ) + + assert "repository-files/<job_package_id>/" in appendix + assert "pkg/api/adapter_status_types_test.go" in appendix + + +def test_build_system_prompt_includes_workspace_appendix() -> None: + prompt = _build_system_prompt( + {"system_prompt": "Base prompt"}, + workspace_appendix="## Session workspace\nFiles here", + ) + + assert "Base prompt" in prompt + assert "Files here" in prompt + + @pytest.mark.asyncio async def test_stream_turn_events_without_api_key_returns_done_reply( monkeypatch: pytest.MonkeyPatch, diff --git a/src/api/extraction/application/sticky_session_materialization.py b/src/api/extraction/application/sticky_session_materialization.py new file mode 100644 index 000000000..f83d7d737 --- /dev/null +++ b/src/api/extraction/application/sticky_session_materialization.py @@ -0,0 +1,25 @@ +"""Helpers for deciding when sticky sessions should load JobPackage material.""" + +from __future__ import annotations + +from extraction.application.job_package_gate import JobPackageGateDecision +from extraction.domain.value_objects import ( + IngestionReadinessSnapshot, + SessionJobPackagePhase, +) + + +def should_materialize_job_packages( + *, + readiness: IngestionReadinessSnapshot, + gate: JobPackageGateDecision, +) -> bool: + """Return 
whether prepared JobPackage archives should be loaded into the workspace. + + UI-mode gates control whether chat must *wait* for JobPackage readiness. + When prepared packages exist for the knowledge graph, materialize them even + in modes that do not require the gate (e.g. Initial Schema Design). + """ + if readiness.prepared_source_count > 0: + return True + return gate.phase != SessionJobPackagePhase.NOT_REQUIRED diff --git a/src/api/extraction/application/sticky_session_runtime_service.py b/src/api/extraction/application/sticky_session_runtime_service.py index f1ddb6de8..8396c1594 100644 --- a/src/api/extraction/application/sticky_session_runtime_service.py +++ b/src/api/extraction/application/sticky_session_runtime_service.py @@ -9,6 +9,7 @@ from extraction.application.agent_session_service import ExtractionAgentSessionService from extraction.application.job_package_gate import resolve_job_package_gate +from extraction.application.sticky_session_materialization import should_materialize_job_packages from extraction.application.skill_resolution_service import ExtractionSkillResolutionService from extraction.domain.entities.agent_session import ExtractionAgentSession from extraction.domain.value_objects import ( @@ -96,6 +97,20 @@ async def ensure_runtime_for_chat( mode=mode.value, ) if lease is not None: + readiness = await self._ingestion_readiness_reader.read_for_knowledge_graph( + knowledge_graph_id=knowledge_graph_id, + ) + gate = resolve_job_package_gate(ui_mode=ui_mode, readiness=readiness) + if self._runtime_backend == "container": + await self._bootstrap_builder.build( + tenant_id=tenant_id, + knowledge_graph_id=knowledge_graph_id, + session_id=session.id, + include_job_packages=should_materialize_job_packages( + readiness=readiness, + gate=gate, + ), + ) session.runtime_context["sticky_runtime"] = self._lease_context(lease, phase="ready") await self._session_service.save_session(session) return @@ -207,7 +222,10 @@ async def _stream_prepare_runtime( 
tenant_id=tenant_id, knowledge_graph_id=knowledge_graph_id, session_id=session.id, - include_job_packages=gate.phase != SessionJobPackagePhase.NOT_REQUIRED, + include_job_packages=should_materialize_job_packages( + readiness=readiness, + gate=gate, + ), ) yield { "type": "thinking", diff --git a/src/api/extraction/infrastructure/sticky_session_workdir_materializer.py b/src/api/extraction/infrastructure/sticky_session_workdir_materializer.py index 1fe5b5db3..0fd3f9b66 100644 --- a/src/api/extraction/infrastructure/sticky_session_workdir_materializer.py +++ b/src/api/extraction/infrastructure/sticky_session_workdir_materializer.py @@ -33,7 +33,11 @@ def prepare( ingestion_context_dir.mkdir(parents=True, exist_ok=True) repository_files_dir.mkdir(parents=True, exist_ok=True) - discovered = job_package_ids or self._discover_job_package_ids() + discovered = ( + self._discover_job_package_ids() + if job_package_ids is None + else job_package_ids + ) for package_id in discovered: archive_path = self._job_package_work_dir / JobPackageId(value=package_id).archive_name() if not archive_path.exists(): diff --git a/src/api/tests/unit/extraction/application/test_sticky_session_materialization.py b/src/api/tests/unit/extraction/application/test_sticky_session_materialization.py new file mode 100644 index 000000000..7dc65580a --- /dev/null +++ b/src/api/tests/unit/extraction/application/test_sticky_session_materialization.py @@ -0,0 +1,40 @@ +"""Unit tests for sticky session JobPackage materialization policy.""" + +from __future__ import annotations + +from extraction.application.job_package_gate import resolve_job_package_gate +from extraction.application.sticky_session_materialization import should_materialize_job_packages +from extraction.domain.value_objects import ( + GraphManagementUiMode, + IngestionReadinessSnapshot, +) + + +def test_schema_design_materializes_when_prepared_sources_exist() -> None: + readiness = IngestionReadinessSnapshot(data_source_count=1, 
prepared_source_count=1) + gate = resolve_job_package_gate( + ui_mode=GraphManagementUiMode.INITIAL_SCHEMA_DESIGN, + readiness=readiness, + ) + + assert should_materialize_job_packages(readiness=readiness, gate=gate) is True + + +def test_schema_design_skips_materialization_without_prepared_sources() -> None: + readiness = IngestionReadinessSnapshot(data_source_count=0, prepared_source_count=0) + gate = resolve_job_package_gate( + ui_mode=GraphManagementUiMode.INITIAL_SCHEMA_DESIGN, + readiness=readiness, + ) + + assert should_materialize_job_packages(readiness=readiness, gate=gate) is False + + +def test_extraction_jobs_materializes_when_gate_ready() -> None: + readiness = IngestionReadinessSnapshot(data_source_count=2, prepared_source_count=2) + gate = resolve_job_package_gate( + ui_mode=GraphManagementUiMode.EXTRACTION_JOBS, + readiness=readiness, + ) + + assert should_materialize_job_packages(readiness=readiness, gate=gate) is True diff --git a/src/api/tests/unit/extraction/application/test_sticky_session_runtime_service.py b/src/api/tests/unit/extraction/application/test_sticky_session_runtime_service.py index 337a5919c..46fd32977 100644 --- a/src/api/tests/unit/extraction/application/test_sticky_session_runtime_service.py +++ b/src/api/tests/unit/extraction/application/test_sticky_session_runtime_service.py @@ -174,19 +174,34 @@ async def test_ensure_runtime_for_chat_reprepares_when_persisted_runtime_is_inac assert sticky.try_resolve_active_lease(session_id=session.id) is not None +class _RecordingBootstrapBuilder: + def __init__(self) -> None: + self.calls: list[dict[str, object]] = [] + + async def build(self, **kwargs): + self.calls.append(kwargs) + return None + + +class _PreparedIngestionReadinessReader: + async def read_for_knowledge_graph(self, *, knowledge_graph_id: str): + return IngestionReadinessSnapshot(data_source_count=1, prepared_source_count=1) + + @pytest.mark.asyncio async def 
test_ensure_runtime_for_chat_reuses_running_container_without_reprepare() -> None: repo = _InMemoryAgentSessionRepository() session_service = ExtractionAgentSessionService(repository=repo) sticky = InMemoryStickySessionRuntimeManager() + bootstrap = _RecordingBootstrapBuilder() service = StickySessionRuntimeService( session_service=session_service, skill_resolution_service=_StaticSkillResolutionService(), - ingestion_readiness_reader=_StaticIngestionReadinessReader(), + ingestion_readiness_reader=_PreparedIngestionReadinessReader(), sticky_runtime_manager=sticky, - bootstrap_builder=_StaticBootstrapBuilder(), + bootstrap_builder=bootstrap, health_checker=_InstantHealthChecker(), - runtime_backend="memory", + runtime_backend="container", sticky_health_timeout_seconds=5.0, ) session = await session_service.get_or_create_active_session( @@ -215,3 +230,11 @@ async def test_ensure_runtime_for_chat_reuses_running_container_without_reprepar assert events == [] assert session.runtime_context["sticky_runtime"]["phase"] == "ready" + assert bootstrap.calls == [ + { + "tenant_id": "tenant-1", + "knowledge_graph_id": "kg-1", + "session_id": session.id, + "include_job_packages": True, + } + ] diff --git a/src/api/tests/unit/extraction/infrastructure/test_sticky_session_workdir_materializer.py b/src/api/tests/unit/extraction/infrastructure/test_sticky_session_workdir_materializer.py index e6373bb60..b9fbf1acb 100644 --- a/src/api/tests/unit/extraction/infrastructure/test_sticky_session_workdir_materializer.py +++ b/src/api/tests/unit/extraction/infrastructure/test_sticky_session_workdir_materializer.py @@ -57,3 +57,17 @@ def test_materializer_extracts_job_package_into_session_workspace(tmp_path: Path repo_file = session_root / "repository-files" / package_id / "README.md" assert repo_file.read_text(encoding="utf-8") == "# hello\n" + + +def test_materializer_does_not_discover_archives_when_package_ids_empty(tmp_path: Path) -> None: + package_id = "01JTESTPACK0000000000000001" + 
_build_package(tmp_path, package_id) + materializer = StickySessionWorkdirMaterializer(job_package_work_dir=tmp_path) + + session_root = materializer.prepare( + session_id="session-2", + knowledge_graph_id="kg-1", + job_package_ids=(), + ) + + assert not any((session_root / "repository-files").iterdir()) From 4dde3a780978b1556b0de7e195c6acd30c1cc3f1 Mon Sep 17 00:00:00 2001 From: aredenba-rh <aredenba@redhat.com> Date: Sun, 31 May 2026 18:42:03 -0400 Subject: [PATCH 070/153] fix(ingestion): report total branch files instead of changeset size Incremental prepares were overwriting last_prepared_file_count with the number of changed files, so the data sources table showed the wrong "Files on branch" value after subsequent prepares. Co-authored-by: Cursor <cursoragent@cursor.com> --- .../application/services/ingestion_service.py | 1 + .../ingestion/application/value_objects.py | 1 + .../infrastructure/adapters/github.py | 24 ++++++ .../ingestion/infrastructure/event_handler.py | 2 +- src/api/ingestion/ports/adapters.py | 3 + .../application/test_ingestion_service.py | 2 + .../adapters/test_github_adapter.py | 75 +++++++++++++++++++ .../test_ingestion_event_handler.py | 3 +- .../tests/unit/management/test_data_source.py | 7 ++ 9 files changed, 116 insertions(+), 2 deletions(-) diff --git a/src/api/ingestion/application/services/ingestion_service.py b/src/api/ingestion/application/services/ingestion_service.py index 293554c78..3abfd9472 100644 --- a/src/api/ingestion/application/services/ingestion_service.py +++ b/src/api/ingestion/application/services/ingestion_service.py @@ -145,6 +145,7 @@ async def run( return IngestionRunResult( job_package_id=builder._package_id, entry_count=len(result.changeset_entries), + branch_file_count=result.branch_file_count, prepared_commit_sha=( str(prepared_commit_sha) if prepared_commit_sha is not None else None ), diff --git a/src/api/ingestion/application/value_objects.py b/src/api/ingestion/application/value_objects.py index 
5aa28ec68..819dd671e 100644 --- a/src/api/ingestion/application/value_objects.py +++ b/src/api/ingestion/application/value_objects.py @@ -13,4 +13,5 @@ class IngestionRunResult: job_package_id: JobPackageId entry_count: int + branch_file_count: int | None prepared_commit_sha: str | None diff --git a/src/api/ingestion/infrastructure/adapters/github.py b/src/api/ingestion/infrastructure/adapters/github.py index ce2059cd3..03d6e5204 100644 --- a/src/api/ingestion/infrastructure/adapters/github.py +++ b/src/api/ingestion/infrastructure/adapters/github.py @@ -183,12 +183,16 @@ async def extract( files_to_fetch = await self._get_all_tree_blobs( client, headers, owner, repo, head_sha ) + branch_file_count = len(files_to_fetch) else: assert checkpoint is not None # narrowed above base_sha = checkpoint.data[_COMMIT_SHA_KEY] files_to_fetch = await self._get_changed_files( client, headers, owner, repo, base_sha, head_sha ) + branch_file_count = await self._count_tree_blobs( + client, headers, owner, repo, head_sha + ) # Step 3: Fetch content for each file changeset_entries, content_blobs = await self._fetch_file_contents( @@ -209,6 +213,7 @@ async def extract( changeset_entries=changeset_entries, content_blobs=content_blobs, new_checkpoint=new_checkpoint, + branch_file_count=branch_file_count, ) # ------------------------------------------------------------------ @@ -288,6 +293,25 @@ async def _get_all_tree_blobs( ) return result + async def _count_tree_blobs( + self, + client: httpx.AsyncClient, + headers: dict[str, str], + owner: str, + repo: str, + tree_sha: str, + ) -> int: + """Count blob entries in the repository tree at a commit.""" + url = ( + f"{_GITHUB_API_BASE}/repos/{owner}/{repo}/git/trees/{tree_sha}?recursive=1" + ) + response = await client.get(url, headers=headers) + response.raise_for_status() + tree_data: dict[str, Any] = response.json() + return sum( + 1 for item in tree_data.get("tree", []) if item.get("type") == "blob" + ) + async def _get_changed_files( 
self, client: httpx.AsyncClient, diff --git a/src/api/ingestion/infrastructure/event_handler.py b/src/api/ingestion/infrastructure/event_handler.py index 052f6b9bc..ceec6fd32 100644 --- a/src/api/ingestion/infrastructure/event_handler.py +++ b/src/api/ingestion/infrastructure/event_handler.py @@ -173,7 +173,7 @@ async def handle( "knowledge_graph_id": knowledge_graph_id, "job_package_id": str(ingestion_result.job_package_id), "prepared_commit_sha": ingestion_result.prepared_commit_sha, - "prepared_file_count": ingestion_result.entry_count, + "prepared_file_count": ingestion_result.branch_file_count, "occurred_at": now.isoformat(), }, occurred_at=now, diff --git a/src/api/ingestion/ports/adapters.py b/src/api/ingestion/ports/adapters.py index 0553b2ee5..cec85b6ed 100644 --- a/src/api/ingestion/ports/adapters.py +++ b/src/api/ingestion/ports/adapters.py @@ -40,11 +40,14 @@ class ExtractionResult: new_checkpoint: Opaque adapter-specific state capturing the extraction position (e.g., the current commit SHA for GitHub). Must be persisted by the caller so the next incremental run starts here. + branch_file_count: Total blob files on the source branch at the + extraction HEAD commit, when the adapter can determine it. 
""" changeset_entries: list[ChangesetEntry] content_blobs: dict[str, bytes] new_checkpoint: AdapterCheckpoint + branch_file_count: int | None = None @runtime_checkable diff --git a/src/api/tests/unit/ingestion/application/test_ingestion_service.py b/src/api/tests/unit/ingestion/application/test_ingestion_service.py index 3a06cf64f..8da91a1d9 100644 --- a/src/api/tests/unit/ingestion/application/test_ingestion_service.py +++ b/src/api/tests/unit/ingestion/application/test_ingestion_service.py @@ -45,6 +45,7 @@ def _make_extraction_result( changeset_entries=[entry], content_blobs={content_ref.hex_digest: content}, new_checkpoint=checkpoint, + branch_file_count=1, ) @@ -108,6 +109,7 @@ async def test_run_returns_job_package_id(self): assert isinstance(result, IngestionRunResult) assert isinstance(result.job_package_id, JobPackageId) assert result.entry_count == 1 + assert result.branch_file_count == 1 assert result.prepared_commit_sha == "deadbeef" async def test_run_creates_zip_archive(self): diff --git a/src/api/tests/unit/ingestion/infrastructure/adapters/test_github_adapter.py b/src/api/tests/unit/ingestion/infrastructure/adapters/test_github_adapter.py index 08ec50525..5f90bca74 100644 --- a/src/api/tests/unit/ingestion/infrastructure/adapters/test_github_adapter.py +++ b/src/api/tests/unit/ingestion/infrastructure/adapters/test_github_adapter.py @@ -79,6 +79,14 @@ def _tree_response( return {"sha": HEAD_SHA, "tree": files, "truncated": False} +def _head_tree_response(files: list[dict] | None = None) -> dict: + """Default tree at HEAD for incremental branch file count.""" + return _tree_response(files) + + +HEAD_TREE_PATH = f"/git/trees/{HEAD_SHA}" + + def _compare_response( changed_files: list[dict] | None = None, ) -> dict: @@ -179,6 +187,26 @@ def incremental_transport() -> FakeGitHubTransport: { # Branch tip "/branches/main": _branch_response(HEAD_SHA), + # Tree at HEAD for branch file count (3 blobs on branch) + f"/git/trees/{HEAD_SHA}": _tree_response( + [ 
+ { + "path": "README.md", + "type": "blob", + "sha": BLOB_SHA_README, + }, + { + "path": "src/main.py", + "type": "blob", + "sha": BLOB_SHA_MAIN, + }, + { + "path": "src/utils.py", + "type": "blob", + "sha": BLOB_SHA_UTILS, + }, + ] + ), # Compare endpoint f"/compare/{BASE_SHA}...{HEAD_SHA}": _compare_response( [ @@ -374,6 +402,29 @@ async def test_incremental_returns_only_changed_files( assert len(result.changeset_entries) == 1 assert result.changeset_entries[0].path == "src/utils.py" + assert result.branch_file_count == 3 + + @pytest.mark.asyncio + async def test_incremental_reports_branch_file_count_separate_from_changeset( + self, connection_config, credentials, incremental_transport + ): + """Branch file count reflects total blobs, not just changed files.""" + client = httpx.AsyncClient(transport=incremental_transport) + adapter = GitHubAdapter(http_client=client) + + checkpoint = AdapterCheckpoint( + schema_version="1.0.0", data={"commit_sha": BASE_SHA} + ) + + result = await adapter.extract( + connection_config=connection_config, + credentials=credentials, + checkpoint=checkpoint, + sync_mode=SyncMode.INCREMENTAL, + ) + + assert len(result.changeset_entries) == 1 + assert result.branch_file_count == 3 @pytest.mark.asyncio async def test_incremental_maps_added_status_to_add_operation( @@ -405,6 +456,7 @@ async def test_incremental_maps_modified_status_to_modify_operation( transport = FakeGitHubTransport( { "/branches/main": _branch_response(HEAD_SHA), + HEAD_TREE_PATH: _head_tree_response(), f"/compare/{BASE_SHA}...{HEAD_SHA}": _compare_response( [ { @@ -442,6 +494,7 @@ async def test_incremental_maps_renamed_status_to_modify_operation( transport = FakeGitHubTransport( { "/branches/main": _branch_response(HEAD_SHA), + HEAD_TREE_PATH: _head_tree_response(), f"/compare/{BASE_SHA}...{HEAD_SHA}": _compare_response( [ { @@ -482,6 +535,7 @@ async def test_incremental_ignores_removed_files( transport = FakeGitHubTransport( { "/branches/main": 
_branch_response(HEAD_SHA), + HEAD_TREE_PATH: _head_tree_response(), f"/compare/{BASE_SHA}...{HEAD_SHA}": _compare_response( [ { @@ -518,6 +572,7 @@ async def test_incremental_no_changes_returns_empty_result( transport = FakeGitHubTransport( { "/branches/main": _branch_response(HEAD_SHA), + HEAD_TREE_PATH: _head_tree_response(), f"/compare/{BASE_SHA}...{HEAD_SHA}": _compare_response([]), } ) @@ -722,6 +777,26 @@ async def handle_async_request( url_path = request.url.path if url_path.endswith("/branches/main"): data: dict = _branch_response(HEAD_SHA) + elif f"/git/trees/{HEAD_SHA}" in url_path: + data = _head_tree_response( + [ + { + "path": "README.md", + "type": "blob", + "sha": BLOB_SHA_README, + }, + { + "path": "src/main.py", + "type": "blob", + "sha": BLOB_SHA_MAIN, + }, + { + "path": "src/utils.py", + "type": "blob", + "sha": BLOB_SHA_UTILS, + }, + ] + ) elif f"/compare/{BASE_SHA}...{HEAD_SHA}" in url_path: data = _compare_response( [ diff --git a/src/api/tests/unit/ingestion/infrastructure/test_ingestion_event_handler.py b/src/api/tests/unit/ingestion/infrastructure/test_ingestion_event_handler.py index ce23acb01..07ac2d446 100644 --- a/src/api/tests/unit/ingestion/infrastructure/test_ingestion_event_handler.py +++ b/src/api/tests/unit/ingestion/infrastructure/test_ingestion_event_handler.py @@ -86,6 +86,7 @@ async def run( return IngestionRunResult( job_package_id=JobPackageId(value="01HRZZZZZZZZZZZZZZZZZZZZZ0"), entry_count=42, + branch_file_count=99, prepared_commit_sha="abc123def456", ) @@ -258,7 +259,7 @@ async def test_emits_ingestion_prepared_when_ingest_only( assert event["event_type"] == "IngestionPrepared" assert event["payload"]["job_package_id"] is not None assert event["payload"]["prepared_commit_sha"] == "abc123def456" - assert event["payload"]["prepared_file_count"] == 42 + assert event["payload"]["prepared_file_count"] == 99 async def test_no_changes_ingest_only_emits_ingestion_prepared( self, diff --git 
a/src/api/tests/unit/management/test_data_source.py b/src/api/tests/unit/management/test_data_source.py index 49c20ae0d..184b2be45 100644 --- a/src/api/tests/unit/management/test_data_source.py +++ b/src/api/tests/unit/management/test_data_source.py @@ -465,6 +465,13 @@ def test_record_ingestion_prepared_preserves_file_count_when_none(self): assert ds.last_prepared_commit == "abc123" assert ds.last_prepared_file_count == 10 + def test_record_ingestion_prepared_updates_branch_file_count_on_incremental(self): + """Incremental prepares must store total branch files, not changeset size.""" + ds = self._create_ds() + ds.last_prepared_file_count = 120 + ds.record_ingestion_prepared(prepared_commit="def456", prepared_file_count=124) + assert ds.last_prepared_file_count == 124 + class TestDataSourceMarkForDeletion: """Tests for DataSource.mark_for_deletion() method.""" From 8df2591d7cf9330e00eb93688360fee6c8f1736a Mon Sep 17 00:00:00 2001 From: aredenba-rh <aredenba@redhat.com> Date: Sun, 31 May 2026 20:40:27 -0400 Subject: [PATCH 071/153] fix(ui): keep data sources table visible during sync polling Background refreshes no longer toggle the page-level loading gate, so prepare polling updates status in place with a subtle updating indicator. 
Co-authored-by: Cursor <cursoragent@cursor.com> --- .../[kgId]/data-sources/index.vue | 29 +++++++++++++++---- .../app/tests/kg-data-sources-phase1.test.ts | 6 ++++ 2 files changed, 30 insertions(+), 5 deletions(-) diff --git a/src/dev-ui/app/pages/knowledge-graphs/[kgId]/data-sources/index.vue b/src/dev-ui/app/pages/knowledge-graphs/[kgId]/data-sources/index.vue index 728feaa65..162f0afb5 100644 --- a/src/dev-ui/app/pages/knowledge-graphs/[kgId]/data-sources/index.vue +++ b/src/dev-ui/app/pages/knowledge-graphs/[kgId]/data-sources/index.vue @@ -127,6 +127,7 @@ const { apiFetch } = useApiClient() const kgName = ref('') const dataSources = ref<DataSourceItem[]>([]) const loading = ref(false) +const refreshing = ref(false) const expandedDiffLists = ref<Record<string, boolean>>({}) const checkingAllCommits = ref(false) const preparingAll = ref(false) @@ -187,7 +188,7 @@ function stopPolling() { function startPolling() { if (pollInterval.value !== null) return pollInterval.value = setInterval(async () => { - await loadDataSources() + await loadDataSources({ silent: true }) if (!hasAnyActiveSync(dataSources.value)) { stopPolling() } @@ -294,9 +295,14 @@ async function loadKnowledgeGraph() { } } -async function loadDataSources() { +async function loadDataSources(options: { silent?: boolean } = {}) { if (!hasTenant.value) return - loading.value = true + const silent = options.silent ?? 
dataSources.value.length > 0 + if (silent) { + refreshing.value = true + } else { + loading.value = true + } try { const sources = await apiFetch<DataSourceItem[]>( `/management/knowledge-graphs/${kgId.value}/data-sources`, @@ -319,9 +325,15 @@ async function loadDataSources() { } dataSources.value = sources } catch { - dataSources.value = [] + if (!silent) { + dataSources.value = [] + } } finally { - loading.value = false + if (silent) { + refreshing.value = false + } else { + loading.value = false + } } } @@ -630,6 +642,13 @@ watch(tenantVersion, async () => { <CardTitle class="flex items-center gap-2 text-base"> <GitBranch class="size-4 text-primary" /> Data sources overview + <span + v-if="refreshing" + class="inline-flex items-center gap-1 text-xs font-normal text-muted-foreground" + > + <Loader2 class="size-3 animate-spin" /> + Updating… + </span> </CardTitle> </div> <div class="flex flex-wrap gap-2"> diff --git a/src/dev-ui/app/tests/kg-data-sources-phase1.test.ts b/src/dev-ui/app/tests/kg-data-sources-phase1.test.ts index a290ed065..1254800df 100644 --- a/src/dev-ui/app/tests/kg-data-sources-phase1.test.ts +++ b/src/dev-ui/app/tests/kg-data-sources-phase1.test.ts @@ -58,6 +58,12 @@ describe('KG data sources phase1 layout', () => { expect(phase1Vue).toContain('formatPreparedFileCount') }) + it('refreshes data sources silently while polling', () => { + expect(phase1Vue).toContain('loadDataSources({ silent: true })') + expect(phase1Vue).toContain('refreshing') + expect(phase1Vue).toContain('Updating…') + }) + it('shows unpulled commit columns', () => { expect(phase1Vue).toContain('Newest unpulled') expect(phase1Vue).toContain('Last extraction baseline') From 85e5716361fd2ffe94b3e087b431c88e895804c5 Mon Sep 17 00:00:00 2001 From: aredenba-rh <aredenba@redhat.com> Date: Sun, 31 May 2026 20:42:10 -0400 Subject: [PATCH 072/153] fix(ui): constrain KG manage workspace to max-w-7xl Graph Management and other manage steps no longer stretch edge-to-edge on wide 
screens, matching the data sources workspace layout. Co-authored-by: Cursor <cursoragent@cursor.com> --- src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue | 2 +- src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue b/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue index 87701447d..29fe10dc3 100644 --- a/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue +++ b/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue @@ -1216,7 +1216,7 @@ watch(selectedOpsDataSourceId, () => { </script> <template> - <div class="space-y-6"> + <div class="mx-auto max-w-7xl space-y-6"> <template v-if="showOverview"> <NuxtLink to="/knowledge-graphs" diff --git a/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts b/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts index c4c935fc8..a0cdc8aac 100644 --- a/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts +++ b/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts @@ -388,6 +388,10 @@ describe('KG-MANAGE-006 - graph management conversation-first layout', () => { expect(manageWorkspaceVue).toContain('graph-management-controls') }) + it('uses a centered max-width page container like other KG workspace steps', () => { + expect(manageWorkspaceVue).toContain('mx-auto max-w-7xl') + }) + it('uses one shared session endpoint across UI mode changes', () => { expect(manageWorkspaceVue).toContain('sharedSessionMode') expect(manageWorkspaceVue).toContain('/sessions/${sharedSessionMode.value}/active') From a5daa9cc5bd14004df3180bdcd7a2ab287ee2e99 Mon Sep 17 00:00:00 2001 From: aredenba-rh <aredenba@redhat.com> Date: Sun, 31 May 2026 20:46:19 -0400 Subject: [PATCH 073/153] feat(management): detect missing prepared JobPackage archives Expose job_package_available on data source listings, rematerialize when the ZIP is gone, and skip ingest-only no-changes 
short-circuit without it. Co-authored-by: Cursor <cursoragent@cursor.com> --- src/api/main.py | 32 ++++++++- .../job_package_archive_reader.py | 36 ++++++++++ .../presentation/data_sources/models.py | 10 ++- .../presentation/data_sources/routes.py | 25 ++++++- .../job_package/archive_availability.py | 19 ++++++ .../job_package/test_archive_availability.py | 47 +++++++++++++ .../unit/test_sessioned_ingestion_handler.py | 68 +++++++++++++++++++ src/dev-ui/app/utils/kgDataSourcesCommits.ts | 14 +++- 8 files changed, 245 insertions(+), 6 deletions(-) create mode 100644 src/api/management/infrastructure/job_package_archive_reader.py create mode 100644 src/api/shared_kernel/job_package/archive_availability.py create mode 100644 src/api/tests/unit/shared_kernel/job_package/test_archive_availability.py diff --git a/src/api/main.py b/src/api/main.py index dc3d05eb1..93be300ea 100644 --- a/src/api/main.py +++ b/src/api/main.py @@ -195,6 +195,29 @@ async def _resolve_github_tracked_head_commit( sha = payload.get("commit", {}).get("sha") return str(sha) if sha else None + async def _ingest_only_archive_available( + self, + *, + session: Any, + data_source_id: str, + ) -> bool: + """Return whether a previously prepared JobPackage archive still exists on disk.""" + from management.infrastructure.job_package_archive_reader import ( + SqlJobPackageArchiveReader, + ) + from shared_kernel.job_package.archive_availability import ( + job_package_archive_exists, + ) + + reader = SqlJobPackageArchiveReader(session=session) + package_id = await reader.latest_job_package_id_for_data_source( + data_source_id=data_source_id, + ) + return job_package_archive_exists( + work_dir=_JOB_PACKAGE_WORK_DIR, + job_package_id=package_id, + ) + async def handle(self, event_type: str, payload: dict[str, Any]) -> None: from ingestion.infrastructure.adapters.github import GitHubAdapter from ingestion.application.services.ingestion_service import IngestionService @@ -283,7 +306,14 @@ async def handle(self, 
event_type: str, payload: dict[str, Any]) -> None: and baseline_commit and baseline_commit == tracked_head ): - enriched_payload["no_changes_detected"] = True + if pipeline_mode == "ingest_only": + if await self._ingest_only_archive_available( + session=session, + data_source_id=data_source_id, + ): + enriched_payload["no_changes_detected"] = True + else: + enriched_payload["no_changes_detected"] = True await ingestion_handler.handle( event_type, diff --git a/src/api/management/infrastructure/job_package_archive_reader.py b/src/api/management/infrastructure/job_package_archive_reader.py new file mode 100644 index 000000000..00e4d678d --- /dev/null +++ b/src/api/management/infrastructure/job_package_archive_reader.py @@ -0,0 +1,36 @@ +"""Read latest JobPackage identifiers for data source archive availability checks.""" + +from __future__ import annotations + +from sqlalchemy import text +from sqlalchemy.ext.asyncio import AsyncSession + + +class SqlJobPackageArchiveReader: + """Resolve the latest JobPackage id emitted for one data source.""" + + def __init__(self, *, session: AsyncSession) -> None: + self._session = session + + async def latest_job_package_id_for_data_source( + self, *, data_source_id: str + ) -> str | None: + result = await self._session.execute( + text( + """ + SELECT payload->>'job_package_id' AS job_package_id + FROM outbox + WHERE event_type IN ('IngestionPrepared', 'JobPackageProduced') + AND payload->>'data_source_id' = :data_source_id + AND payload->>'job_package_id' IS NOT NULL + ORDER BY occurred_at DESC + LIMIT 1 + """ + ), + {"data_source_id": data_source_id}, + ) + row = result.one_or_none() + if row is None or row.job_package_id is None: + return None + package_id = str(row.job_package_id).strip() + return package_id or None diff --git a/src/api/management/presentation/data_sources/models.py b/src/api/management/presentation/data_sources/models.py index ec4aca13a..699fbbf64 100644 --- 
a/src/api/management/presentation/data_sources/models.py +++ b/src/api/management/presentation/data_sources/models.py @@ -207,7 +207,8 @@ class DataSourceResponse(BaseModel): None, description="Commit SHA captured during the last ingest-only prepare" ) last_prepared_file_count: int | None = Field( - None, description="Number of files in the JobPackage from the last prepare" + None, + description="Total files on the tracked branch at the last prepare commit", ) ingested_head_commit: str | None = Field( None, @@ -220,6 +221,10 @@ class DataSourceResponse(BaseModel): "null when up to date with branch tip" ), ) + job_package_available: bool | None = Field( + None, + description="Whether the latest prepared JobPackage archive exists on disk", + ) connection_config: dict[str, str] = Field( default_factory=dict, description="Adapter connection configuration (non-secret)", @@ -505,7 +510,8 @@ class DataSourceWithSyncResponse(BaseModel): None, description="Commit SHA captured during the last ingest-only prepare" ) last_prepared_file_count: int | None = Field( - None, description="Number of files in the JobPackage from the last prepare" + None, + description="Total files on the tracked branch at the last prepare commit", ) ingested_head_commit: str | None = Field( None, diff --git a/src/api/management/presentation/data_sources/routes.py b/src/api/management/presentation/data_sources/routes.py index d063859ee..33ce5aa11 100644 --- a/src/api/management/presentation/data_sources/routes.py +++ b/src/api/management/presentation/data_sources/routes.py @@ -2,12 +2,18 @@ from __future__ import annotations +from pathlib import Path from typing import Annotated from fastapi import APIRouter, Depends, HTTPException, Query, status +from sqlalchemy.ext.asyncio import AsyncSession +from extraction.infrastructure.workload_runtime_settings import ( + get_extraction_workload_runtime_settings, +) from iam.application.value_objects import CurrentUser from iam.dependencies.user import 
get_current_user +from infrastructure.database.dependencies import get_write_session from management.application.services.data_source_service import DataSourceService from management.dependencies.data_source import ( get_data_source_service, @@ -19,8 +25,10 @@ GitCommitReferenceService, ) from management.infrastructure.git_diff_summary_service import GitDiffSummaryService +from management.infrastructure.job_package_archive_reader import SqlJobPackageArchiveReader from management.ports.exceptions import UnauthorizedError from management.ports.repositories import IDataSourceSyncRunRepository +from shared_kernel.job_package.archive_availability import job_package_archive_exists from management.presentation.data_sources.models import ( CreateDataSourceRequest, DataSourceDiffSummaryResponse, @@ -263,6 +271,7 @@ async def list_data_sources( kg_id: str, current_user: Annotated[CurrentUser, Depends(get_current_user)], service: Annotated[DataSourceService, Depends(get_data_source_service)], + session: Annotated[AsyncSession, Depends(get_write_session)], ) -> list[DataSourceResponse]: """List all data sources for a knowledge graph. 
@@ -272,6 +281,7 @@ async def list_data_sources( kg_id: Knowledge Graph ID to list data sources for current_user: Current authenticated user with tenant context service: Data source service for orchestration + session: Database session for JobPackage archive lookups Returns: List of DataSourceResponse objects for the knowledge graph @@ -285,7 +295,20 @@ async def list_data_sources( user_id=current_user.user_id.value, kg_id=kg_id, ) - return [DataSourceResponse.from_domain(ds) for ds in data_sources] + archive_reader = SqlJobPackageArchiveReader(session=session) + work_dir = get_extraction_workload_runtime_settings().job_package_work_dir + responses: list[DataSourceResponse] = [] + for ds in data_sources: + response = DataSourceResponse.from_domain(ds) + package_id = await archive_reader.latest_job_package_id_for_data_source( + data_source_id=ds.id.value, + ) + response.job_package_available = job_package_archive_exists( + work_dir=Path(work_dir), + job_package_id=package_id, + ) + responses.append(response) + return responses except UnauthorizedError: raise HTTPException( diff --git a/src/api/shared_kernel/job_package/archive_availability.py b/src/api/shared_kernel/job_package/archive_availability.py new file mode 100644 index 000000000..1f8d484da --- /dev/null +++ b/src/api/shared_kernel/job_package/archive_availability.py @@ -0,0 +1,19 @@ +"""Helpers for checking JobPackage archive presence on disk.""" + +from __future__ import annotations + +from pathlib import Path + +from shared_kernel.job_package.value_objects import JobPackageId + + +def job_package_archive_path(*, work_dir: Path, job_package_id: str) -> Path: + """Return the expected on-disk path for one JobPackage archive.""" + return work_dir / JobPackageId(value=job_package_id).archive_name() + + +def job_package_archive_exists(*, work_dir: Path, job_package_id: str | None) -> bool: + """Return whether the JobPackage ZIP archive exists locally.""" + if not job_package_id or not job_package_id.strip(): + 
return False + return job_package_archive_path(work_dir=work_dir, job_package_id=job_package_id).is_file() diff --git a/src/api/tests/unit/shared_kernel/job_package/test_archive_availability.py b/src/api/tests/unit/shared_kernel/job_package/test_archive_availability.py new file mode 100644 index 000000000..60d82e745 --- /dev/null +++ b/src/api/tests/unit/shared_kernel/job_package/test_archive_availability.py @@ -0,0 +1,47 @@ +"""Unit tests for JobPackage archive availability helpers.""" + +from __future__ import annotations + +from pathlib import Path + +from shared_kernel.job_package.archive_availability import job_package_archive_exists +from shared_kernel.job_package.builder import JobPackageBuilder +from shared_kernel.job_package.value_objects import ( + AdapterCheckpoint, + ChangeOperation, + ChangesetEntry, + ContentRef, + JobPackageId, + SyncMode, +) + + +def test_job_package_archive_exists_when_file_present(tmp_path: Path) -> None: + package_id = "01JTESTPACK0000000000000099" + content_bytes = b"# hello\n" + builder = JobPackageBuilder( + data_source_id="ds-1", + knowledge_graph_id="kg-1", + sync_mode=SyncMode.FULL_REFRESH, + package_id=JobPackageId(value=package_id), + ) + ref = builder.add_content(content_bytes) + builder.add_changeset_entry( + ChangesetEntry( + operation=ChangeOperation.ADD, + id="file-1", + type="io.kartograph.change.file", + path="README.md", + content_ref=ref, + content_type="text/markdown", + metadata={}, + ) + ) + builder.set_checkpoint(AdapterCheckpoint(schema_version="1.0.0", data={})) + builder.build(tmp_path) + + assert job_package_archive_exists(work_dir=tmp_path, job_package_id=package_id) is True + + +def test_job_package_archive_exists_when_file_missing(tmp_path: Path) -> None: + assert job_package_archive_exists(work_dir=tmp_path, job_package_id="missing") is False diff --git a/src/api/tests/unit/test_sessioned_ingestion_handler.py b/src/api/tests/unit/test_sessioned_ingestion_handler.py index 661a14df6..79a6715ea 100644 
--- a/src/api/tests/unit/test_sessioned_ingestion_handler.py +++ b/src/api/tests/unit/test_sessioned_ingestion_handler.py @@ -189,6 +189,7 @@ async def test_sessioned_ingestion_handler_uses_last_prepared_for_ingest_only(): session_factory = _make_session_factory(session) handler = _SessionedIngestionEventHandler(session_factory=session_factory) handler._resolve_github_tracked_head_commit = AsyncMock(return_value="prepared123") # type: ignore[attr-defined] + handler._ingest_only_archive_available = AsyncMock(return_value=True) # type: ignore[attr-defined] outbox_repo = MagicMock() ds_repo = MagicMock() @@ -245,3 +246,70 @@ async def test_sessioned_ingestion_handler_uses_last_prepared_for_ingest_only(): assert call_payload["baseline_commit"] == "prepared123" assert call_payload["no_changes_detected"] is True + +@pytest.mark.asyncio +async def test_sessioned_ingestion_handler_runs_ingest_only_when_archive_missing(): + """ingest_only at branch head should still run when the JobPackage ZIP was lost.""" + from main import _SessionedIngestionEventHandler + + session = AsyncMock() + session_factory = _make_session_factory(session) + handler = _SessionedIngestionEventHandler(session_factory=session_factory) + handler._resolve_github_tracked_head_commit = AsyncMock(return_value="prepared123") # type: ignore[attr-defined] + handler._ingest_only_archive_available = AsyncMock(return_value=False) # type: ignore[attr-defined] + + outbox_repo = MagicMock() + ds_repo = MagicMock() + secret_store = MagicMock() + ingestion_handler = MagicMock() + ingestion_handler.handle = AsyncMock() + ingestion_service = MagicMock() + + ds = _make_data_source() + ds.last_prepared_commit = "prepared123" + ds_repo.get_by_id = AsyncMock(return_value=ds) + ds_repo.save = AsyncMock() + secret_store.retrieve = AsyncMock(return_value={"token": "tok"}) + + payload = { + "sync_run_id": "run-004", + "data_source_id": ds.id.value, + "knowledge_graph_id": ds.knowledge_graph_id, + "tenant_id": ds.tenant_id, + 
"adapter_type": "github", + "connection_config": ds.connection_config, + "credentials_path": ds.credentials_path, + "pipeline_mode": "ingest_only", + } + + management_settings = MagicMock() + management_settings.encryption_key.get_secret_value.return_value = ( + "WlAwWU83a2hSODl2SVY4MHBzQWpwaDBSUHhOU3NfQ3R6aXpvNTJfNE5odz0=" + ) + + with ( + patch("infrastructure.outbox.repository.OutboxRepository", return_value=outbox_repo), + patch( + "management.infrastructure.repositories.data_source_repository.DataSourceRepository", + return_value=ds_repo, + ), + patch( + "management.infrastructure.repositories.fernet_secret_store.FernetSecretStore", + return_value=secret_store, + ), + patch( + "ingestion.application.services.ingestion_service.IngestionService", + return_value=ingestion_service, + ), + patch( + "ingestion.infrastructure.event_handler.IngestionEventHandler", + return_value=ingestion_handler, + ), + patch("main.get_management_settings", return_value=management_settings), + ): + await handler.handle("SyncStarted", payload) + + call_payload = ingestion_handler.handle.call_args.args[1] + assert call_payload["baseline_commit"] == "prepared123" + assert "no_changes_detected" not in call_payload + diff --git a/src/dev-ui/app/utils/kgDataSourcesCommits.ts b/src/dev-ui/app/utils/kgDataSourcesCommits.ts index a5ea69801..a1856bd60 100644 --- a/src/dev-ui/app/utils/kgDataSourcesCommits.ts +++ b/src/dev-ui/app/utils/kgDataSourcesCommits.ts @@ -74,8 +74,18 @@ export function unpulledCommitStatusLabel( return 'new commit on branch (not ingested yet)' } -export function needsIngestionPrepare(ds: Parameters<typeof hasUnpulledCommits>[0]): boolean { - return hasUnpulledCommits(ds) +export function needsJobPackageRematerialize(ds: { + last_prepared_commit?: string | null + job_package_available?: boolean | null +}): boolean { + return Boolean(ds.last_prepared_commit) && ds.job_package_available === false +} + +export function needsIngestionPrepare(ds: Parameters<typeof 
hasUnpulledCommits>[0] & { + last_prepared_commit?: string | null + job_package_available?: boolean | null +}): boolean { + return hasUnpulledCommits(ds) || needsJobPackageRematerialize(ds) } export function isIngestionPreparedAtHead(ds: Parameters<typeof hasUnpulledCommits>[0]): boolean { From be91d2a388292b002aace5f785d203fd7d8be9ab Mon Sep 17 00:00:00 2001 From: aredenba-rh <aredenba@redhat.com> Date: Sun, 31 May 2026 20:46:43 -0400 Subject: [PATCH 074/153] fix(extraction): reuse healthy sticky runtime and defer chat persistence Skip workspace rematerialization when the container is healthy and JobPackage IDs match, report 503 until the agent workspace is ready, and only save user messages after the assistant turn completes or fails. Co-authored-by: Cursor <cursoragent@cursor.com> --- .../kartograph_agent_runtime/server.py | 19 ++- src/agent-runtime/tests/test_server.py | 36 +++++ .../application/chat_turn_service.py | 6 +- .../sticky_session_runtime_service.py | 47 ++++-- .../infrastructure/sticky_runtime_health.py | 12 ++ .../sticky_session_bootstrap_builder.py | 13 ++ .../sticky_session_workdir_materializer.py | 14 +- .../extraction/ports/sticky_runtime_health.py | 4 + .../application/test_chat_turn_service.py | 3 + .../test_sticky_session_runtime_service.py | 151 +++++++++++++++--- ...est_sticky_session_workdir_materializer.py | 20 +++ 11 files changed, 281 insertions(+), 44 deletions(-) create mode 100644 src/agent-runtime/tests/test_server.py diff --git a/src/agent-runtime/kartograph_agent_runtime/server.py b/src/agent-runtime/kartograph_agent_runtime/server.py index 87b89a70e..7a1df58ed 100644 --- a/src/agent-runtime/kartograph_agent_runtime/server.py +++ b/src/agent-runtime/kartograph_agent_runtime/server.py @@ -7,8 +7,10 @@ from collections.abc import AsyncIterator from typing import Any +from pathlib import Path + from fastapi import FastAPI -from fastapi.responses import StreamingResponse +from fastapi.responses import JSONResponse, StreamingResponse 
from pydantic import BaseModel, Field from kartograph_agent_runtime.executor import stream_turn_events @@ -27,8 +29,21 @@ class TurnRequest(BaseModel): message_history: list[dict[str, Any]] = Field(default_factory=list) +def _workspace_ready() -> bool: + marker = Path(settings.workspace_dir) / "knowledge-graph-id" + return marker.is_file() + + @app.get("/health") -async def health() -> dict[str, str]: +async def health(): + if not _workspace_ready(): + return JSONResponse( + status_code=503, + content={ + "status": "workspace_unavailable", + "session_id": settings.session_id, + }, + ) return {"status": "ok", "session_id": settings.session_id} diff --git a/src/agent-runtime/tests/test_server.py b/src/agent-runtime/tests/test_server.py new file mode 100644 index 000000000..131606439 --- /dev/null +++ b/src/agent-runtime/tests/test_server.py @@ -0,0 +1,36 @@ +"""Unit tests for agent runtime HTTP health endpoints.""" + +from __future__ import annotations + +from pathlib import Path + +import pytest +from fastapi.testclient import TestClient + +from kartograph_agent_runtime import server + + +@pytest.fixture +def client(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> TestClient: + monkeypatch.setattr(server.settings, "workspace_dir", str(tmp_path)) + monkeypatch.setattr(server.settings, "session_id", "session-test") + return TestClient(server.app) + + +def test_health_returns_ok_when_workspace_marker_present( + client: TestClient, + tmp_path: Path, +) -> None: + (tmp_path / "knowledge-graph-id").write_text("kg-1", encoding="utf-8") + + response = client.get("/health") + + assert response.status_code == 200 + assert response.json()["status"] == "ok" + + +def test_health_returns_unavailable_when_workspace_marker_missing(client: TestClient) -> None: + response = client.get("/health") + + assert response.status_code == 503 + assert response.json()["status"] == "workspace_unavailable" diff --git a/src/api/extraction/application/chat_turn_service.py 
b/src/api/extraction/application/chat_turn_service.py index 8046f47a9..84220026b 100644 --- a/src/api/extraction/application/chat_turn_service.py +++ b/src/api/extraction/application/chat_turn_service.py @@ -125,10 +125,6 @@ async def stream_chat_turn( ], } - session.message_history.append({"role": "user", "content": trimmed}) - session.updated_at = datetime.now(UTC) - await self._session_service.save_session(session) - assistant_reply: str | None = None stream_failed = False async for event in self._chat_agent.stream_turn( @@ -150,11 +146,13 @@ async def stream_chat_turn( yield event if assistant_reply: + session.message_history.append({"role": "user", "content": trimmed}) session.message_history.append({"role": "assistant", "content": assistant_reply}) session.updated_at = datetime.now(UTC) session.runtime_context.pop("activity_lines", None) await self._session_service.save_session(session) elif stream_failed: + session.message_history.append({"role": "user", "content": trimmed}) session.updated_at = datetime.now(UTC) await self._session_service.save_session(session) else: diff --git a/src/api/extraction/application/sticky_session_runtime_service.py b/src/api/extraction/application/sticky_session_runtime_service.py index 8396c1594..e304089d8 100644 --- a/src/api/extraction/application/sticky_session_runtime_service.py +++ b/src/api/extraction/application/sticky_session_runtime_service.py @@ -97,23 +97,30 @@ async def ensure_runtime_for_chat( mode=mode.value, ) if lease is not None: + runtime_base_url = lease.runtime_base_url or "" readiness = await self._ingestion_readiness_reader.read_for_knowledge_graph( knowledge_graph_id=knowledge_graph_id, ) gate = resolve_job_package_gate(ui_mode=ui_mode, readiness=readiness) - if self._runtime_backend == "container": - await self._bootstrap_builder.build( - tenant_id=tenant_id, - knowledge_graph_id=knowledge_graph_id, - session_id=session.id, - include_job_packages=should_materialize_job_packages( - readiness=readiness, - 
gate=gate, - ), + include_job_packages = should_materialize_job_packages( + readiness=readiness, + gate=gate, + ) + expected_package_ids = await self._bootstrap_builder.resolve_job_package_ids( + knowledge_graph_id=knowledge_graph_id, + include_job_packages=include_job_packages, + ) + stored_materialization = session.runtime_context.get("workspace_materialization", {}) + stored_package_ids = tuple(stored_materialization.get("job_package_ids") or ()) + if ( + await self._health_checker.is_healthy(runtime_base_url=runtime_base_url) + and stored_package_ids == expected_package_ids + ): + session.runtime_context["sticky_runtime"] = self._lease_context( + lease, phase="ready" ) - session.runtime_context["sticky_runtime"] = self._lease_context(lease, phase="ready") - await self._session_service.save_session(session) - return + await self._session_service.save_session(session) + return async for event in self._stream_prepare_runtime( tenant_id=tenant_id, @@ -218,15 +225,23 @@ async def _stream_prepare_runtime( "Materializing workspace and skills for sticky container", ], } + include_job_packages = should_materialize_job_packages( + readiness=readiness, + gate=gate, + ) + package_ids = await self._bootstrap_builder.resolve_job_package_ids( + knowledge_graph_id=knowledge_graph_id, + include_job_packages=include_job_packages, + ) bootstrap = await self._bootstrap_builder.build( tenant_id=tenant_id, knowledge_graph_id=knowledge_graph_id, session_id=session.id, - include_job_packages=should_materialize_job_packages( - readiness=readiness, - gate=gate, - ), + include_job_packages=include_job_packages, ) + session.runtime_context["workspace_materialization"] = { + "job_package_ids": list(package_ids), + } yield { "type": "thinking", "recent": [ diff --git a/src/api/extraction/infrastructure/sticky_runtime_health.py b/src/api/extraction/infrastructure/sticky_runtime_health.py index 01f99523b..65910ade6 100644 --- a/src/api/extraction/infrastructure/sticky_runtime_health.py +++ 
b/src/api/extraction/infrastructure/sticky_runtime_health.py @@ -45,3 +45,15 @@ async def wait_until_healthy( raise TimeoutError( f"Sticky session runtime did not become healthy within {int(timeout_seconds)}s" ) + + async def is_healthy(self, *, runtime_base_url: str) -> bool: + """Return whether the sticky runtime currently responds on /health.""" + if runtime_base_url.startswith("memory://"): + return True + url = f"{runtime_base_url.rstrip('/')}/health" + try: + async with httpx.AsyncClient(timeout=self._request_timeout_seconds) as client: + response = await client.get(url) + return response.status_code == 200 + except httpx.HTTPError: + return False diff --git a/src/api/extraction/infrastructure/sticky_session_bootstrap_builder.py b/src/api/extraction/infrastructure/sticky_session_bootstrap_builder.py index 1777db9e4..c646970c0 100644 --- a/src/api/extraction/infrastructure/sticky_session_bootstrap_builder.py +++ b/src/api/extraction/infrastructure/sticky_session_bootstrap_builder.py @@ -30,6 +30,19 @@ def __init__( self._workdir_materializer = workdir_materializer self._runtime_settings = runtime_settings or get_extraction_workload_runtime_settings() + async def resolve_job_package_ids( + self, + *, + knowledge_graph_id: str, + include_job_packages: bool, + ) -> tuple[str, ...]: + """Return JobPackage IDs that would be materialized for one session.""" + if not include_job_packages: + return () + return await self._prepared_job_package_reader.list_latest_for_knowledge_graph( + knowledge_graph_id=knowledge_graph_id, + ) + async def build( self, *, diff --git a/src/api/extraction/infrastructure/sticky_session_workdir_materializer.py b/src/api/extraction/infrastructure/sticky_session_workdir_materializer.py index 0fd3f9b66..74a0ef8c9 100644 --- a/src/api/extraction/infrastructure/sticky_session_workdir_materializer.py +++ b/src/api/extraction/infrastructure/sticky_session_workdir_materializer.py @@ -11,6 +11,13 @@ from shared_kernel.job_package.value_objects 
import JobPackageId +def _replace_directory(path: Path) -> None: + """Replace a directory tree without removing its parent mount point.""" + if path.exists(): + shutil.rmtree(path) + path.mkdir(parents=True, exist_ok=True) + + class StickySessionWorkdirMaterializer: """Materialize JobPackage archives into a session-scoped work directory.""" @@ -26,12 +33,11 @@ def prepare( ) -> Path: """Create or refresh the host work directory for one sticky session.""" session_root = self._job_package_work_dir / "sticky-sessions" / session_id - if session_root.exists(): - shutil.rmtree(session_root) + session_root.mkdir(parents=True, exist_ok=True) ingestion_context_dir = session_root / "ingestion-context" repository_files_dir = session_root / "repository-files" - ingestion_context_dir.mkdir(parents=True, exist_ok=True) - repository_files_dir.mkdir(parents=True, exist_ok=True) + _replace_directory(ingestion_context_dir) + _replace_directory(repository_files_dir) discovered = ( self._discover_job_package_ids() diff --git a/src/api/extraction/ports/sticky_runtime_health.py b/src/api/extraction/ports/sticky_runtime_health.py index be72eaa0f..c23c9d4ac 100644 --- a/src/api/extraction/ports/sticky_runtime_health.py +++ b/src/api/extraction/ports/sticky_runtime_health.py @@ -17,3 +17,7 @@ async def wait_until_healthy( ) -> AsyncIterator[str]: """Yield human-readable progress lines until healthy or timeout.""" ... + + async def is_healthy(self, *, runtime_base_url: str) -> bool: + """Return whether the sticky runtime currently responds on /health.""" + ... 
diff --git a/src/api/tests/unit/extraction/application/test_chat_turn_service.py b/src/api/tests/unit/extraction/application/test_chat_turn_service.py index 77f71c573..b579281c1 100644 --- a/src/api/tests/unit/extraction/application/test_chat_turn_service.py +++ b/src/api/tests/unit/extraction/application/test_chat_turn_service.py @@ -89,6 +89,9 @@ async def wait_until_healthy(self, **kwargs): yield "Assistant container is healthy" return + async def is_healthy(self, **kwargs) -> bool: + return True + def _build_chat_turn_service( *, diff --git a/src/api/tests/unit/extraction/application/test_sticky_session_runtime_service.py b/src/api/tests/unit/extraction/application/test_sticky_session_runtime_service.py index 46fd32977..f16bc3e61 100644 --- a/src/api/tests/unit/extraction/application/test_sticky_session_runtime_service.py +++ b/src/api/tests/unit/extraction/application/test_sticky_session_runtime_service.py @@ -69,15 +69,47 @@ async def read_for_knowledge_graph(self, *, knowledge_graph_id: str): class _StaticBootstrapBuilder: + async def resolve_job_package_ids(self, **kwargs): + return () + + async def build(self, **kwargs): + return None + + +class _RecordingBootstrapBuilder: + def __init__(self) -> None: + self.calls: list[dict[str, object]] = [] + + async def resolve_job_package_ids(self, **kwargs): + return ("pkg-1",) + async def build(self, **kwargs): + self.calls.append(kwargs) return None +class _PreparedIngestionReadinessReader: + async def read_for_knowledge_graph(self, *, knowledge_graph_id: str): + return IngestionReadinessSnapshot(data_source_count=1, prepared_source_count=1) + + class _InstantHealthChecker: async def wait_until_healthy(self, **kwargs): return yield # pragma: no cover + async def is_healthy(self, **kwargs) -> bool: + return True + + +class _UnhealthyHealthChecker: + async def wait_until_healthy(self, **kwargs): + return + yield # pragma: no cover + + async def is_healthy(self, **kwargs) -> bool: + return False + class 
_FailingStickyRuntimeManager(InMemoryStickySessionRuntimeManager): def get_or_start_runtime(self, **kwargs): @@ -171,21 +203,59 @@ async def test_ensure_runtime_for_chat_reprepares_when_persisted_runtime_is_inac assert any(event.get("type") == "ready" for event in events) assert session.runtime_context["sticky_runtime"]["container_id"] != "dead-container" - assert sticky.try_resolve_active_lease(session_id=session.id) is not None -class _RecordingBootstrapBuilder: - def __init__(self) -> None: - self.calls: list[dict[str, object]] = [] - - async def build(self, **kwargs): - self.calls.append(kwargs) - return None +@pytest.mark.asyncio +async def test_ensure_runtime_for_chat_restarts_when_job_package_materialization_changes() -> None: + repo = _InMemoryAgentSessionRepository() + session_service = ExtractionAgentSessionService(repository=repo) + sticky = InMemoryStickySessionRuntimeManager() + bootstrap = _RecordingBootstrapBuilder() + service = StickySessionRuntimeService( + session_service=session_service, + skill_resolution_service=_StaticSkillResolutionService(), + ingestion_readiness_reader=_PreparedIngestionReadinessReader(), + sticky_runtime_manager=sticky, + bootstrap_builder=bootstrap, + health_checker=_InstantHealthChecker(), + runtime_backend="container", + sticky_health_timeout_seconds=5.0, + ) + session = await session_service.get_or_create_active_session( + user_id="user-1", + knowledge_graph_id="kg-1", + mode=ExtractionSessionMode.SCHEMA_BOOTSTRAP, + ) + sticky.get_or_start_runtime( + session_id=session.id, + user_id="user-1", + knowledge_graph_id="kg-1", + mode=ExtractionSessionMode.SCHEMA_BOOTSTRAP.value, + ) + lease = sticky.try_resolve_active_lease(session_id=session.id) + session.runtime_context["workspace_materialization"] = {"job_package_ids": ["stale-pkg"]} + session.runtime_context["sticky_runtime"] = { + "container_id": lease.container_id, + "status": "active", + "runtime_base_url": lease.runtime_base_url, + "phase": "ready", + } + await 
session_service.save_session(session) + events = [ + event + async for event in service.ensure_runtime_for_chat( + tenant_id="tenant-1", + user_id="user-1", + knowledge_graph_id="kg-1", + mode=ExtractionSessionMode.SCHEMA_BOOTSTRAP, + ui_mode=GraphManagementUiMode.INITIAL_SCHEMA_DESIGN, + session=session, + ) + ] -class _PreparedIngestionReadinessReader: - async def read_for_knowledge_graph(self, *, knowledge_graph_id: str): - return IngestionReadinessSnapshot(data_source_count=1, prepared_source_count=1) + assert any(event.get("type") == "ready" for event in events) + assert bootstrap.calls @pytest.mark.asyncio @@ -215,6 +285,8 @@ async def test_ensure_runtime_for_chat_reuses_running_container_without_reprepar knowledge_graph_id="kg-1", mode=ExtractionSessionMode.SCHEMA_BOOTSTRAP.value, ) + session.runtime_context["workspace_materialization"] = {"job_package_ids": ["pkg-1"]} + await session_service.save_session(session) events = [ event @@ -230,11 +302,54 @@ async def test_ensure_runtime_for_chat_reuses_running_container_without_reprepar assert events == [] assert session.runtime_context["sticky_runtime"]["phase"] == "ready" - assert bootstrap.calls == [ - { - "tenant_id": "tenant-1", - "knowledge_graph_id": "kg-1", - "session_id": session.id, - "include_job_packages": True, - } + assert bootstrap.calls == [] + + +@pytest.mark.asyncio +async def test_ensure_runtime_for_chat_restarts_when_persisted_container_is_unhealthy() -> None: + repo = _InMemoryAgentSessionRepository() + session_service = ExtractionAgentSessionService(repository=repo) + sticky = InMemoryStickySessionRuntimeManager() + service = StickySessionRuntimeService( + session_service=session_service, + skill_resolution_service=_StaticSkillResolutionService(), + ingestion_readiness_reader=_StaticIngestionReadinessReader(), + sticky_runtime_manager=sticky, + bootstrap_builder=_StaticBootstrapBuilder(), + health_checker=_UnhealthyHealthChecker(), + runtime_backend="memory", + 
sticky_health_timeout_seconds=5.0, + ) + session = await session_service.get_or_create_active_session( + user_id="user-1", + knowledge_graph_id="kg-1", + mode=ExtractionSessionMode.SCHEMA_BOOTSTRAP, + ) + sticky.get_or_start_runtime( + session_id=session.id, + user_id="user-1", + knowledge_graph_id="kg-1", + mode=ExtractionSessionMode.SCHEMA_BOOTSTRAP.value, + ) + session.runtime_context["sticky_runtime"] = { + "container_id": "dead-container", + "status": "active", + "runtime_base_url": "memory://sticky-runtime", + "phase": "ready", + } + await session_service.save_session(session) + + events = [ + event + async for event in service.ensure_runtime_for_chat( + tenant_id="tenant-1", + user_id="user-1", + knowledge_graph_id="kg-1", + mode=ExtractionSessionMode.SCHEMA_BOOTSTRAP, + ui_mode=GraphManagementUiMode.INITIAL_SCHEMA_DESIGN, + session=session, + ) ] + + assert any(event.get("type") == "ready" for event in events) + assert session.runtime_context["sticky_runtime"]["container_id"] != "dead-container" diff --git a/src/api/tests/unit/extraction/infrastructure/test_sticky_session_workdir_materializer.py b/src/api/tests/unit/extraction/infrastructure/test_sticky_session_workdir_materializer.py index b9fbf1acb..70f96778a 100644 --- a/src/api/tests/unit/extraction/infrastructure/test_sticky_session_workdir_materializer.py +++ b/src/api/tests/unit/extraction/infrastructure/test_sticky_session_workdir_materializer.py @@ -71,3 +71,23 @@ def test_materializer_does_not_discover_archives_when_package_ids_empty(tmp_path ) assert not any((session_root / "repository-files").iterdir()) + + +def test_materializer_refresh_preserves_session_root_directory(tmp_path: Path) -> None: + package_id = "01JTESTPACK0000000000000002" + _build_package(tmp_path, package_id) + materializer = StickySessionWorkdirMaterializer(job_package_work_dir=tmp_path) + + first_root = materializer.prepare( + session_id="session-3", + knowledge_graph_id="kg-1", + job_package_ids=(package_id,), + ) + 
second_root = materializer.prepare( + session_id="session-3", + knowledge_graph_id="kg-1", + job_package_ids=(package_id,), + ) + + assert first_root == second_root + assert (second_root / "repository-files" / package_id / "README.md").exists() From d478495533c950c5222b840d80597f402775dcb9 Mon Sep 17 00:00:00 2001 From: aredenba-rh <aredenba@redhat.com> Date: Sun, 31 May 2026 20:58:58 -0400 Subject: [PATCH 075/153] refactor(dev-ui): align graph management artifacts panel with k-extract layout Split the combined schema nav/detail card into a sticky left navigator and right detail column to match k-extract's Design Artifacts pattern. Co-authored-by: Cursor <cursoragent@cursor.com> --- .../pages/knowledge-graphs/[kgId]/manage.vue | 195 +++++++++++------- .../knowledge-graph-manage-workspace.test.ts | 15 +- 2 files changed, 125 insertions(+), 85 deletions(-) diff --git a/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue b/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue index 29fe10dc3..845ff3b9e 100644 --- a/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue +++ b/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue @@ -1,5 +1,5 @@ <script setup lang="ts"> -import { computed, onMounted, ref, watch } from 'vue' +import { computed, nextTick, onMounted, ref, watch } from 'vue' import { toast } from 'vue-sonner' import { ArrowLeft, @@ -374,10 +374,6 @@ const visibleRailItems = computed(() => const schemaRailItems = computed(() => filterSchemaRailItems(visibleRailItems.value)) -const selectedSchemaRailItem = computed(() => - schemaRailItems.value.find((item) => item.id === selectedRailItemId.value) ?? null, -) - const graphManagementModeGate = computed((): GraphManagementModeGateInput => ({ workspaceMode: statusProjection.value?.workspace_mode ?? 
'schema_bootstrap', transitionEligible: statusProjection.value?.transition_eligible === true, @@ -915,6 +911,12 @@ function setGraphManagementMode(mode: GraphManagementMode) { function selectSchemaRailItem(itemId: GraphManagementRailItemId) { selectedRailItemId.value = itemId + void nextTick(() => { + document.getElementById('graph-management-artifact-detail')?.scrollIntoView({ + behavior: 'smooth', + block: 'start', + }) + }) } function onSchemaRailKeydown(event: KeyboardEvent, itemId: GraphManagementRailItemId) { @@ -1815,20 +1817,21 @@ watch(selectedOpsDataSourceId, () => { @send-message="sendChatMessage" /> - <div class="graph-management-artifacts grid gap-6 lg:grid-cols-2 lg:items-start"> - <Card id="graph-management-schema-artifacts" class="graph-management-schema-panel scroll-mt-6"> + <div class="graph-management-artifacts grid gap-6 lg:grid-cols-[minmax(0,15.5rem)_minmax(0,1fr)] lg:items-start"> + <Card + id="graph-management-schema-artifacts" + class="graph-management-schema-panel lg:sticky lg:top-4 lg:self-start" + > <CardHeader class="pb-2"> <CardTitle class="text-sm font-semibold">Schema & artifacts</CardTitle> <CardDescription class="text-xs"> Workspace signals for <span class="font-medium text-foreground">{{ graphManagementModeLabel }}</span>. - <template v-if="schemaRailItems.length > 1"> - Select an artifact to inspect its detail below. - </template> + Select an artifact to open it in the detail panel to the right. 
</CardDescription> </CardHeader> - <CardContent class="space-y-4 p-3 pt-0 text-sm"> - <div v-if="schemaRailItems.length > 1" class="space-y-1.5"> + <CardContent class="space-y-1.5 p-3 pt-0"> + <template v-if="schemaRailItems.length > 0"> <button v-for="item in schemaRailItems" :key="item.id" @@ -1843,35 +1846,40 @@ watch(selectedOpsDataSourceId, () => { <span class="font-medium leading-tight">{{ item.label }}</span> <span class="text-xs text-muted-foreground">{{ graphManagementArtifactHint(item) }}</span> </button> - </div> + </template> <p - v-else-if="schemaRailItems.length === 0" + v-else class="rounded-lg border border-dashed p-3 text-xs text-muted-foreground" > No schema artifacts for this mode. </p> + </CardContent> + </Card> - <div class="graph-management-detail space-y-4 border-t pt-4"> - <div> - <p class="text-sm font-semibold"> - {{ selectedSchemaRailItem?.label ?? 'Schema & artifacts' }} - </p> - <p class="text-xs text-muted-foreground"> - Mode: {{ graphManagementModeLabel }} - </p> - </div> - <template v-if="selectedRailItemId === 'schema-readiness'"> - <div class="rounded border p-3"> - <p class="mb-2 text-xs font-medium uppercase tracking-wider text-muted-foreground"> - Bootstrap Progress Checklist + <div id="graph-management-artifact-detail" class="graph-management-detail scroll-mt-6 space-y-6"> + <Card v-if="selectedRailItemId === 'schema-readiness'"> + <CardHeader> + <CardTitle class="text-base flex items-center gap-2"> + <CheckCircle2 class="size-4" /> + Schema readiness + </CardTitle> + <CardDescription> + Bootstrap checklist, validate, and transition controls for + <span class="font-medium text-foreground">{{ graphManagementModeLabel }}</span>. 
+ </CardDescription> + </CardHeader> + <CardContent class="space-y-4 text-sm"> + <div class="space-y-2 rounded-lg border bg-muted/30 p-3"> + <p class="text-xs font-medium uppercase tracking-wide text-muted-foreground"> + Bootstrap progress checklist </p> <div class="space-y-2"> <div v-for="item in progressChecklist" :key="item.id" - class="rounded border px-3 py-2" + class="rounded-lg border bg-card px-3 py-2" > - <div class="flex items-center justify-between"> + <div class="flex items-center justify-between gap-2"> <p class="font-medium">{{ item.label }}</p> <Badge :variant="item.passed ? 'default' : 'destructive'"> {{ item.passed ? 'Pass' : 'Fail' }} @@ -1899,16 +1907,24 @@ watch(selectedOpsDataSourceId, () => { Go to Extraction/Mutations </Button> </div> - </template> - - <template v-else-if="selectedRailItemId === 'validation-diagnostics'"> - <div class="rounded border p-3"> - <p class="mb-2 text-xs font-medium uppercase tracking-wider text-muted-foreground"> - Validation Diagnostics - </p> + </CardContent> + </Card> + + <Card v-else-if="selectedRailItemId === 'validation-diagnostics'"> + <CardHeader> + <CardTitle class="text-base flex items-center gap-2"> + <ShieldAlert class="size-4" /> + Validation diagnostics + </CardTitle> + <CardDescription> + Blocking reasons and prepopulated type gaps before transitioning to extraction. 
+ </CardDescription> + </CardHeader> + <CardContent class="space-y-4 text-sm"> + <div class="space-y-3 rounded-lg border bg-muted/30 p-3"> <div v-if="statusProjection.readiness.prepopulated_types_without_instances.length > 0" - class="rounded border border-amber-400/60 bg-amber-50/60 p-2 text-xs dark:border-amber-800 dark:bg-amber-950/20" + class="rounded-lg border border-amber-400/60 bg-amber-50/60 p-3 text-xs dark:border-amber-800 dark:bg-amber-950/20" > <p class="font-medium text-amber-800 dark:text-amber-300"> Prepopulated types missing instances @@ -1924,7 +1940,7 @@ watch(selectedOpsDataSourceId, () => { </div> <div v-if="statusProjection.readiness.blocking_reasons.length > 0" - class="mt-2 rounded border border-destructive/50 p-3" + class="rounded-lg border border-destructive/50 bg-card p-3" > <p class="mb-1 flex items-center gap-1.5 text-xs font-medium text-destructive"> <ShieldAlert class="size-3.5" /> @@ -1943,21 +1959,29 @@ watch(selectedOpsDataSourceId, () => { No validation diagnostics are currently blocking transition. </p> </div> - <div class="rounded border p-3"> - <p class="mb-2 text-xs font-medium uppercase tracking-wider text-muted-foreground"> - Next Steps + <div class="rounded-lg border bg-muted/30 p-3"> + <p class="mb-2 text-xs font-medium uppercase tracking-wide text-muted-foreground"> + Next steps </p> <ul class="list-disc space-y-1 pl-4 text-xs text-muted-foreground"> <li v-for="step in nextSteps" :key="step">{{ step }}</li> </ul> </div> - </template> - - <template v-else-if="selectedRailItemId === 'extraction-jobs-setup'"> - <p class="text-muted-foreground"> + </CardContent> + </Card> + + <Card v-else-if="selectedRailItemId === 'extraction-jobs-setup'"> + <CardHeader> + <CardTitle class="text-base flex items-center gap-2"> + <Wrench class="size-4" /> + Extraction jobs setup + </CardTitle> + <CardDescription> Trigger extraction jobs, inspect run history, and view run logs without leaving this workspace. 
- </p> - <div class="space-y-3 rounded border p-3"> + </CardDescription> + </CardHeader> + <CardContent class="space-y-4 text-sm"> + <div class="space-y-3 rounded-lg border bg-muted/30 p-3"> <p class="text-xs font-medium text-muted-foreground">Data source</p> <div v-if="graphManagementDataSourcesLoading" @@ -2007,7 +2031,7 @@ watch(selectedOpsDataSourceId, () => { </Button> </div> <div class="grid gap-3 xl:grid-cols-[300px_1fr]"> - <div class="rounded border"> + <div class="rounded-lg border bg-card"> <div class="border-b px-3 py-2 text-xs font-medium text-muted-foreground">Sync runs</div> <div v-if="inlineSyncRunsLoading" @@ -2026,7 +2050,7 @@ watch(selectedOpsDataSourceId, () => { <button v-for="run in inlineSyncRuns" :key="run.id" - class="w-full rounded border px-2 py-1.5 text-left text-xs transition-colors" + class="w-full rounded-lg border px-2 py-1.5 text-left text-xs transition-colors" :class="selectedInlineRunId === run.id ? 'border-primary bg-primary/5' : 'hover:bg-muted/40'" @click="loadInlineRunLogs(run.id)" > @@ -2040,7 +2064,7 @@ watch(selectedOpsDataSourceId, () => { </button> </div> </div> - <div class="rounded border p-3"> + <div class="rounded-lg border bg-muted/30 p-3"> <p class="mb-2 text-xs font-medium text-muted-foreground"> Run logs <span v-if="selectedOpsDataSource" class="font-normal text-muted-foreground/80"> @@ -2059,24 +2083,32 @@ watch(selectedOpsDataSourceId, () => { </div> <pre v-else - class="max-h-72 overflow-auto rounded border bg-muted/20 p-2 text-[11px]" + class="max-h-72 overflow-auto rounded-lg border bg-background p-2 text-[11px]" >{{ inlineRunLogs.join('\n') }}</pre> </div> </div> - </template> - - <template v-else-if="selectedRailItemId === 'mutation-authoring'"> - <p class="text-muted-foreground"> + </CardContent> + </Card> + + <Card v-else-if="selectedRailItemId === 'mutation-authoring'"> + <CardHeader> + <CardTitle class="text-base flex items-center gap-2"> + <PencilRuler class="size-4" /> + Mutation authoring + 
</CardTitle> + <CardDescription> Author and apply one-off JSONL mutations directly in this workspace. - </p> - <div class="space-y-3 rounded border p-3"> + </CardDescription> + </CardHeader> + <CardContent class="space-y-3 text-sm"> + <div class="space-y-3 rounded-lg border bg-muted/30 p-3"> <p class="text-xs font-medium text-muted-foreground">Mutation payload (JSONL)</p> <textarea v-model="inlineMutationJsonl" - class="min-h-44 w-full rounded border bg-background px-3 py-2 font-mono text-xs" + class="min-h-44 w-full rounded-md border border-input bg-background px-3 py-2 font-mono text-xs leading-relaxed shadow-sm placeholder:text-muted-foreground focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring" placeholder='{"op":"CREATE","type":"node","label":"repo","id":"repo:example","set_properties":{"name":"example"}}' /> - <div class="flex items-center gap-2"> + <div class="flex flex-wrap items-center gap-2"> <Button size="sm" :disabled="inlineMutationApplying" @click="applyInlineMutations"> <Loader2 v-if="inlineMutationApplying" class="mr-1.5 size-3.5 animate-spin" /> Apply Mutations @@ -2089,39 +2121,43 @@ watch(selectedOpsDataSourceId, () => { {{ inlineMutationApplyError }} </p> </div> - </template> - - <template v-else> - <p class="text-xs text-muted-foreground"> - Select a schema artifact to inspect mode-specific workspace content. - </p> - </template> - </div> - </CardContent> - </Card> + </CardContent> + </Card> - <Card id="graph-management-session-pointers" class="graph-management-session-pointers scroll-mt-6 lg:sticky lg:top-4 lg:self-start"> - <CardHeader class="pb-3"> - <CardTitle class="text-base">Session pointers</CardTitle> + <Card v-else> + <CardHeader> + <CardTitle class="text-base">Schema & artifacts</CardTitle> + <CardDescription> + Select a schema artifact from the list to inspect mode-specific workspace content. 
+ </CardDescription> + </CardHeader> + </Card> + + <Card id="graph-management-session-pointers" class="graph-management-session-pointers"> + <CardHeader> + <CardTitle class="text-base flex items-center gap-2"> + <ScrollText class="size-4" /> + Session pointers + </CardTitle> <CardDescription> Active bootstrap and extraction sessions, plus archived history for this knowledge graph. </CardDescription> </CardHeader> <CardContent class="space-y-4 text-sm"> <div class="grid gap-2 md:grid-cols-3 text-xs"> - <div class="rounded-lg border px-3 py-2"> + <div class="rounded-lg border bg-muted/30 px-3 py-2"> <p class="text-muted-foreground">Active schema bootstrap session</p> <p class="mt-1 break-all font-mono"> {{ statusProjection.session_pointers.active_schema_bootstrap_session_id ?? 'None' }} </p> </div> - <div class="rounded-lg border px-3 py-2"> + <div class="rounded-lg border bg-muted/30 px-3 py-2"> <p class="text-muted-foreground">Active extraction operations session</p> <p class="mt-1 break-all font-mono"> {{ statusProjection.session_pointers.active_extraction_operations_session_id ?? 'None' }} </p> </div> - <div class="rounded-lg border px-3 py-2"> + <div class="rounded-lg border bg-muted/30 px-3 py-2"> <p class="text-muted-foreground">Most recent completed session</p> <p class="mt-1 break-all font-mono"> {{ statusProjection.session_pointers.most_recent_completed_session_id ?? 
'None' }} @@ -2130,8 +2166,8 @@ watch(selectedOpsDataSourceId, () => { </div> <div class="space-y-3 border-t pt-3"> <div class="flex items-center justify-between"> - <p class="text-xs font-medium uppercase tracking-wider text-muted-foreground"> - Session History + <p class="text-xs font-medium uppercase tracking-wide text-muted-foreground"> + Session history </p> <Button size="sm" @@ -2160,7 +2196,7 @@ watch(selectedOpsDataSourceId, () => { <div v-for="entry in sessionHistory" :key="entry.id" - class="rounded-lg border px-3 py-2 text-xs" + class="rounded-lg border bg-card px-3 py-2 text-xs" > <div class="flex flex-wrap items-center justify-between gap-2"> <p class="font-mono break-all">{{ entry.id }}</p> @@ -2198,7 +2234,8 @@ watch(selectedOpsDataSourceId, () => { </div> </div> </CardContent> - </Card> + </Card> + </div> </div> </section> </template> diff --git a/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts b/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts index a0cdc8aac..7cfde7900 100644 --- a/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts +++ b/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts @@ -81,7 +81,7 @@ describe('Knowledge Graph Manage Workspace - graph management controls', () => { expect(manageWorkspaceVue).toContain('/sessions/${sharedSessionMode.value}/history') expect(manageWorkspaceVue).toContain('sessionHistory') expect(manageWorkspaceVue).toContain('run_metrics') - expect(manageWorkspaceVue).toContain('Session History') + expect(manageWorkspaceVue).toContain('Session history') }) }) @@ -156,7 +156,7 @@ describe('KG-MANAGE-014 - no-preview fallback state', () => { describe('Knowledge Graph Manage Workspace - bootstrap readiness guidance', () => { it('renders a bootstrap progress checklist section with explicit checks', () => { - expect(manageWorkspaceVue).toContain('Bootstrap Progress Checklist') + expect(manageWorkspaceVue).toContain('Bootstrap progress checklist') 
expect(manageWorkspaceVue).toContain('progressChecklist') expect(manageWorkspaceVue).toContain('Minimum entity types') expect(manageWorkspaceVue).toContain('Minimum relationship types') @@ -164,13 +164,13 @@ describe('Knowledge Graph Manage Workspace - bootstrap readiness guidance', () = }) it('renders diagnostics panel with prepopulated type failures and blocking reasons', () => { - expect(manageWorkspaceVue).toContain('Validation Diagnostics') + expect(manageWorkspaceVue).toContain('Validation diagnostics') expect(manageWorkspaceVue).toContain('prepopulated_types_without_instances') expect(manageWorkspaceVue).toContain('blocking_reasons') }) it('renders explicit next steps guidance for transition readiness', () => { - expect(manageWorkspaceVue).toContain('Next Steps') + expect(manageWorkspaceVue).toContain('Next steps') expect(manageWorkspaceVue).toContain('Run Validate to refresh readiness signals') expect(manageWorkspaceVue).toContain('Transition is enabled') }) @@ -423,12 +423,14 @@ describe('KG-MANAGE-007 - graph management modes', () => { }) describe('KG-MANAGE-008 - hybrid lower panel shared rail', () => { - it('renders side-by-side schema artifacts and session pointers panels', () => { + it('renders artifact navigator and detail panel in k-extract-style layout', () => { expect(manageWorkspaceVue).toContain('graph-management-artifacts') expect(manageWorkspaceVue).toContain('Schema & artifacts') + expect(manageWorkspaceVue).toContain('graph-management-artifact-detail') expect(manageWorkspaceVue).toContain('graph-management-session-pointers') expect(manageWorkspaceVue).toContain('graphManagementArtifactRowClass') expect(manageWorkspaceVue).toContain('schemaRailItems') + expect(manageWorkspaceVue).toContain('lg:grid-cols-[minmax(0,15.5rem)_minmax(0,1fr)]') }) it('builds rail items with status and last-updated metadata', () => { @@ -450,6 +452,7 @@ describe('KG-MANAGE-008 - hybrid lower panel shared rail', () => { describe('KG-MANAGE-009 - hybrid lower panel 
mode-specific detail', () => { it('renders mode-specific detail panel content regions', () => { + expect(manageWorkspaceVue).toContain('graph-management-artifact-detail') expect(manageWorkspaceVue).toContain('graph-management-detail') expect(manageWorkspaceVue).toContain('selectedRailItemId') expect(manageWorkspaceVue).toContain("selectedRailItemId === 'schema-readiness'") @@ -479,7 +482,7 @@ describe('KG-MANAGE-009 - hybrid lower panel mode-specific detail', () => { describe('KG-MANAGE-010 - schema design parity behavior', () => { it('exposes schema readiness and validation detail in initial schema design mode', () => { expect(manageWorkspaceVue).toContain('progressChecklist') - expect(manageWorkspaceVue).toContain('Bootstrap Progress Checklist') + expect(manageWorkspaceVue).toContain('Bootstrap progress checklist') expect(manageWorkspaceVue).toContain('blocking_reasons') expect(manageWorkspaceVue).toContain('prepopulated_types_without_instances') }) From 2072434f4c20377f4fb94eff84a4b3366bbddfc6 Mon Sep 17 00:00:00 2001 From: aredenba-rh <aredenba@redhat.com> Date: Sun, 31 May 2026 21:08:32 -0400 Subject: [PATCH 076/153] feat(agent-runtime): stream intermediate thinking lines during SDK turns Surface tool use, reasoning, task progress, and compose previews as NDJSON thinking events so the Graph Management Assistant panel updates while Vertex work is in flight. 
Co-authored-by: Cursor <cursoragent@cursor.com> --- .../kartograph_agent_runtime/executor.py | 27 ++- .../thinking_stream.py | 175 ++++++++++++++++++ .../tests/test_thinking_stream.py | 104 +++++++++++ .../extraction/SharedConversationPanel.vue | 8 +- .../pages/knowledge-graphs/[kgId]/manage.vue | 10 +- .../knowledge-graph-manage-workspace.test.ts | 2 +- 6 files changed, 311 insertions(+), 15 deletions(-) create mode 100644 src/agent-runtime/kartograph_agent_runtime/thinking_stream.py create mode 100644 src/agent-runtime/tests/test_thinking_stream.py diff --git a/src/agent-runtime/kartograph_agent_runtime/executor.py b/src/agent-runtime/kartograph_agent_runtime/executor.py index 5afea4bd7..e00879fba 100644 --- a/src/agent-runtime/kartograph_agent_runtime/executor.py +++ b/src/agent-runtime/kartograph_agent_runtime/executor.py @@ -8,6 +8,10 @@ from typing import Any from kartograph_agent_runtime.settings import AgentRuntimeSettings +from kartograph_agent_runtime.thinking_stream import ( + initial_sdk_thinking_lines, + thinking_events_from_sdk_message, +) from kartograph_agent_runtime.tools import RuntimeTooling from kartograph_agent_runtime.vertex import build_claude_agent_env @@ -196,14 +200,8 @@ async def _stream_with_claude_sdk( if history_lines: prompt = "Recent conversation:\n" + "\n".join(history_lines) + f"\n\nUser: {message}" - yield { - "type": "thinking", - "recent": [ - f"Claude Agent SDK query started ({auth_mode})…", - f"Mode overlay: {ui_mode}", - "Tools: graph read enclave, mutation emitter", - ], - } + recent = initial_sdk_thinking_lines(auth_mode=auth_mode, ui_mode=ui_mode) + yield {"type": "thinking", "recent": list(recent)} sdk_env = _build_sdk_env(settings) workspace_dir = settings.workspace_dir.strip() or "/workspace" @@ -218,12 +216,25 @@ async def _stream_with_claude_sdk( ) reply: str | None = None + reply_parts: list[str] = [] + last_compose_at = 0 try: async with asyncio.timeout(turn_timeout_seconds): async for sdk_message in 
query(prompt=prompt, options=options): + thinking_events, last_compose_at = thinking_events_from_sdk_message( + sdk_message, + recent=recent, + reply_parts=reply_parts, + last_compose_at=last_compose_at, + ) + for event in thinking_events: + yield event + extracted = _extract_sdk_reply(sdk_message) if extracted: reply = extracted + elif reply_parts: + reply = "".join(reply_parts).strip() or None except TimeoutError: yield { "type": "done", diff --git a/src/agent-runtime/kartograph_agent_runtime/thinking_stream.py b/src/agent-runtime/kartograph_agent_runtime/thinking_stream.py new file mode 100644 index 000000000..f474a0858 --- /dev/null +++ b/src/agent-runtime/kartograph_agent_runtime/thinking_stream.py @@ -0,0 +1,175 @@ +"""Rolling thinking-line panel updates for NDJSON chat streams.""" + +from __future__ import annotations + +from typing import Any + +_MAX_THINKING_LINES = 8 + + +def normalize_activity_line(text: str) -> str: + line = " ".join(text.split()) + if len(line) > 120: + return line[:117] + "…" + return line + + +def push_thinking(recent: list[str], line: str) -> dict[str, Any] | None: + normalized = normalize_activity_line(line) + if not normalized: + return None + if recent and recent[-1] == normalized: + return None + recent.append(normalized) + if len(recent) > _MAX_THINKING_LINES: + recent[:] = recent[-_MAX_THINKING_LINES:] + return {"type": "thinking", "recent": list(recent)} + + +def update_composing_line(recent: list[str], preview_tail: str) -> dict[str, Any] | None: + preview_tail = normalize_activity_line(preview_tail.replace("\n", " ")) + line = normalize_activity_line( + f"Composing reply · {preview_tail}" if preview_tail else "Composing reply…", + ) + prefix = "Composing reply" + if recent and str(recent[-1]).startswith(prefix): + recent[-1] = line + return {"type": "thinking", "recent": list(recent)} + return push_thinking(recent, line) + + +def _tool_use_line(name: str, tool_input: dict[str, Any]) -> str: + if name == "Read": + path = 
tool_input.get("file_path") or tool_input.get("path") or "" + return f"Reading {path}" if path else "Reading file…" + if name in {"Write", "Edit"}: + path = tool_input.get("file_path") or tool_input.get("path") or "" + verb = "Writing" if name == "Write" else "Editing" + return f"{verb} {path}" if path else f"{verb} file…" + if name == "Grep": + pattern = tool_input.get("pattern") or "" + return f"Searching for {pattern}" if pattern else "Searching repository…" + if name == "Glob": + pattern = tool_input.get("pattern") or "" + return f"Listing files {pattern}" if pattern else "Listing files…" + if name == "Bash": + command = tool_input.get("command") or "" + return f"Running {command}" if command else "Running shell command…" + return f"Running {name}…" + + +def _stream_event_line(event: dict[str, Any]) -> str | None: + event_type = event.get("type") + if event_type == "content_block_start": + block = event.get("content_block") or {} + block_type = block.get("type") + if block_type == "tool_use": + name = block.get("name") or "tool" + return f"Running {name}…" + if block_type == "thinking": + return "Reasoning…" + if event_type == "content_block_delta": + delta = event.get("delta") or {} + if delta.get("type") == "thinking_delta": + thinking = str(delta.get("thinking") or "").strip() + if thinking: + return f"Reasoning · {normalize_activity_line(thinking)}" + if delta.get("type") == "text_delta": + text = str(delta.get("text") or "").strip() + if text: + return None # handled via composing line from accumulated text + return None + + +def thinking_events_from_sdk_message( + sdk_message: Any, + *, + recent: list[str], + reply_parts: list[str], + last_compose_at: int, + compose_step: int = 120, +) -> tuple[list[dict[str, Any]], int]: + """Return thinking NDJSON events and updated compose offset for one SDK message.""" + events: list[dict[str, Any]] = [] + + content = getattr(sdk_message, "content", None) + if isinstance(content, list): + for block in content: + 
block_type = type(block).__name__ + if block_type == "ThinkingBlock" or hasattr(block, "thinking"): + thinking = normalize_activity_line(getattr(block, "thinking", "") or "") + if thinking: + event = push_thinking(recent, f"Reasoning · {thinking}") + if event: + events.append(event) + elif block_type == "ToolUseBlock" or hasattr(block, "name"): + name = str(getattr(block, "name", "") or "tool") + tool_input = getattr(block, "input", None) or {} + if not isinstance(tool_input, dict): + tool_input = {} + event = push_thinking(recent, _tool_use_line(name, tool_input)) + if event: + events.append(event) + elif block_type == "TextBlock" or hasattr(block, "text"): + text = str(getattr(block, "text", "") or "") + if text.strip(): + reply_parts.append(text) + blob = "".join(reply_parts) + plain = text.replace("\n", "").strip() + if plain and len(blob) - last_compose_at >= compose_step: + tail = blob[-88:].replace("\n", " ").strip() + event = update_composing_line(recent, tail) + if event: + events.append(event) + last_compose_at = len(blob) + return events, last_compose_at + + task_id = getattr(sdk_message, "task_id", None) + description = str(getattr(sdk_message, "description", "") or "").strip() + if task_id and description: + last_tool = str(getattr(sdk_message, "last_tool_name", "") or "").strip() + usage = getattr(sdk_message, "usage", None) + prefix = "Task started ·" if usage is None and not last_tool else "" + line = f"{prefix}{description}".strip() + event = push_thinking(recent, line) + if event: + events.append(event) + if last_tool: + event = push_thinking(recent, f"Running {last_tool}…") + if event: + events.append(event) + return events, last_compose_at + + payload = getattr(sdk_message, "event", None) + if isinstance(payload, dict): + line = _stream_event_line(payload) + if line: + event = push_thinking(recent, line) + if event: + events.append(event) + return events, last_compose_at + + subtype = str(getattr(sdk_message, "subtype", "") or "").strip() + data 
= getattr(sdk_message, "data", None) or {} + if subtype == "task_progress" and isinstance(data, dict): + progress_description = str(data.get("description") or "").strip() + last_tool = str(data.get("last_tool_name") or "").strip() + if progress_description: + event = push_thinking(recent, progress_description) + if event: + events.append(event) + if last_tool: + event = push_thinking(recent, f"Running {last_tool}…") + if event: + events.append(event) + + return events, last_compose_at + + +def initial_sdk_thinking_lines(*, auth_mode: str, ui_mode: str) -> list[str]: + return [ + f"Claude Agent SDK query started ({auth_mode})…", + f"Mode overlay: {ui_mode}", + "Tools: graph read enclave, mutation emitter", + "Connected — working on your message…", + ] diff --git a/src/agent-runtime/tests/test_thinking_stream.py b/src/agent-runtime/tests/test_thinking_stream.py new file mode 100644 index 000000000..b399c70da --- /dev/null +++ b/src/agent-runtime/tests/test_thinking_stream.py @@ -0,0 +1,104 @@ +"""Unit tests for rolling thinking-line stream helpers.""" + +from __future__ import annotations + +from dataclasses import dataclass + +from kartograph_agent_runtime.thinking_stream import ( + initial_sdk_thinking_lines, + push_thinking, + thinking_events_from_sdk_message, +) + + +@dataclass +class FakeToolUseBlock: + name: str + input: dict + + +@dataclass +class FakeThinkingBlock: + thinking: str + + +@dataclass +class FakeTextBlock: + text: str + + +@dataclass +class FakeAssistantMessage: + content: list + + +@dataclass +class FakeTaskProgressMessage: + task_id: str + description: str + last_tool_name: str | None = None + usage: dict | None = None + + +def test_initial_sdk_thinking_lines_include_connected_message() -> None: + lines = initial_sdk_thinking_lines(auth_mode="Vertex AI", ui_mode="initial-schema-design") + + assert any("Claude Agent SDK query started" in line for line in lines) + assert any("Connected" in line for line in lines) + + +def 
test_push_thinking_deduplicates_and_caps_recent_lines() -> None: + recent: list[str] = [] + first = push_thinking(recent, "Reading schema.yaml") + second = push_thinking(recent, "Reading schema.yaml") + third = push_thinking(recent, "Running Grep…") + + assert first is not None + assert second is None + assert third is not None + assert recent[-1] == "Running Grep…" + + +def test_thinking_events_from_assistant_message_tool_and_reasoning_blocks() -> None: + recent = initial_sdk_thinking_lines(auth_mode="Vertex AI", ui_mode="initial-schema-design") + message = FakeAssistantMessage( + content=[ + FakeThinkingBlock(thinking="Need to inspect entity ontology first."), + FakeToolUseBlock(name="Read", input={"file_path": "/workspace/entity_ontology.json"}), + FakeTextBlock(text="I reviewed the ontology and found three entity types."), + ], + ) + + events, _ = thinking_events_from_sdk_message( + message, + recent=recent, + reply_parts=[], + last_compose_at=0, + compose_step=10, + ) + + assert events + assert any("Reasoning" in line for line in events[-1]["recent"]) + assert any("Reading /workspace/entity_ontology.json" in line for line in events[-1]["recent"]) + + +def test_thinking_events_from_task_progress_message() -> None: + recent = initial_sdk_thinking_lines(auth_mode="Vertex AI", ui_mode="initial-schema-design") + message = FakeTaskProgressMessage( + task_id="task-1", + description="Inspecting repository files", + last_tool_name="Grep", + usage={"total_tokens": 1, "tool_uses": 1, "duration_ms": 1}, + ) + + events, _ = thinking_events_from_sdk_message( + message, + recent=recent, + reply_parts=[], + last_compose_at=0, + ) + + assert events + joined = "\n".join(events[-1]["recent"]) + assert "Inspecting repository files" in joined + assert "Running Grep" in joined diff --git a/src/dev-ui/app/components/extraction/SharedConversationPanel.vue b/src/dev-ui/app/components/extraction/SharedConversationPanel.vue index dbd43ee16..1bd788399 100644 --- 
a/src/dev-ui/app/components/extraction/SharedConversationPanel.vue +++ b/src/dev-ui/app/components/extraction/SharedConversationPanel.vue @@ -91,10 +91,10 @@ const runtimeActivityTitle = computed(() => : 'Thinking...', ) -const thinkingDisplaySlots = computed(() => { +const thinkingDisplayLines = computed(() => { const src = props.activityLines.filter(Boolean) if (src.length === 0) return [''] - return src.slice(-3) + return src }) function isUserRole(role: string | undefined): boolean { @@ -323,9 +323,9 @@ onMounted(() => { <Loader2 class="size-4 shrink-0 animate-spin text-primary" aria-hidden="true" /> <span class="font-medium tracking-tight">{{ runtimeActivityTitle }}</span> </div> - <ol class="m-0 list-none space-y-2 border-l-2 border-primary/25 pl-3"> + <ol class="m-0 max-h-48 list-none space-y-2 overflow-y-auto border-l-2 border-primary/25 pl-3"> <li - v-for="(line, lineIdx) in thinkingDisplaySlots" + v-for="(line, lineIdx) in thinkingDisplayLines" :key="`${lineIdx}-${line || 'empty'}`" class="flex gap-2 text-xs leading-snug" > diff --git a/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue b/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue index 845ff3b9e..49e66a397 100644 --- a/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue +++ b/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue @@ -972,7 +972,10 @@ async function warmupAssistantRuntime() { })) { if (generation !== runtimeWarmupGeneration) return if (event.type === 'thinking' && Array.isArray(event.recent)) { - sessionActivityLines.value = event.recent.filter(Boolean) + const recent = event.recent.filter(Boolean) + sessionActivityLines.value = recent.length > 0 + ? 
recent + : sessionActivityLines.value } if (event.type === 'wait' && event.message) { sessionActivityLines.value = [event.message] @@ -1047,7 +1050,10 @@ async function sendChatMessage(message: string) { message: trimmed, })) { if (event.type === 'thinking' && Array.isArray(event.recent)) { - sessionActivityLines.value = event.recent.filter(Boolean) + const recent = event.recent.filter(Boolean) + sessionActivityLines.value = recent.length > 0 + ? recent + : sessionActivityLines.value } if (event.type === 'wait') { sessionActivityLines.value = event.message diff --git a/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts b/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts index 7cfde7900..6bea76b83 100644 --- a/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts +++ b/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts @@ -367,7 +367,7 @@ describe('Shared conversation panel - extraction UX contract', () => { }) it('renders bubble chat, thinking state, and auto-scroll', () => { - expect(sharedConversationPanelVue).toContain('thinkingDisplaySlots') + expect(sharedConversationPanelVue).toContain('thinkingDisplayLines') expect(sharedConversationPanelVue).toContain('chatScrollRef') expect(sharedConversationPanelVue).toContain('renderAssistantHtml') expect(sharedConversationPanelVue).toContain('scrollToBottom') From cfad11f6acd320a9244e8b184359113304f72ea3 Mon Sep 17 00:00:00 2001 From: aredenba-rh <aredenba@redhat.com> Date: Sun, 31 May 2026 22:31:39 -0400 Subject: [PATCH 077/153] fix(extraction): materialize non-empty prepared sources for agent workspaces Ensure ingest-only prepares full-branch JobPackages and only materialize packages that contain repository content so Graph Management sessions can reliably read repo files. Add workspace source indexing plus prompt/thinking updates so the agent reports accurate available files and tools. 
Co-authored-by: Cursor <cursoragent@cursor.com> --- .../kartograph_agent_runtime/executor.py | 44 ++++++- .../thinking_stream.py | 2 +- src/agent-runtime/tests/test_executor.py | 34 ++++++ src/api/extraction/dependencies.py | 5 +- .../prepared_job_package_reader.py | 66 +++++++++-- .../sticky_session_workdir_materializer.py | 50 +++++++- .../application/services/ingestion_service.py | 14 ++- .../ingestion/infrastructure/event_handler.py | 19 ++++ .../job_package_archive_reader.py | 35 ++++-- .../presentation/data_sources/routes.py | 15 ++- .../job_package/archive_availability.py | 11 ++ .../test_prepared_job_package_reader.py | 107 ++++++++++++++++++ ...est_sticky_session_workdir_materializer.py | 51 +++++++++ .../application/test_ingestion_service.py | 26 +++++ .../test_ingestion_event_handler.py | 4 + .../presentation/test_data_sources_routes.py | 18 ++- .../job_package/test_archive_availability.py | 10 ++ 17 files changed, 475 insertions(+), 36 deletions(-) create mode 100644 src/api/tests/unit/extraction/infrastructure/test_prepared_job_package_reader.py diff --git a/src/agent-runtime/kartograph_agent_runtime/executor.py b/src/agent-runtime/kartograph_agent_runtime/executor.py index e00879fba..e96be5f1a 100644 --- a/src/agent-runtime/kartograph_agent_runtime/executor.py +++ b/src/agent-runtime/kartograph_agent_runtime/executor.py @@ -37,15 +37,54 @@ def _build_system_prompt( def _build_workspace_prompt_appendix(settings: AgentRuntimeSettings) -> str: + import json from pathlib import Path root = Path(settings.workspace_dir) + index_path = root / "sources-index.json" + if index_path.is_file(): + try: + index = json.loads(index_path.read_text(encoding="utf-8")) + except (OSError, json.JSONDecodeError): + index = None + sources = index.get("sources") if isinstance(index, dict) else None + if isinstance(sources, list) and sources: + lines = [ + "## Session workspace", + f"Workspace mount: `{settings.workspace_dir}`", + ( + "Prepared repository files live under " + 
"`repository-files/<job_package_id>/` relative to the workspace mount. " + "Use Read, Grep, and Glob tools against those paths." + ), + ] + for source in sources[:12]: + if not isinstance(source, dict): + continue + package_id = str(source.get("job_package_id") or "?") + entry_count = source.get("entry_count", 0) + repository_root = str( + source.get("repository_root") or f"repository-files/{package_id}" + ) + data_source_id = str(source.get("data_source_id") or "?") + lines.append( + f"- `{repository_root}`: {entry_count} file(s) " + f"(data source `{data_source_id}`)" + ) + sample_paths = source.get("sample_paths") + if isinstance(sample_paths, list): + for path in sample_paths[:6]: + if path: + lines.append(f" - `{path}`") + return "\n".join(lines) + repo_root = root / "repository-files" if not repo_root.is_dir(): return ( f"## Session workspace\n" f"Workspace mount: `{settings.workspace_dir}`\n" - "No prepared JobPackage repository files are materialized yet." + "No prepared JobPackage repository files are materialized yet. " + "Prepare data sources under Graph Management → Data sources." ) package_dirs = sorted(path for path in repo_root.iterdir() if path.is_dir()) @@ -54,8 +93,7 @@ def _build_workspace_prompt_appendix(settings: AgentRuntimeSettings) -> str: f"## Session workspace\n" f"Workspace mount: `{settings.workspace_dir}`\n" "Prepared data sources exist, but repository files have not been extracted yet. " - "If the user asks about repository content, explain that ingestion context may " - "need to be re-prepared under Data sources." + "Re-prepare data sources under Graph Management → Data sources." 
) lines = [ diff --git a/src/agent-runtime/kartograph_agent_runtime/thinking_stream.py b/src/agent-runtime/kartograph_agent_runtime/thinking_stream.py index f474a0858..b215b859d 100644 --- a/src/agent-runtime/kartograph_agent_runtime/thinking_stream.py +++ b/src/agent-runtime/kartograph_agent_runtime/thinking_stream.py @@ -170,6 +170,6 @@ def initial_sdk_thinking_lines(*, auth_mode: str, ui_mode: str) -> list[str]: return [ f"Claude Agent SDK query started ({auth_mode})…", f"Mode overlay: {ui_mode}", - "Tools: graph read enclave, mutation emitter", + "Tools: Read, Grep, Glob on workspace repository-files", "Connected — working on your message…", ] diff --git a/src/agent-runtime/tests/test_executor.py b/src/agent-runtime/tests/test_executor.py index 865513b8c..1af437dd2 100644 --- a/src/agent-runtime/tests/test_executor.py +++ b/src/agent-runtime/tests/test_executor.py @@ -2,6 +2,7 @@ from __future__ import annotations +import json from pathlib import Path import pytest @@ -14,6 +15,39 @@ from kartograph_agent_runtime.settings import AgentRuntimeSettings +def test_build_workspace_prompt_appendix_prefers_sources_index(tmp_path: Path) -> None: + package_id = "pkg-1" + package_root = tmp_path / "repository-files" / package_id / "pkg" / "api" + package_root.mkdir(parents=True) + (package_root / "adapter_status_types_test.go").write_text("package api\n", encoding="utf-8") + (tmp_path / "sources-index.json").write_text( + json.dumps( + { + "version": 1, + "knowledge_graph_id": "kg-1", + "sources": [ + { + "job_package_id": package_id, + "data_source_id": "ds-hyperfleet-api", + "entry_count": 142, + "repository_root": f"repository-files/{package_id}", + "sample_paths": ["pkg/api/adapter_status_types_test.go"], + } + ], + } + ), + encoding="utf-8", + ) + + appendix = _build_workspace_prompt_appendix( + AgentRuntimeSettings(KARTOGRAPH_WORKSPACE_DIR=str(tmp_path)) + ) + + assert "ds-hyperfleet-api" in appendix + assert "142 file(s)" in appendix + assert 
"pkg/api/adapter_status_types_test.go" in appendix + + def test_build_workspace_prompt_appendix_lists_materialized_repository_files( tmp_path: Path, ) -> None: diff --git a/src/api/extraction/dependencies.py b/src/api/extraction/dependencies.py index 343c8fcd8..fbd2387fe 100644 --- a/src/api/extraction/dependencies.py +++ b/src/api/extraction/dependencies.py @@ -104,7 +104,10 @@ def get_extraction_chat_turn_service( ) bootstrap_builder = StickySessionBootstrapBuilder( credential_issuer=get_workload_credential_issuer(), - prepared_job_package_reader=SqlPreparedJobPackageReader(session=session), + prepared_job_package_reader=SqlPreparedJobPackageReader( + session=session, + job_package_work_dir=Path(runtime_settings.job_package_work_dir), + ), workdir_materializer=StickySessionWorkdirMaterializer( job_package_work_dir=Path(runtime_settings.job_package_work_dir), ), diff --git a/src/api/extraction/infrastructure/prepared_job_package_reader.py b/src/api/extraction/infrastructure/prepared_job_package_reader.py index b99684894..1265dcf94 100644 --- a/src/api/extraction/infrastructure/prepared_job_package_reader.py +++ b/src/api/extraction/infrastructure/prepared_job_package_reader.py @@ -2,15 +2,26 @@ from __future__ import annotations +from pathlib import Path + from sqlalchemy import text from sqlalchemy.ext.asyncio import AsyncSession +from shared_kernel.job_package.reader import JobPackageReader +from shared_kernel.job_package.value_objects import JobPackageId + class SqlPreparedJobPackageReader: - """Reads latest prepared JobPackage ids from outbox events for one knowledge graph.""" + """Reads latest materializable JobPackage ids from outbox events for one KG.""" - def __init__(self, *, session: AsyncSession) -> None: + def __init__( + self, + *, + session: AsyncSession, + job_package_work_dir: Path, + ) -> None: self._session = session + self._job_package_work_dir = job_package_work_dir async def list_latest_for_knowledge_graph( self, *, knowledge_graph_id: str @@ 
-18,8 +29,10 @@ async def list_latest_for_knowledge_graph( result = await self._session.execute( text( """ - SELECT DISTINCT ON (payload->>'data_source_id') - payload->>'job_package_id' AS job_package_id + SELECT + payload->>'data_source_id' AS data_source_id, + payload->>'job_package_id' AS job_package_id, + occurred_at FROM outbox WHERE event_type IN ('IngestionPrepared', 'JobPackageProduced') AND payload->>'knowledge_graph_id' = :knowledge_graph_id @@ -29,9 +42,42 @@ async def list_latest_for_knowledge_graph( ), {"knowledge_graph_id": knowledge_graph_id}, ) - package_ids = tuple( - str(row.job_package_id) - for row in result - if row.job_package_id is not None and str(row.job_package_id).strip() - ) - return package_ids + rows = result.fetchall() + + by_source: dict[str, list] = {} + for row in rows: + data_source_id = str(row.data_source_id or "").strip() + if not data_source_id: + continue + by_source.setdefault(data_source_id, []).append(row) + + selected: list[str] = [] + for data_source_id in sorted(by_source): + package_id = self._first_materializable_package_id( + rows=by_source[data_source_id], + ) + if package_id is not None: + selected.append(package_id) + + return tuple(selected) + + def _first_materializable_package_id(self, *, rows) -> str | None: + for row in rows: + package_id = str(row.job_package_id or "").strip() + if not package_id: + continue + if self._package_has_repository_content(package_id): + return package_id + return None + + def _package_has_repository_content(self, package_id: str) -> bool: + archive_path = self._job_package_work_dir / JobPackageId( + value=package_id + ).archive_name() + if not archive_path.is_file(): + return False + try: + manifest = JobPackageReader(archive_path).read_manifest() + except (OSError, ValueError): + return False + return manifest.entry_count > 0 diff --git a/src/api/extraction/infrastructure/sticky_session_workdir_materializer.py 
b/src/api/extraction/infrastructure/sticky_session_workdir_materializer.py index 74a0ef8c9..bac5f08f3 100644 --- a/src/api/extraction/infrastructure/sticky_session_workdir_materializer.py +++ b/src/api/extraction/infrastructure/sticky_session_workdir_materializer.py @@ -2,6 +2,7 @@ from __future__ import annotations +import json from pathlib import Path import shutil import zipfile @@ -10,6 +11,8 @@ from shared_kernel.job_package.reader import JobPackageReader from shared_kernel.job_package.value_objects import JobPackageId +_WORKSPACE_INDEX_FILENAME = "sources-index.json" + def _replace_directory(path: Path) -> None: """Replace a directory tree without removing its parent mount point.""" @@ -44,10 +47,16 @@ def prepare( if job_package_ids is None else job_package_ids ) + index_sources: list[dict[str, object]] = [] for package_id in discovered: archive_path = self._job_package_work_dir / JobPackageId(value=package_id).archive_name() if not archive_path.exists(): continue + reader = JobPackageReader(archive_path) + manifest = reader.read_manifest() + if manifest.entry_count <= 0: + continue + package_dir = ingestion_context_dir / package_id package_dir.mkdir(parents=True, exist_ok=True) with zipfile.ZipFile(archive_path) as archive: @@ -55,7 +64,7 @@ def prepare( validate_zip_entry_name(entry_name) archive.extract(entry_name, path=package_dir) - reader = JobPackageReader(archive_path) + sample_paths: list[str] = [] for change in reader.iter_changeset(): if change.content_ref is None or not change.path: continue @@ -63,9 +72,27 @@ def prepare( output_path = repository_files_dir / package_id / change.path output_path.parent.mkdir(parents=True, exist_ok=True) output_path.write_bytes(reader.read_content(change.content_ref)) + if len(sample_paths) < 8: + sample_paths.append(change.path) + + index_sources.append( + { + "job_package_id": package_id, + "data_source_id": manifest.data_source_id, + "entry_count": manifest.entry_count, + "sync_mode": str(manifest.sync_mode), + 
"repository_root": f"repository-files/{package_id}", + "sample_paths": sample_paths, + } + ) marker = session_root / "knowledge-graph-id" marker.write_text(knowledge_graph_id, encoding="utf-8") + self._write_workspace_index( + session_root=session_root, + knowledge_graph_id=knowledge_graph_id, + sources=index_sources, + ) return session_root def _discover_job_package_ids(self) -> tuple[str, ...]: @@ -75,3 +102,24 @@ def _discover_job_package_ids(self) -> tuple[str, ...]: if stem: package_ids.append(stem) return tuple(package_ids) + + def _write_workspace_index( + self, + *, + session_root: Path, + knowledge_graph_id: str, + sources: list[dict[str, object]], + ) -> None: + index_path = session_root / _WORKSPACE_INDEX_FILENAME + index_path.write_text( + json.dumps( + { + "version": 1, + "knowledge_graph_id": knowledge_graph_id, + "sources": sources, + }, + indent=2, + ) + + "\n", + encoding="utf-8", + ) diff --git a/src/api/ingestion/application/services/ingestion_service.py b/src/api/ingestion/application/services/ingestion_service.py index 3abfd9472..a9dd1892f 100644 --- a/src/api/ingestion/application/services/ingestion_service.py +++ b/src/api/ingestion/application/services/ingestion_service.py @@ -61,6 +61,7 @@ async def run( tenant_id: str | None = None, credentials: dict[str, str] | None = None, baseline_commit: str | None = None, + pipeline_mode: str = "full", ) -> IngestionRunResult: """Run the ingestion pipeline for a data source sync. @@ -103,25 +104,30 @@ async def run( ) checkpoint = None - if baseline_commit: + sync_mode = SyncMode.INCREMENTAL + if pipeline_mode == "ingest_only": + # Graph-management prepare must snapshot the full branch so the sticky + # session workspace contains every repository file, not just deltas. 
+ sync_mode = SyncMode.FULL_REFRESH + elif baseline_commit: checkpoint = AdapterCheckpoint( schema_version="1.0.0", data={"commit_sha": baseline_commit}, ) - + # Extract raw items from the adapter using the new ExtractionResult API result = await adapter.extract( connection_config=connection_config, credentials=resolved_credentials, checkpoint=checkpoint, - sync_mode=SyncMode.INCREMENTAL, + sync_mode=sync_mode, ) # Build the JobPackage builder = JobPackageBuilder( data_source_id=data_source_id, knowledge_graph_id=knowledge_graph_id, - sync_mode=SyncMode.INCREMENTAL, + sync_mode=sync_mode, ) # Register content blobs (deduplication is handled by the builder) diff --git a/src/api/ingestion/infrastructure/event_handler.py b/src/api/ingestion/infrastructure/event_handler.py index ceec6fd32..b0adbc576 100644 --- a/src/api/ingestion/infrastructure/event_handler.py +++ b/src/api/ingestion/infrastructure/event_handler.py @@ -142,6 +142,7 @@ async def handle( tenant_id=payload.get("tenant_id"), credentials=runtime_credentials or payload.get("credentials"), baseline_commit=payload.get("baseline_commit"), + pipeline_mode=pipeline_mode, ) except asyncio.CancelledError: # Propagate task cancellation so the event loop can shut down @@ -165,6 +166,23 @@ async def handle( # Ingestion succeeded — append success event outside the try block so # that an outbox write failure is not misclassified as IngestionFailed. 
if ingest_only: + if ingestion_result.entry_count == 0: + await self._outbox.append( + event_type="IngestionPrepared", + payload={ + "sync_run_id": sync_run_id, + "data_source_id": data_source_id, + "knowledge_graph_id": knowledge_graph_id, + "no_changes_detected": True, + "prepared_commit_sha": ingestion_result.prepared_commit_sha, + "changeset_entry_count": 0, + "occurred_at": now.isoformat(), + }, + occurred_at=now, + aggregate_type="sync_run", + aggregate_id=sync_run_id, + ) + return await self._outbox.append( event_type="IngestionPrepared", payload={ @@ -174,6 +192,7 @@ async def handle( "job_package_id": str(ingestion_result.job_package_id), "prepared_commit_sha": ingestion_result.prepared_commit_sha, "prepared_file_count": ingestion_result.branch_file_count, + "changeset_entry_count": ingestion_result.entry_count, "occurred_at": now.isoformat(), }, occurred_at=now, diff --git a/src/api/management/infrastructure/job_package_archive_reader.py b/src/api/management/infrastructure/job_package_archive_reader.py index 00e4d678d..9499ac5b4 100644 --- a/src/api/management/infrastructure/job_package_archive_reader.py +++ b/src/api/management/infrastructure/job_package_archive_reader.py @@ -1,16 +1,22 @@ -"""Read latest JobPackage identifiers for data source archive availability checks.""" +"""Read latest materializable JobPackage identifiers for archive availability checks.""" from __future__ import annotations +from pathlib import Path + from sqlalchemy import text from sqlalchemy.ext.asyncio import AsyncSession +from shared_kernel.job_package.reader import JobPackageReader +from shared_kernel.job_package.value_objects import JobPackageId + class SqlJobPackageArchiveReader: - """Resolve the latest JobPackage id emitted for one data source.""" + """Resolve the latest non-empty JobPackage id emitted for one data source.""" - def __init__(self, *, session: AsyncSession) -> None: + def __init__(self, *, session: AsyncSession, job_package_work_dir: Path) -> None: 
self._session = session + self._job_package_work_dir = job_package_work_dir async def latest_job_package_id_for_data_source( self, *, data_source_id: str @@ -24,13 +30,24 @@ async def latest_job_package_id_for_data_source( AND payload->>'data_source_id' = :data_source_id AND payload->>'job_package_id' IS NOT NULL ORDER BY occurred_at DESC - LIMIT 1 """ ), {"data_source_id": data_source_id}, ) - row = result.one_or_none() - if row is None or row.job_package_id is None: - return None - package_id = str(row.job_package_id).strip() - return package_id or None + for row in result.fetchall(): + package_id = str(row.job_package_id or "").strip() + if package_id and self._package_has_repository_content(package_id): + return package_id + return None + + def _package_has_repository_content(self, package_id: str) -> bool: + archive_path = self._job_package_work_dir / JobPackageId( + value=package_id + ).archive_name() + if not archive_path.is_file(): + return False + try: + manifest = JobPackageReader(archive_path).read_manifest() + except (OSError, ValueError): + return False + return manifest.entry_count > 0 diff --git a/src/api/management/presentation/data_sources/routes.py b/src/api/management/presentation/data_sources/routes.py index 33ce5aa11..c8057ae0c 100644 --- a/src/api/management/presentation/data_sources/routes.py +++ b/src/api/management/presentation/data_sources/routes.py @@ -8,9 +8,6 @@ from fastapi import APIRouter, Depends, HTTPException, Query, status from sqlalchemy.ext.asyncio import AsyncSession -from extraction.infrastructure.workload_runtime_settings import ( - get_extraction_workload_runtime_settings, -) from iam.application.value_objects import CurrentUser from iam.dependencies.user import get_current_user from infrastructure.database.dependencies import get_write_session @@ -28,7 +25,10 @@ from management.infrastructure.job_package_archive_reader import SqlJobPackageArchiveReader from management.ports.exceptions import UnauthorizedError from 
management.ports.repositories import IDataSourceSyncRunRepository -from shared_kernel.job_package.archive_availability import job_package_archive_exists +from shared_kernel.job_package.archive_availability import ( + job_package_archive_exists, + job_package_work_dir, +) from management.presentation.data_sources.models import ( CreateDataSourceRequest, DataSourceDiffSummaryResponse, @@ -295,8 +295,11 @@ async def list_data_sources( user_id=current_user.user_id.value, kg_id=kg_id, ) - archive_reader = SqlJobPackageArchiveReader(session=session) - work_dir = get_extraction_workload_runtime_settings().job_package_work_dir + archive_reader = SqlJobPackageArchiveReader( + session=session, + job_package_work_dir=job_package_work_dir(), + ) + work_dir = job_package_work_dir() responses: list[DataSourceResponse] = [] for ds in data_sources: response = DataSourceResponse.from_domain(ds) diff --git a/src/api/shared_kernel/job_package/archive_availability.py b/src/api/shared_kernel/job_package/archive_availability.py index 1f8d484da..3bc7fd849 100644 --- a/src/api/shared_kernel/job_package/archive_availability.py +++ b/src/api/shared_kernel/job_package/archive_availability.py @@ -2,11 +2,22 @@ from __future__ import annotations +import os from pathlib import Path from shared_kernel.job_package.value_objects import JobPackageId +def job_package_work_dir() -> Path: + """Return the configured on-disk directory for JobPackage ZIP archives.""" + return Path( + os.getenv( + "KARTOGRAPH_EXTRACTION_RUNTIME_JOB_PACKAGE_WORK_DIR", + "/tmp/kartograph/job_packages", + ) + ) + + def job_package_archive_path(*, work_dir: Path, job_package_id: str) -> Path: """Return the expected on-disk path for one JobPackage archive.""" return work_dir / JobPackageId(value=job_package_id).archive_name() diff --git a/src/api/tests/unit/extraction/infrastructure/test_prepared_job_package_reader.py b/src/api/tests/unit/extraction/infrastructure/test_prepared_job_package_reader.py new file mode 100644 index 
000000000..320f91146 --- /dev/null +++ b/src/api/tests/unit/extraction/infrastructure/test_prepared_job_package_reader.py @@ -0,0 +1,107 @@ +"""Unit tests for SqlPreparedJobPackageReader.""" + +from __future__ import annotations + +from pathlib import Path +from unittest.mock import AsyncMock, MagicMock + +import pytest + +from extraction.infrastructure.prepared_job_package_reader import SqlPreparedJobPackageReader +from shared_kernel.job_package.builder import JobPackageBuilder +from shared_kernel.job_package.value_objects import ( + AdapterCheckpoint, + ChangeOperation, + ChangesetEntry, + ContentRef, + JobPackageId, + SyncMode, +) + + +def _build_package(work_dir: Path, package_id: str, *, with_file: bool) -> None: + builder = JobPackageBuilder( + data_source_id="ds-1", + knowledge_graph_id="kg-1", + sync_mode=SyncMode.FULL_REFRESH, + package_id=JobPackageId(value=package_id), + ) + if with_file: + content = b"print('hello')\n" + ref = builder.add_content(content) + builder.add_changeset_entry( + ChangesetEntry( + operation=ChangeOperation.ADD, + id="file-1", + type="io.kartograph.change.file", + path="pkg/api/example.go", + content_ref=ref, + content_type="text/plain", + metadata={}, + ) + ) + builder.set_checkpoint(AdapterCheckpoint(schema_version="1.0.0", data={"commit_sha": "abc"})) + builder.build(work_dir) + + +def _mock_session(rows: list) -> AsyncMock: + result = MagicMock() + result.fetchall.return_value = rows + session = AsyncMock() + session.execute = AsyncMock(return_value=result) + return session + + +@pytest.mark.asyncio +class TestSqlPreparedJobPackageReader: + async def test_prefers_latest_non_empty_job_package_per_data_source( + self, tmp_path: Path + ) -> None: + empty_id = "01JEMPTY000000000000000000" + full_id = "01JFULL0000000000000000000" + _build_package(tmp_path, empty_id, with_file=False) + _build_package(tmp_path, full_id, with_file=True) + + rows = [ + MagicMock( + data_source_id="ds-1", + job_package_id=empty_id, + 
occurred_at="2026-05-31T12:00:00Z", + ), + MagicMock( + data_source_id="ds-1", + job_package_id=full_id, + occurred_at="2026-05-31T11:00:00Z", + ), + ] + reader = SqlPreparedJobPackageReader( + session=_mock_session(rows), + job_package_work_dir=tmp_path, + ) + + package_ids = await reader.list_latest_for_knowledge_graph( + knowledge_graph_id="kg-1", + ) + + assert package_ids == (full_id,) + + async def test_skips_data_source_when_all_packages_are_empty(self, tmp_path: Path) -> None: + empty_id = "01JEMPTY000000000000000000" + _build_package(tmp_path, empty_id, with_file=False) + rows = [ + MagicMock( + data_source_id="ds-1", + job_package_id=empty_id, + occurred_at="2026-05-31T12:00:00Z", + ), + ] + reader = SqlPreparedJobPackageReader( + session=_mock_session(rows), + job_package_work_dir=tmp_path, + ) + + package_ids = await reader.list_latest_for_knowledge_graph( + knowledge_graph_id="kg-1", + ) + + assert package_ids == () diff --git a/src/api/tests/unit/extraction/infrastructure/test_sticky_session_workdir_materializer.py b/src/api/tests/unit/extraction/infrastructure/test_sticky_session_workdir_materializer.py index 70f96778a..f9332d126 100644 --- a/src/api/tests/unit/extraction/infrastructure/test_sticky_session_workdir_materializer.py +++ b/src/api/tests/unit/extraction/infrastructure/test_sticky_session_workdir_materializer.py @@ -2,6 +2,7 @@ from __future__ import annotations +import json from pathlib import Path from shared_kernel.job_package.builder import JobPackageBuilder @@ -73,6 +74,56 @@ def test_materializer_does_not_discover_archives_when_package_ids_empty(tmp_path assert not any((session_root / "repository-files").iterdir()) +def _build_empty_package(work_dir: Path, package_id: str) -> None: + builder = JobPackageBuilder( + data_source_id="ds-empty", + knowledge_graph_id="kg-1", + sync_mode=SyncMode.INCREMENTAL, + package_id=JobPackageId(value=package_id), + ) + builder.set_checkpoint(AdapterCheckpoint(schema_version="1.0.0", 
data={"commit_sha": "abc"})) + builder.build(work_dir) + + +def test_materializer_skips_empty_job_packages(tmp_path: Path) -> None: + empty_id = "01JEMPTY000000000000000000" + full_id = "01JTESTPACK0000000000000003" + _build_empty_package(tmp_path, empty_id) + _build_package(tmp_path, full_id) + materializer = StickySessionWorkdirMaterializer(job_package_work_dir=tmp_path) + + session_root = materializer.prepare( + session_id="session-empty", + knowledge_graph_id="kg-1", + job_package_ids=(empty_id, full_id), + ) + + assert not (session_root / "repository-files" / empty_id).exists() + assert (session_root / "repository-files" / full_id / "README.md").exists() + + +def test_materializer_writes_sources_index(tmp_path: Path) -> None: + package_id = "01JTESTPACK0000000000000004" + _build_package(tmp_path, package_id) + materializer = StickySessionWorkdirMaterializer(job_package_work_dir=tmp_path) + + session_root = materializer.prepare( + session_id="session-index", + knowledge_graph_id="kg-1", + job_package_ids=(package_id,), + ) + + index_path = session_root / "sources-index.json" + assert index_path.is_file() + payload = json.loads(index_path.read_text(encoding="utf-8")) + assert payload["knowledge_graph_id"] == "kg-1" + assert len(payload["sources"]) == 1 + source = payload["sources"][0] + assert source["job_package_id"] == package_id + assert source["entry_count"] == 1 + assert source["sample_paths"] == ["README.md"] + + def test_materializer_refresh_preserves_session_root_directory(tmp_path: Path) -> None: package_id = "01JTESTPACK0000000000000002" _build_package(tmp_path, package_id) diff --git a/src/api/tests/unit/ingestion/application/test_ingestion_service.py b/src/api/tests/unit/ingestion/application/test_ingestion_service.py index 8da91a1d9..17311b46f 100644 --- a/src/api/tests/unit/ingestion/application/test_ingestion_service.py +++ b/src/api/tests/unit/ingestion/application/test_ingestion_service.py @@ -60,6 +60,7 @@ def __init__( self._result = result 
self._fail = fail self.last_checkpoint: AdapterCheckpoint | None = None + self.last_sync_mode: SyncMode | None = None self.last_credentials: dict[str, str] | None = None async def extract( @@ -70,6 +71,7 @@ async def extract( sync_mode: SyncMode, ) -> ExtractionResult: self.last_checkpoint = checkpoint + self.last_sync_mode = sync_mode self.last_credentials = credentials if self._fail: raise RuntimeError("credentials expired") @@ -211,3 +213,27 @@ async def test_run_uses_baseline_commit_as_checkpoint(self): assert adapter.last_checkpoint is not None assert adapter.last_checkpoint.data == {"commit_sha": "abc123"} + + async def test_ingest_only_uses_full_refresh_and_ignores_baseline(self): + """Prepare-for-agent runs must snapshot the full branch, not an empty delta.""" + result = _make_extraction_result() + adapter = _FakeAdapter(result=result) + registry: dict[str, IDatasourceAdapter] = {"github": adapter} + with tempfile.TemporaryDirectory() as tmpdir: + service = IngestionService( + adapter_registry=registry, + work_dir=Path(tmpdir), + ) + await service.run( + sync_run_id="run-001", + data_source_id="ds-001", + knowledge_graph_id="kg-001", + adapter_type="github", + connection_config={"repo": "org/repo"}, + credentials_path=None, + baseline_commit="abc123", + pipeline_mode="ingest_only", + ) + + assert adapter.last_sync_mode == SyncMode.FULL_REFRESH + assert adapter.last_checkpoint is None diff --git a/src/api/tests/unit/ingestion/infrastructure/test_ingestion_event_handler.py b/src/api/tests/unit/ingestion/infrastructure/test_ingestion_event_handler.py index 07ac2d446..0c6e8336e 100644 --- a/src/api/tests/unit/ingestion/infrastructure/test_ingestion_event_handler.py +++ b/src/api/tests/unit/ingestion/infrastructure/test_ingestion_event_handler.py @@ -70,6 +70,7 @@ async def run( tenant_id: str | None = None, credentials: dict[str, str] | None = None, baseline_commit: str | None = None, + pipeline_mode: str = "full", ) -> IngestionRunResult: self.calls.append( { 
@@ -79,6 +80,7 @@ async def run( "adapter_type": adapter_type, "credentials": credentials, "baseline_commit": baseline_commit, + "pipeline_mode": pipeline_mode, } ) if self._fail: @@ -320,6 +322,7 @@ async def run( # type: ignore[override] tenant_id: str | None = None, credentials: dict[str, str] | None = None, baseline_commit: str | None = None, + pipeline_mode: str = "full", ) -> JobPackageId: raise RuntimeError( "github auth failed for token ghp_1234567890abcdef1234567890abcdef1234" @@ -439,6 +442,7 @@ async def run( # type: ignore[override] tenant_id: str | None = None, credentials: dict[str, str] | None = None, baseline_commit: str | None = None, + pipeline_mode: str = "full", ) -> JobPackageId: raise asyncio.CancelledError() diff --git a/src/api/tests/unit/management/presentation/test_data_sources_routes.py b/src/api/tests/unit/management/presentation/test_data_sources_routes.py index 52a92ec82..0a17d7d71 100644 --- a/src/api/tests/unit/management/presentation/test_data_sources_routes.py +++ b/src/api/tests/unit/management/presentation/test_data_sources_routes.py @@ -7,7 +7,7 @@ from __future__ import annotations from datetime import UTC, datetime -from unittest.mock import AsyncMock +from unittest.mock import AsyncMock, MagicMock import pytest from fastapi import FastAPI, status @@ -104,6 +104,16 @@ def sample_sync_run(sample_data_source: DataSource) -> DataSourceSyncRun: ) +@pytest.fixture +def mock_write_session() -> AsyncMock: + """Mock write DB session for JobPackage archive lookups.""" + session = AsyncMock() + result = MagicMock() + result.fetchall.return_value = [] + session.execute = AsyncMock(return_value=result) + return session + + @pytest.fixture def test_client( mock_ds_service: AsyncMock, @@ -111,9 +121,11 @@ def test_client( mock_diff_summary_service: AsyncMock, mock_commit_reference_service: AsyncMock, mock_current_user: CurrentUser, + mock_write_session: AsyncMock, ) -> TestClient: """Create TestClient with mocked dependencies.""" from 
iam.dependencies.user import get_current_user + from infrastructure.database.dependencies import get_write_session from management.dependencies.data_source import ( get_data_source_service, get_git_commit_reference_service, @@ -124,6 +136,9 @@ def test_client( app = FastAPI() + async def _override_write_session(): + yield mock_write_session + app.dependency_overrides[get_data_source_service] = lambda: mock_ds_service app.dependency_overrides[get_sync_run_repository] = lambda: mock_sync_run_repo app.dependency_overrides[get_git_diff_summary_service] = ( @@ -133,6 +148,7 @@ def test_client( lambda: mock_commit_reference_service ) app.dependency_overrides[get_current_user] = lambda: mock_current_user + app.dependency_overrides[get_write_session] = _override_write_session app.include_router(router) diff --git a/src/api/tests/unit/shared_kernel/job_package/test_archive_availability.py b/src/api/tests/unit/shared_kernel/job_package/test_archive_availability.py index 60d82e745..bd4ac7e0c 100644 --- a/src/api/tests/unit/shared_kernel/job_package/test_archive_availability.py +++ b/src/api/tests/unit/shared_kernel/job_package/test_archive_availability.py @@ -4,6 +4,8 @@ from pathlib import Path +import pytest + from shared_kernel.job_package.archive_availability import job_package_archive_exists from shared_kernel.job_package.builder import JobPackageBuilder from shared_kernel.job_package.value_objects import ( @@ -16,6 +18,14 @@ ) +def test_job_package_work_dir_defaults_to_tmp_path(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.delenv("KARTOGRAPH_EXTRACTION_RUNTIME_JOB_PACKAGE_WORK_DIR", raising=False) + + from shared_kernel.job_package.archive_availability import job_package_work_dir + + assert job_package_work_dir() == Path("/tmp/kartograph/job_packages") + + def test_job_package_archive_exists_when_file_present(tmp_path: Path) -> None: package_id = "01JTESTPACK0000000000000099" content_bytes = b"# hello\n" From 444c2eb839aff9665d5e620a93b2510800f16ceb Mon Sep 17 
00:00:00 2001 From: aredenba-rh <aredenba@redhat.com> Date: Sun, 31 May 2026 23:02:39 -0400 Subject: [PATCH 078/153] perf(ingestion): parallelize prepare sync processing Process SyncStarted outbox events with bounded concurrency and fetch GitHub blobs in parallel to reduce ingestion-context preparation time for multi-source batches. Co-authored-by: Cursor <cursoragent@cursor.com> --- src/api/infrastructure/outbox/worker.py | 64 +++++++++++++++++-- src/api/infrastructure/settings.py | 8 +++ .../infrastructure/adapters/github.py | 55 ++++++++++------ src/api/main.py | 6 +- .../unit/infrastructure/outbox/test_worker.py | 46 +++++++++++++ .../adapters/test_github_adapter.py | 55 ++++++++++++++++ 6 files changed, 207 insertions(+), 27 deletions(-) diff --git a/src/api/infrastructure/outbox/worker.py b/src/api/infrastructure/outbox/worker.py index ee9a6acd9..a36412d7e 100644 --- a/src/api/infrastructure/outbox/worker.py +++ b/src/api/infrastructure/outbox/worker.py @@ -50,6 +50,7 @@ def __init__( poll_interval_seconds: int = 30, batch_size: int = 100, max_retries: int = 5, + sync_started_max_concurrency: int = 1, ) -> None: """Initialize the worker. @@ -63,6 +64,8 @@ def __init__( poll_interval_seconds: How often to poll for missed events batch_size: Maximum entries to process per batch max_retries: Maximum retry attempts before moving to DLQ + sync_started_max_concurrency: Maximum parallel SyncStarted handlers + per batch. Other events remain serial to preserve lifecycle order. 
""" if session_factory is None: raise ValueError("session_factory is required") @@ -79,6 +82,11 @@ def __init__( raise ValueError(f"batch_size must be positive, got {batch_size}") if max_retries < 0: raise ValueError(f"max_retries must be non-negative, got {max_retries}") + if sync_started_max_concurrency <= 0: + raise ValueError( + "sync_started_max_concurrency must be positive, " + f"got {sync_started_max_concurrency}" + ) self._session_factory = session_factory self._handler = handler @@ -87,6 +95,7 @@ def __init__( self._poll_interval = poll_interval_seconds self._batch_size = batch_size self._max_retries = max_retries + self._sync_started_max_concurrency = sync_started_max_concurrency self._running = False self._tasks: list[asyncio.Task[None]] = [] # Used by stop() to interrupt the poll-loop's inter-batch sleep without @@ -240,17 +249,58 @@ async def _process_entries( session: AsyncSession, ) -> None: """Process a list of entries by delegating to the event handler.""" + sync_started_block: list[OutboxEntry] = [] for entry in entries: - try: - self._probe.event_dispatching(entry.id, entry.event_type) - await self._handler.handle(entry.event_type, entry.payload) + if ( + entry.event_type == "SyncStarted" + and self._sync_started_max_concurrency > 1 + ): + sync_started_block.append(entry) + continue + + if sync_started_block: + await self._process_sync_started_block(sync_started_block, session) + sync_started_block = [] + + await self._process_entry(entry, session) + + if sync_started_block: + await self._process_sync_started_block(sync_started_block, session) + + async def _process_entry(self, entry: OutboxEntry, session: AsyncSession) -> None: + """Process one outbox entry serially.""" + try: + self._probe.event_dispatching(entry.id, entry.event_type) + await self._handler.handle(entry.event_type, entry.payload) + await self._mark_processed(entry.id, session) + self._probe.event_processed(entry.id, entry.event_type) + except Exception as e: + await 
self._handle_processing_failure(entry, str(e), session) + + async def _process_sync_started_block( + self, + entries: list[OutboxEntry], + session: AsyncSession, + ) -> None: + """Process contiguous SyncStarted entries with bounded parallelism.""" + semaphore = asyncio.Semaphore(self._sync_started_max_concurrency) - # Mark as processed + async def _dispatch(entry: OutboxEntry) -> Exception | None: + self._probe.event_dispatching(entry.id, entry.event_type) + try: + async with semaphore: + await self._handler.handle(entry.event_type, entry.payload) + return None + except Exception as exc: # pragma: no cover - covered via caller paths + return exc + + errors = await asyncio.gather(*(_dispatch(entry) for entry in entries)) + for entry, error in zip(entries, errors, strict=True): + if error is None: await self._mark_processed(entry.id, session) self._probe.event_processed(entry.id, entry.event_type) - - except Exception as e: - await self._handle_processing_failure(entry, str(e), session) + else: + await self._handle_processing_failure(entry, str(error), session) async def _mark_processed(self, entry_id: UUID, session: AsyncSession) -> None: """Mark an entry as successfully processed.""" diff --git a/src/api/infrastructure/settings.py b/src/api/infrastructure/settings.py index 8a2e0fb7b..1b3a62a32 100644 --- a/src/api/infrastructure/settings.py +++ b/src/api/infrastructure/settings.py @@ -247,6 +247,8 @@ class OutboxWorkerSettings(BaseSettings): KARTOGRAPH_OUTBOX_ENABLED: Enable the outbox worker (default: true) KARTOGRAPH_OUTBOX_POLL_INTERVAL_SECONDS: Poll interval in seconds (default: 30) KARTOGRAPH_OUTBOX_BATCH_SIZE: Maximum entries per batch (default: 100) + KARTOGRAPH_OUTBOX_SYNC_STARTED_MAX_CONCURRENCY: Maximum concurrent + SyncStarted handlers (default: 5) """ model_config = SettingsConfigDict( @@ -278,6 +280,12 @@ class OutboxWorkerSettings(BaseSettings): ge=1, le=100, ) + sync_started_max_concurrency: int = Field( + default=5, + description="Maximum concurrent 
SyncStarted handlers per outbox batch", + ge=1, + le=100, + ) @lru_cache diff --git a/src/api/ingestion/infrastructure/adapters/github.py b/src/api/ingestion/infrastructure/adapters/github.py index 03d6e5204..8374e1e16 100644 --- a/src/api/ingestion/infrastructure/adapters/github.py +++ b/src/api/ingestion/infrastructure/adapters/github.py @@ -23,6 +23,7 @@ from __future__ import annotations +import asyncio import base64 import mimetypes from typing import Any @@ -74,8 +75,16 @@ class GitHubAdapter: with a custom transport for testing. """ - def __init__(self, http_client: httpx.AsyncClient | None = None) -> None: + def __init__( + self, + http_client: httpx.AsyncClient | None = None, + *, + blob_fetch_max_concurrency: int = 16, + ) -> None: + if blob_fetch_max_concurrency <= 0: + raise ValueError("blob_fetch_max_concurrency must be positive") self._http_client = http_client + self._blob_fetch_max_concurrency = blob_fetch_max_concurrency @staticmethod def _parse_connection_config( @@ -396,42 +405,50 @@ async def _fetch_file_contents( Returns: Tuple of (list of ChangesetEntry, content_blobs dict). 
""" - changeset_entries: list[ChangesetEntry] = [] - content_blobs: dict[str, bytes] = {} + semaphore = asyncio.Semaphore(self._blob_fetch_max_concurrency) + loaded: dict[int, tuple[ChangesetEntry, bytes]] = {} - for file_info in files: + async def _load_file(index: int, file_info: dict[str, Any]) -> None: path: str = file_info["path"] blob_sha: str = file_info["sha"] operation: ChangeOperation = file_info["operation"] previous_path: str | None = file_info.get("previous_path") - # Fetch raw content from blob - raw_bytes = await self._fetch_blob(client, headers, owner, repo, blob_sha) + async with semaphore: + raw_bytes = await self._fetch_blob(client, headers, owner, repo, blob_sha) - # Content-address the blob by its SHA-256 digest content_ref = ContentRef.from_bytes(raw_bytes) - content_blobs[content_ref.hex_digest] = raw_bytes - - # Detect content MIME type; default to octet-stream for unknown content_type, _ = mimetypes.guess_type(path) if content_type is None: content_type = "application/octet-stream" - # Build adapter-specific metadata metadata: dict[str, Any] = {} if previous_path: metadata["previous_path"] = previous_path - entry = ChangesetEntry( - operation=operation, - id=blob_sha, - type=_ENTRY_TYPE_FILE, - path=path, - content_ref=content_ref, - content_type=content_type, - metadata=metadata, + loaded[index] = ( + ChangesetEntry( + operation=operation, + id=blob_sha, + type=_ENTRY_TYPE_FILE, + path=path, + content_ref=content_ref, + content_type=content_type, + metadata=metadata, + ), + raw_bytes, ) + + await asyncio.gather( + *(_load_file(index, file_info) for index, file_info in enumerate(files)) + ) + + changeset_entries: list[ChangesetEntry] = [] + content_blobs: dict[str, bytes] = {} + for index in range(len(files)): + entry, raw_bytes = loaded[index] changeset_entries.append(entry) + content_blobs[entry.content_ref.hex_digest] = raw_bytes return changeset_entries, content_blobs diff --git a/src/api/main.py b/src/api/main.py index 
93be300ea..f34d056a9 100644 --- a/src/api/main.py +++ b/src/api/main.py @@ -209,7 +209,10 @@ async def _ingest_only_archive_available( job_package_archive_exists, ) - reader = SqlJobPackageArchiveReader(session=session) + reader = SqlJobPackageArchiveReader( + session=session, + job_package_work_dir=_JOB_PACKAGE_WORK_DIR, + ) package_id = await reader.latest_job_package_id_for_data_source( data_source_id=data_source_id, ) @@ -667,6 +670,7 @@ async def kartograph_lifespan(app: FastAPI): poll_interval_seconds=outbox_settings.poll_interval_seconds, batch_size=outbox_settings.batch_size, max_retries=outbox_settings.max_retries, + sync_started_max_concurrency=outbox_settings.sync_started_max_concurrency, ) await worker.start() app.state.outbox_worker = worker diff --git a/src/api/tests/unit/infrastructure/outbox/test_worker.py b/src/api/tests/unit/infrastructure/outbox/test_worker.py index e2ba58e56..0585a5349 100644 --- a/src/api/tests/unit/infrastructure/outbox/test_worker.py +++ b/src/api/tests/unit/infrastructure/outbox/test_worker.py @@ -176,6 +176,52 @@ async def test_marks_entry_as_processed(self): # Verify session.execute was called (for mark_processed) mock_session.execute.assert_called() + @pytest.mark.asyncio + async def test_processes_sync_started_events_with_bounded_parallelism(self): + """SyncStarted events should fan out in parallel when configured.""" + mock_session = AsyncMock() + mock_probe = MagicMock() + in_flight = 0 + max_in_flight = 0 + + async def handle(event_type: str, payload: dict) -> None: + nonlocal in_flight, max_in_flight + in_flight += 1 + max_in_flight = max(max_in_flight, in_flight) + await asyncio.sleep(0.03) + in_flight -= 1 + + mock_handler = AsyncMock() + mock_handler.handle.side_effect = handle + + entries: list[OutboxEntry] = [] + for i in range(5): + entries.append( + OutboxEntry( + id=uuid4(), + aggregate_type="sync_run", + aggregate_id=f"01SYNC{i:02d}", + event_type="SyncStarted", + payload={"sync_run_id": f"run-{i}"}, + 
occurred_at=datetime(2026, 1, 8, 12, 0, 0, tzinfo=UTC), + processed_at=None, + created_at=datetime(2026, 1, 8, 12, 0, 1, tzinfo=UTC), + ) + ) + + worker = OutboxWorker( + session_factory=AsyncMock(), + handler=mock_handler, + probe=mock_probe, + event_source=None, + sync_started_max_concurrency=3, + ) + + await worker._process_entries(entries, mock_session) + + assert max_in_flight >= 2 + assert max_in_flight <= 3 + class TestOutboxWorkerLifecycle: """Tests for worker start/stop lifecycle.""" diff --git a/src/api/tests/unit/ingestion/infrastructure/adapters/test_github_adapter.py b/src/api/tests/unit/ingestion/infrastructure/adapters/test_github_adapter.py index 5f90bca74..4efa93f85 100644 --- a/src/api/tests/unit/ingestion/infrastructure/adapters/test_github_adapter.py +++ b/src/api/tests/unit/ingestion/infrastructure/adapters/test_github_adapter.py @@ -22,6 +22,7 @@ from __future__ import annotations +import asyncio import base64 import json @@ -881,3 +882,57 @@ async def test_changeset_entry_type_is_file( for entry in result.changeset_entries: assert entry.type == "io.kartograph.change.file" + + @pytest.mark.asyncio + async def test_full_refresh_fetches_blobs_with_parallelism( + self, connection_config, credentials + ): + """Blob fetches should run concurrently for better throughput.""" + max_in_flight = 0 + in_flight = 0 + + files = [ + { + "path": f"src/file_{i}.py", + "type": "blob", + "sha": f"blob{i:02d}" * 5, + } + for i in range(4) + ] + + class ConcurrentBlobTransport(httpx.AsyncBaseTransport): + async def handle_async_request( + self, request: httpx.Request + ) -> httpx.Response: + nonlocal max_in_flight, in_flight + url_path = request.url.path + if url_path.endswith("/branches/main"): + data: dict = _branch_response(HEAD_SHA) + elif f"/git/trees/{HEAD_SHA}" in url_path: + data = _tree_response(files) + elif "/git/blobs/" in url_path: + in_flight += 1 + max_in_flight = max(max_in_flight, in_flight) + await asyncio.sleep(0.03) + in_flight -= 1 + data = 
_blob_response(b"print('hi')\n") + else: + raise RuntimeError(f"Unexpected URL: {url_path}") + return httpx.Response( + 200, + content=json.dumps(data).encode(), + headers={"content-type": "application/json"}, + ) + + client = httpx.AsyncClient(transport=ConcurrentBlobTransport()) + adapter = GitHubAdapter(http_client=client) + + result = await adapter.extract( + connection_config=connection_config, + credentials=credentials, + checkpoint=None, + sync_mode=SyncMode.FULL_REFRESH, + ) + + assert len(result.changeset_entries) == 4 + assert max_in_flight >= 2 From a34a7ea33c6814b17550eec435bdab7de4aec331 Mon Sep 17 00:00:00 2001 From: aredenba-rh <aredenba@redhat.com> Date: Mon, 1 Jun 2026 13:16:20 -0400 Subject: [PATCH 079/153] feat(ui): add entity and relationship schema artifacts to graph management Expose separate schema-entities and schema-relationships rail items with readiness-driven status and detail panels so designers can track type coverage before transitioning. Co-authored-by: Cursor <cursoragent@cursor.com> --- .../pages/knowledge-graphs/[kgId]/manage.vue | 86 ++++++++++++++++++- .../kg-graph-management-artifacts.test.ts | 4 +- .../knowledge-graph-manage-workspace.test.ts | 10 +++ src/dev-ui/app/utils/kgGraphManagement.ts | 20 +++++ .../app/utils/kgGraphManagementArtifacts.ts | 6 ++ 5 files changed, 124 insertions(+), 2 deletions(-) diff --git a/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue b/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue index 49e66a397..368c7bdd7 100644 --- a/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue +++ b/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue @@ -363,6 +363,8 @@ const graphManagementRailItems = computed(() => { transitionEligible: statusProjection.value.transition_eligible, blockingReasonCount: statusProjection.value.readiness.blocking_reasons.length, prepopulatedGapCount: statusProjection.value.readiness.prepopulated_types_without_instances.length, + hasMinimumEntityTypes: 
statusProjection.value.readiness.has_minimum_entity_types, + hasMinimumRelationshipTypes: statusProjection.value.readiness.has_minimum_relationship_types, sessionUpdatedAt: extractionSession.value?.updated_at ?? null, hasActiveSession: Boolean(extractionSession.value?.id), }) @@ -1863,7 +1865,89 @@ watch(selectedOpsDataSourceId, () => { </Card> <div id="graph-management-artifact-detail" class="graph-management-detail scroll-mt-6 space-y-6"> - <Card v-if="selectedRailItemId === 'schema-readiness'"> + <Card v-if="selectedRailItemId === 'schema-entities'"> + <CardHeader> + <CardTitle class="text-base flex items-center gap-2"> + <Box class="size-4" /> + Schema: Entities + </CardTitle> + <CardDescription> + Entity type coverage snapshot for + <span class="font-medium text-foreground">{{ graphManagementModeLabel }}</span>. + </CardDescription> + </CardHeader> + <CardContent class="space-y-3 text-sm"> + <div class="flex flex-wrap justify-end gap-2"> + <Button variant="outline" size="sm" as-child> + <NuxtLink to="/graph/schema">Open schema browser</NuxtLink> + </Button> + </div> + <div class="rounded-lg border bg-muted/30 p-3"> + <div class="flex items-center justify-between gap-2"> + <p class="text-xs font-medium uppercase tracking-wide text-muted-foreground"> + Entity type inventory + </p> + <Badge :variant="entityTypeLabels.length > 0 ? 'default' : 'secondary'"> + {{ entityTypeLabels.length }} type(s) + </Badge> + </div> + <p + v-if="entityTypeLabels.length === 0" + class="mt-2 text-xs text-muted-foreground" + > + No entity types defined yet. Add at least one type to satisfy schema readiness. 
+ </p> + <div v-else class="mt-2 flex flex-wrap gap-2"> + <Badge v-for="label in entityTypeLabels" :key="label" variant="outline"> + {{ label }} + </Badge> + </div> + </div> + </CardContent> + </Card> + + <Card v-else-if="selectedRailItemId === 'schema-relationships'"> + <CardHeader> + <CardTitle class="text-base flex items-center gap-2"> + <Link2 class="size-4" /> + Schema: Relationships + </CardTitle> + <CardDescription> + Relationship type coverage snapshot for + <span class="font-medium text-foreground">{{ graphManagementModeLabel }}</span>. + </CardDescription> + </CardHeader> + <CardContent class="space-y-3 text-sm"> + <div class="flex flex-wrap justify-end gap-2"> + <Button variant="outline" size="sm" as-child> + <NuxtLink to="/graph/schema">Open schema browser</NuxtLink> + </Button> + </div> + <div class="rounded-lg border bg-muted/30 p-3"> + <div class="flex items-center justify-between gap-2"> + <p class="text-xs font-medium uppercase tracking-wide text-muted-foreground"> + Relationship type inventory + </p> + <Badge :variant="relationshipTypeLabels.length > 0 ? 'default' : 'secondary'"> + {{ relationshipTypeLabels.length }} type(s) + </Badge> + </div> + <p + v-if="relationshipTypeLabels.length === 0" + class="mt-2 text-xs text-muted-foreground" + > + No relationship types defined yet. Add at least one type to satisfy schema readiness. 
+ </p> + <div v-else class="mt-2 flex flex-wrap gap-2"> + <Badge v-for="label in relationshipTypeLabels" :key="label" variant="outline"> + {{ label }} + </Badge> + </div> + </div> + </CardContent> + </Card> + + <Card v-else-if="selectedRailItemId === 'schema-readiness'"> <CardHeader> <CardTitle class="text-base flex items-center gap-2"> <CheckCircle2 class="size-4" /> diff --git a/src/dev-ui/app/tests/kg-graph-management-artifacts.test.ts b/src/dev-ui/app/tests/kg-graph-management-artifacts.test.ts index c250bd2bb..e89b76a4e 100644 --- a/src/dev-ui/app/tests/kg-graph-management-artifacts.test.ts +++ b/src/dev-ui/app/tests/kg-graph-management-artifacts.test.ts @@ -13,6 +13,8 @@ describe('kgGraphManagementArtifacts', () => { transitionEligible: false, blockingReasonCount: 1, prepopulatedGapCount: 0, + hasMinimumEntityTypes: false, + hasMinimumRelationshipTypes: false, sessionUpdatedAt: '2026-01-01', hasActiveSession: true, }) @@ -26,7 +28,7 @@ describe('kgGraphManagementArtifacts', () => { it('resolves schema selection for the active mode', () => { expect( resolveSchemaRailSelection(null, 'initial-schema-design', items), - ).toBe('schema-readiness') + ).toBe('schema-entities') expect( resolveSchemaRailSelection('session-pointers', 'extraction-jobs', items), ).toBe('extraction-jobs-setup') diff --git a/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts b/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts index 6bea76b83..19b93d99c 100644 --- a/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts +++ b/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts @@ -439,6 +439,8 @@ describe('KG-MANAGE-008 - hybrid lower panel shared rail', () => { transitionEligible: false, blockingReasonCount: 1, prepopulatedGapCount: 0, + hasMinimumEntityTypes: false, + hasMinimumRelationshipTypes: false, sessionUpdatedAt: '2026-05-22T12:00:00Z', hasActiveSession: true, }) @@ -466,6 +468,8 @@ describe('KG-MANAGE-009 - hybrid lower panel mode-specific 
detail', () => { transitionEligible: true, blockingReasonCount: 0, prepopulatedGapCount: 0, + hasMinimumEntityTypes: true, + hasMinimumRelationshipTypes: true, sessionUpdatedAt: null, hasActiveSession: true, }) @@ -481,6 +485,10 @@ describe('KG-MANAGE-009 - hybrid lower panel mode-specific detail', () => { describe('KG-MANAGE-010 - schema design parity behavior', () => { it('exposes schema readiness and validation detail in initial schema design mode', () => { + expect(manageWorkspaceVue).toContain('Schema: Entities') + expect(manageWorkspaceVue).toContain('Schema: Relationships') + expect(manageWorkspaceVue).toContain("selectedRailItemId === 'schema-entities'") + expect(manageWorkspaceVue).toContain("selectedRailItemId === 'schema-relationships'") expect(manageWorkspaceVue).toContain('progressChecklist') expect(manageWorkspaceVue).toContain('Bootstrap progress checklist') expect(manageWorkspaceVue).toContain('blocking_reasons') @@ -530,6 +538,8 @@ describe('KG-MANAGE-016 - graph management top controls', () => { transitionEligible: true, blockingReasonCount: 0, prepopulatedGapCount: 0, + hasMinimumEntityTypes: true, + hasMinimumRelationshipTypes: true, sessionUpdatedAt: '2026-05-22T12:00:00Z', hasActiveSession: true, }) diff --git a/src/dev-ui/app/utils/kgGraphManagement.ts b/src/dev-ui/app/utils/kgGraphManagement.ts index 9ddecfa71..dd2f87f44 100644 --- a/src/dev-ui/app/utils/kgGraphManagement.ts +++ b/src/dev-ui/app/utils/kgGraphManagement.ts @@ -6,6 +6,8 @@ export type GraphManagementMode = | 'one-off-mutations' export type GraphManagementRailItemId = + | 'schema-entities' + | 'schema-relationships' | 'schema-readiness' | 'validation-diagnostics' | 'session-pointers' @@ -47,6 +49,8 @@ export interface GraphManagementRailInputs { transitionEligible: boolean blockingReasonCount: number prepopulatedGapCount: number + hasMinimumEntityTypes: boolean + hasMinimumRelationshipTypes: boolean sessionUpdatedAt: string | null hasActiveSession: boolean } @@ -85,6 +89,22 @@ 
export function buildGraphManagementRailItems( : 'in_progress' return [ + { + id: 'schema-entities', + label: 'Schema: Entities', + status: input.hasMinimumEntityTypes ? 'ready' : 'in_progress', + lastUpdated: sessionStamp, + detailHint: 'Entity type definitions and coverage snapshot.', + modes: ['initial-schema-design'], + }, + { + id: 'schema-relationships', + label: 'Schema: Relationships', + status: input.hasMinimumRelationshipTypes ? 'ready' : 'in_progress', + lastUpdated: sessionStamp, + detailHint: 'Relationship type definitions and edge coverage snapshot.', + modes: ['initial-schema-design'], + }, { id: 'schema-readiness', label: 'Schema readiness', diff --git a/src/dev-ui/app/utils/kgGraphManagementArtifacts.ts b/src/dev-ui/app/utils/kgGraphManagementArtifacts.ts index a93b42ad0..2e339da08 100644 --- a/src/dev-ui/app/utils/kgGraphManagementArtifacts.ts +++ b/src/dev-ui/app/utils/kgGraphManagementArtifacts.ts @@ -39,6 +39,12 @@ export function graphManagementArtifactRowClass(selected: boolean, done: boolean } export function graphManagementArtifactHint(item: GraphManagementRailItem): string { + if (item.id === 'schema-entities') { + return item.status === 'ready' ? 'Types available' : 'Define entities' + } + if (item.id === 'schema-relationships') { + return item.status === 'ready' ? 'Types available' : 'Define relationships' + } if (item.id === 'schema-readiness') { return item.status === 'ready' ? 'Ready to transition' : 'Bootstrap checklist' } From ab37405592d2473f27f98e63b640363064a42cac Mon Sep 17 00:00:00 2001 From: aredenba-rh <aredenba@redhat.com> Date: Wed, 3 Jun 2026 00:57:37 -0400 Subject: [PATCH 080/153] fix(extraction): improve live thinking stream and raise sticky turn timeout Stream rolling three-line activity updates through warmup, SDK heartbeats, and the Graph Management UI, with unbuffered NDJSON and clearer timeout diagnostics. Increase default sticky turn timeout to 10 minutes. 
Co-authored-by: Cursor <cursoragent@cursor.com> --- compose.dev.yaml | 1 + .../kartograph_agent_runtime/executor.py | 70 +++++++++++++++++-- .../kartograph_agent_runtime/server.py | 11 ++- .../kartograph_agent_runtime/settings.py | 1 + .../thinking_stream.py | 4 +- .../tests/test_thinking_stream.py | 11 +-- .../sticky_session_runtime_service.py | 65 ++++++++--------- .../application/thinking_activity.py | 24 +++++++ .../container_workload_runtime.py | 3 + .../workload_runtime_factory.py | 1 + .../workload_runtime_settings.py | 2 +- src/api/extraction/presentation/routes.py | 18 ++++- .../application/test_thinking_activity.py | 28 ++++++++ .../extraction/SharedConversationPanel.vue | 15 ++-- .../pages/knowledge-graphs/[kgId]/manage.vue | 43 +++++++----- .../knowledge-graph-manage-workspace.test.ts | 2 + .../app/tests/thinking-activity-lines.test.ts | 28 ++++++++ src/dev-ui/app/utils/thinkingActivityLines.ts | 26 +++++++ 18 files changed, 276 insertions(+), 77 deletions(-) create mode 100644 src/api/extraction/application/thinking_activity.py create mode 100644 src/api/tests/unit/extraction/application/test_thinking_activity.py create mode 100644 src/dev-ui/app/tests/thinking-activity-lines.test.ts create mode 100644 src/dev-ui/app/utils/thinkingActivityLines.ts diff --git a/compose.dev.yaml b/compose.dev.yaml index e48dc6de8..e77368646 100644 --- a/compose.dev.yaml +++ b/compose.dev.yaml @@ -23,6 +23,7 @@ services: KARTOGRAPH_EXTRACTION_RUNTIME_SKILLS_DIR: ${PWD}/skills KARTOGRAPH_EXTRACTION_RUNTIME_CONTAINER_RUN_UID: ${HOST_UID} KARTOGRAPH_EXTRACTION_RUNTIME_CONTAINER_RUN_GID: ${HOST_GID} + KARTOGRAPH_EXTRACTION_RUNTIME_STICKY_TURN_TIMEOUT_SECONDS: "600" # Vertex AI for Claude Agent SDK in sticky assistant containers CLAUDE_CODE_USE_VERTEX: "1" ANTHROPIC_VERTEX_PROJECT_ID: itpc-gcp-hcm-pe-eng-claude diff --git a/src/agent-runtime/kartograph_agent_runtime/executor.py b/src/agent-runtime/kartograph_agent_runtime/executor.py index e96be5f1a..8151f6a04 100644 --- 
a/src/agent-runtime/kartograph_agent_runtime/executor.py +++ b/src/agent-runtime/kartograph_agent_runtime/executor.py @@ -10,12 +10,14 @@ from kartograph_agent_runtime.settings import AgentRuntimeSettings from kartograph_agent_runtime.thinking_stream import ( initial_sdk_thinking_lines, + push_thinking, thinking_events_from_sdk_message, ) from kartograph_agent_runtime.tools import RuntimeTooling from kartograph_agent_runtime.vertex import build_claude_agent_env -_DEFAULT_TURN_TIMEOUT_SECONDS = 180.0 +_DEFAULT_TURN_TIMEOUT_SECONDS = 600.0 +_SDK_HEARTBEAT_SECONDS = 8.0 def _build_system_prompt( @@ -159,6 +161,43 @@ def _build_sdk_env(settings: AgentRuntimeSettings) -> dict[str, str]: return env +def _timeout_error_message( + *, + settings: AgentRuntimeSettings, + auth_mode: str, + turn_timeout_seconds: float, +) -> str: + parts = [ + f"Claude Agent SDK did not complete within {int(turn_timeout_seconds)}s.", + ] + if auth_mode == "Vertex AI": + creds_path = settings.google_application_credentials.strip() + creds_present = bool(creds_path) + parts.append( + "Vertex AI " + f"project={settings.vertex_project_id.strip() or '(missing)'}, " + f"region={settings.vertex_region.strip() or '(missing)'}, " + f"ADC={'configured' if creds_present else 'missing'}." + ) + if creds_present: + from pathlib import Path + + creds_readable = Path(creds_path).is_file() + parts.append( + f"Credentials file {'readable' if creds_readable else 'not found'} at {creds_path}." + ) + else: + parts.append( + "Direct Anthropic API " + f"{'configured' if settings.anthropic_api_key.strip() else 'missing ANTHROPIC_API_KEY'}." + ) + parts.append( + "The model may still be running in the container — check sticky container logs " + "for Vertex auth or quota errors." 
+ ) + return " ".join(parts) + + async def stream_turn_events( *, settings: AgentRuntimeSettings, @@ -175,7 +214,6 @@ async def stream_turn_events( "Starting Claude Agent SDK runtime…", f"Model backend: {auth_mode}", f"Applying {ui_mode} skill overlay", - f"Workspace mounted at {settings.workspace_dir}", ], } @@ -256,9 +294,28 @@ async def _stream_with_claude_sdk( reply: str | None = None reply_parts: list[str] = [] last_compose_at = 0 + elapsed_seconds = 0 try: async with asyncio.timeout(turn_timeout_seconds): - async for sdk_message in query(prompt=prompt, options=options): + sdk_iter = query(prompt=prompt, options=options).__aiter__() + while True: + try: + sdk_message = await asyncio.wait_for( + sdk_iter.__anext__(), + timeout=_SDK_HEARTBEAT_SECONDS, + ) + except StopAsyncIteration: + break + except TimeoutError: + elapsed_seconds += int(_SDK_HEARTBEAT_SECONDS) + heartbeat = push_thinking( + recent, + f"Waiting for model response… ({elapsed_seconds}s)", + ) + if heartbeat: + yield heartbeat + continue + thinking_events, last_compose_at = thinking_events_from_sdk_message( sdk_message, recent=recent, @@ -279,9 +336,10 @@ async def _stream_with_claude_sdk( "ok": False, "error": { "code": "AGENT_TURN_TIMEOUT", - "message": ( - f"Claude Agent SDK did not complete within {int(turn_timeout_seconds)}s. " - "Check Vertex credentials and model access for this project." 
+ "message": _timeout_error_message( + settings=settings, + auth_mode=auth_mode, + turn_timeout_seconds=turn_timeout_seconds, ), }, } diff --git a/src/agent-runtime/kartograph_agent_runtime/server.py b/src/agent-runtime/kartograph_agent_runtime/server.py index 7a1df58ed..462b3326f 100644 --- a/src/agent-runtime/kartograph_agent_runtime/server.py +++ b/src/agent-runtime/kartograph_agent_runtime/server.py @@ -64,6 +64,7 @@ async def event_stream() -> AsyncIterator[str]: ui_mode=request.ui_mode, agent_configuration=request.agent_configuration, message_history=request.message_history, + turn_timeout_seconds=settings.turn_timeout_seconds, ): if event.get("type") == "done": logger.info( @@ -91,4 +92,12 @@ async def event_stream() -> AsyncIterator[str]: + "\n" ) - return StreamingResponse(event_stream(), media_type="application/x-ndjson") + return StreamingResponse( + event_stream(), + media_type="application/x-ndjson", + headers={ + "Cache-Control": "no-cache", + "Connection": "keep-alive", + "X-Accel-Buffering": "no", + }, + ) diff --git a/src/agent-runtime/kartograph_agent_runtime/settings.py b/src/agent-runtime/kartograph_agent_runtime/settings.py index fd8e6048f..2eb7778b9 100644 --- a/src/agent-runtime/kartograph_agent_runtime/settings.py +++ b/src/agent-runtime/kartograph_agent_runtime/settings.py @@ -28,6 +28,7 @@ class AgentRuntimeSettings(BaseSettings): gcloud_config_dir: str = Field(default="", alias="CLOUDSDK_CONFIG") google_application_credentials: str = Field(default="", alias="GOOGLE_APPLICATION_CREDENTIALS") home_dir: str = Field(default="/tmp", alias="HOME") + turn_timeout_seconds: float = Field(default=600.0, ge=30.0, le=900.0, alias="KARTOGRAPH_AGENT_TURN_TIMEOUT_SECONDS") def vertex_enabled(self) -> bool: return vertex_enabled_from_env() diff --git a/src/agent-runtime/kartograph_agent_runtime/thinking_stream.py b/src/agent-runtime/kartograph_agent_runtime/thinking_stream.py index b215b859d..8cae3eabe 100644 --- 
a/src/agent-runtime/kartograph_agent_runtime/thinking_stream.py +++ b/src/agent-runtime/kartograph_agent_runtime/thinking_stream.py @@ -4,7 +4,8 @@ from typing import Any -_MAX_THINKING_LINES = 8 +# Rolling window surfaced to the Graph Management Assistant UI (last N thoughts). +_MAX_THINKING_LINES = 3 def normalize_activity_line(text: str) -> str: @@ -170,6 +171,5 @@ def initial_sdk_thinking_lines(*, auth_mode: str, ui_mode: str) -> list[str]: return [ f"Claude Agent SDK query started ({auth_mode})…", f"Mode overlay: {ui_mode}", - "Tools: Read, Grep, Glob on workspace repository-files", "Connected — working on your message…", ] diff --git a/src/agent-runtime/tests/test_thinking_stream.py b/src/agent-runtime/tests/test_thinking_stream.py index b399c70da..4d7e719f8 100644 --- a/src/agent-runtime/tests/test_thinking_stream.py +++ b/src/agent-runtime/tests/test_thinking_stream.py @@ -49,14 +49,9 @@ def test_initial_sdk_thinking_lines_include_connected_message() -> None: def test_push_thinking_deduplicates_and_caps_recent_lines() -> None: recent: list[str] = [] - first = push_thinking(recent, "Reading schema.yaml") - second = push_thinking(recent, "Reading schema.yaml") - third = push_thinking(recent, "Running Grep…") - - assert first is not None - assert second is None - assert third is not None - assert recent[-1] == "Running Grep…" + for index in range(5): + push_thinking(recent, f"line-{index}") + assert recent == ["line-2", "line-3", "line-4"] def test_thinking_events_from_assistant_message_tool_and_reasoning_blocks() -> None: diff --git a/src/api/extraction/application/sticky_session_runtime_service.py b/src/api/extraction/application/sticky_session_runtime_service.py index e304089d8..453c43e2d 100644 --- a/src/api/extraction/application/sticky_session_runtime_service.py +++ b/src/api/extraction/application/sticky_session_runtime_service.py @@ -23,6 +23,14 @@ from extraction.ports.sticky_session_bootstrap import IStickySessionBootstrapBuilder from 
shared_kernel.container_runtime.ports import ContainerRuntimeError +from extraction.application.thinking_activity import thinking_event + +NDJSON_STREAM_HEADERS = { + "Cache-Control": "no-cache", + "Connection": "keep-alive", + "X-Accel-Buffering": "no", +} + class StickySessionRuntimeService: """Starts sticky containers and streams transparent readiness progress.""" @@ -146,10 +154,9 @@ async def _stream_prepare_runtime( persist_session: bool, emit_terminal: bool, ) -> AsyncIterator[dict[str, Any]]: - yield { - "type": "thinking", - "recent": ["Preparing Graph Management Assistant runtime…"], - } + recent: list[str] = [] + recent, event = thinking_event(recent, "Preparing Graph Management Assistant runtime…") + yield event resolved_skills = await self._skill_resolution_service.resolve_for_graph_management_turn( knowledge_graph_id=knowledge_graph_id, @@ -184,10 +191,10 @@ async def _stream_prepare_runtime( if persist_session: await self._session_service.save_session(session) yield {"type": "wait", "phase": gate.phase.value, "message": wait_message} - yield { - "type": "thinking", - "recent": ["Waiting for JobPackage ingestion context…", wait_message], - } + recent, event = thinking_event(recent, "Waiting for JobPackage ingestion context…") + yield event + recent, event = thinking_event(recent, wait_message) + yield event if emit_terminal: yield { "type": "done", @@ -210,21 +217,17 @@ async def _stream_prepare_runtime( session.runtime_context["sticky_runtime"] = self._lease_context(lease, phase="ready") if persist_session: await self._session_service.save_session(session) - yield { - "type": "thinking", - "recent": ["In-memory assistant runtime ready"], - } + recent, event = thinking_event(recent, "In-memory assistant runtime ready") + yield event yield {"type": "ready", "runtime_base_url": lease.runtime_base_url} yield {"type": "done", "ok": True, "ready": True} return - yield { - "type": "thinking", - "recent": [ - "Preparing Graph Management Assistant runtime…", 
- "Materializing workspace and skills for sticky container", - ], - } + recent, event = thinking_event( + recent, + "Materializing workspace and skills for sticky container", + ) + yield event include_job_packages = should_materialize_job_packages( readiness=readiness, gate=gate, @@ -242,13 +245,8 @@ async def _stream_prepare_runtime( session.runtime_context["workspace_materialization"] = { "job_package_ids": list(package_ids), } - yield { - "type": "thinking", - "recent": [ - "Materializing workspace and skills for sticky container", - "Starting isolated Claude Agent SDK container", - ], - } + recent, event = thinking_event(recent, "Starting isolated Claude Agent SDK container") + yield event lease: StickySessionRuntimeLease try: lease = await asyncio.to_thread( @@ -278,13 +276,11 @@ async def _stream_prepare_runtime( return session.runtime_context["sticky_runtime"] = self._lease_context(lease, phase="starting") - yield { - "type": "thinking", - "recent": [ - "Starting isolated Claude Agent SDK container", - f"Container {lease.container_id[:8]} launched", - ], - } + recent, event = thinking_event( + recent, + f"Container {lease.container_id[:8]} launched", + ) + yield event runtime_base_url = lease.runtime_base_url or "" try: @@ -292,7 +288,8 @@ async def _stream_prepare_runtime( runtime_base_url=runtime_base_url, timeout_seconds=self._sticky_health_timeout_seconds, ): - yield {"type": "thinking", "recent": [line]} + recent, event = thinking_event(recent, line) + yield event except TimeoutError as exc: session.runtime_context["sticky_runtime"]["phase"] = "unhealthy" session.runtime_context["sticky_runtime"]["status"] = "unhealthy" diff --git a/src/api/extraction/application/thinking_activity.py b/src/api/extraction/application/thinking_activity.py new file mode 100644 index 000000000..6c7e30eb8 --- /dev/null +++ b/src/api/extraction/application/thinking_activity.py @@ -0,0 +1,24 @@ +"""Rolling thinking-line helpers for NDJSON chat streams.""" + +from __future__ 
import annotations + +from typing import Any + +MAX_THINKING_LINES = 3 + + +def append_thinking_line(recent: list[str], line: str) -> list[str]: + normalized = " ".join(line.split()) + if not normalized: + return list(recent) + if recent and recent[-1] == normalized: + return list(recent) + updated = [*recent, normalized] + if len(updated) > MAX_THINKING_LINES: + updated = updated[-MAX_THINKING_LINES:] + return updated + + +def thinking_event(recent: list[str], line: str) -> tuple[list[str], dict[str, Any]]: + updated = append_thinking_line(recent, line) + return updated, {"type": "thinking", "recent": updated} diff --git a/src/api/extraction/infrastructure/container_workload_runtime.py b/src/api/extraction/infrastructure/container_workload_runtime.py index 1a10f80af..691da865f 100644 --- a/src/api/extraction/infrastructure/container_workload_runtime.py +++ b/src/api/extraction/infrastructure/container_workload_runtime.py @@ -62,6 +62,7 @@ def __init__( gcloud_config_container_path: str = "/gcloud/config", container_run_uid: int | None = None, container_run_gid: int | None = None, + agent_turn_timeout_seconds: float = 600.0, ) -> None: self._container_runtime = container_runtime self._sticky_image = sticky_image @@ -78,6 +79,7 @@ def __init__( self._gcloud_config_container_path = gcloud_config_container_path self._container_run_uid = container_run_uid self._container_run_gid = container_run_gid + self._agent_turn_timeout_seconds = agent_turn_timeout_seconds self._leases: dict[str, StickySessionRuntimeLease] = {} def get_or_start_runtime( @@ -281,6 +283,7 @@ def _start_runtime( "KARTOGRAPH_SESSION_MODE": mode, "KARTOGRAPH_SKILLS_DIR": self._container_skills_mount, "KARTOGRAPH_WORKSPACE_DIR": self._container_work_mount, + "KARTOGRAPH_AGENT_TURN_TIMEOUT_SECONDS": str(int(self._agent_turn_timeout_seconds)), } binds: list[str] = [] if bootstrap is not None: diff --git a/src/api/extraction/infrastructure/workload_runtime_factory.py 
b/src/api/extraction/infrastructure/workload_runtime_factory.py index 8642c89f5..3d227a655 100644 --- a/src/api/extraction/infrastructure/workload_runtime_factory.py +++ b/src/api/extraction/infrastructure/workload_runtime_factory.py @@ -76,6 +76,7 @@ def create_sticky_session_runtime_manager( gcloud_config_container_path=resolved.gcloud_config_container_path, container_run_uid=resolved.container_run_uid, container_run_gid=resolved.container_run_gid, + agent_turn_timeout_seconds=resolved.sticky_turn_timeout_seconds, ) diff --git a/src/api/extraction/infrastructure/workload_runtime_settings.py b/src/api/extraction/infrastructure/workload_runtime_settings.py index 7c1ae8f34..bcd5567d2 100644 --- a/src/api/extraction/infrastructure/workload_runtime_settings.py +++ b/src/api/extraction/infrastructure/workload_runtime_settings.py @@ -43,7 +43,7 @@ class ExtractionWorkloadRuntimeSettings(BaseSettings): skills_dir: str = Field(default="/app/skills") api_base_url: str = Field(default="http://api:8000") sticky_health_timeout_seconds: float = Field(default=90.0, ge=5.0, le=600.0) - sticky_turn_timeout_seconds: float = Field(default=180.0, ge=30.0, le=900.0) + sticky_turn_timeout_seconds: float = Field(default=600.0, ge=30.0, le=900.0) vertex_project_id: str = Field(default="") vertex_region: str = Field(default="us-east5") gcloud_config_mount: str | None = Field(default=None) diff --git a/src/api/extraction/presentation/routes.py b/src/api/extraction/presentation/routes.py index 4e6dba76f..d75057314 100644 --- a/src/api/extraction/presentation/routes.py +++ b/src/api/extraction/presentation/routes.py @@ -38,6 +38,12 @@ router = APIRouter(tags=["extraction-sessions"]) +NDJSON_STREAM_HEADERS = { + "Cache-Control": "no-cache", + "Connection": "keep-alive", + "X-Accel-Buffering": "no", +} + async def _assert_kg_edit_permission( *, @@ -192,7 +198,11 @@ async def event_stream(): ): yield json.dumps(event) + "\n" - return StreamingResponse(event_stream(), 
media_type="application/x-ndjson") + return StreamingResponse( + event_stream(), + media_type="application/x-ndjson", + headers=NDJSON_STREAM_HEADERS, + ) @router.post( @@ -223,7 +233,11 @@ async def event_stream(): ): yield json.dumps(event) + "\n" - return StreamingResponse(event_stream(), media_type="application/x-ndjson") + return StreamingResponse( + event_stream(), + media_type="application/x-ndjson", + headers=NDJSON_STREAM_HEADERS, + ) @router.post( diff --git a/src/api/tests/unit/extraction/application/test_thinking_activity.py b/src/api/tests/unit/extraction/application/test_thinking_activity.py new file mode 100644 index 000000000..a4ec5e59b --- /dev/null +++ b/src/api/tests/unit/extraction/application/test_thinking_activity.py @@ -0,0 +1,28 @@ +"""Unit tests for rolling thinking activity helpers.""" + +from __future__ import annotations + +from extraction.application.thinking_activity import ( + MAX_THINKING_LINES, + append_thinking_line, + thinking_event, +) + + +def test_append_thinking_line_caps_at_three() -> None: + recent: list[str] = [] + for line in ("one", "two", "three", "four"): + recent = append_thinking_line(recent, line) + + assert recent == ["two", "three", "four"] + assert MAX_THINKING_LINES == 3 + + +def test_thinking_event_returns_full_recent_window() -> None: + recent, event = thinking_event([], "Starting container") + + assert event == {"type": "thinking", "recent": ["Starting container"]} + assert recent == ["Starting container"] + + recent, event = thinking_event(recent, "Waiting for health check") + assert event["recent"] == ["Starting container", "Waiting for health check"] diff --git a/src/dev-ui/app/components/extraction/SharedConversationPanel.vue b/src/dev-ui/app/components/extraction/SharedConversationPanel.vue index 1bd788399..6e143ae21 100644 --- a/src/dev-ui/app/components/extraction/SharedConversationPanel.vue +++ b/src/dev-ui/app/components/extraction/SharedConversationPanel.vue @@ -1,6 +1,10 @@ <script setup lang="ts"> 
import { computed, nextTick, onMounted, ref, watch } from 'vue' import { Bot, Loader2, RefreshCw, RotateCcw, Send, Sparkles, User } from 'lucide-vue-next' +import { + normalizeThinkingActivityLines, + THINKING_DISPLAY_LINE_COUNT, +} from '@/utils/thinkingActivityLines' import { Button } from '@/components/ui/button' import { Card, CardContent, CardHeader, CardTitle, CardDescription } from '@/components/ui/card' import { @@ -91,11 +95,9 @@ const runtimeActivityTitle = computed(() => : 'Thinking...', ) -const thinkingDisplayLines = computed(() => { - const src = props.activityLines.filter(Boolean) - if (src.length === 0) return [''] - return src -}) +const thinkingDisplayLines = computed(() => + normalizeThinkingActivityLines(props.activityLines, THINKING_DISPLAY_LINE_COUNT), +) function isUserRole(role: string | undefined): boolean { return role === 'user' || role === 'human' @@ -170,11 +172,12 @@ function sendDraftMessage() { } watch( - () => [messageHistory.value.length, props.activityLines.length, props.sending], + () => [messageHistory.value.length, props.activityLines, props.sending], async () => { await nextTick() scrollToBottom() }, + { deep: true }, ) watch( diff --git a/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue b/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue index 368c7bdd7..c6e1177e5 100644 --- a/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue +++ b/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue @@ -97,6 +97,7 @@ import { type MutationLogRunRecord, } from '@/utils/kgMutationLogs' import { streamExtractionChatTurn, streamRuntimeWarmup } from '@/utils/kgExtractionChat' +import { applyThinkingRecentUpdate } from '@/utils/thinkingActivityLines' import { useGraphApi } from '@/composables/api/useGraphApi' const runtimeConfig = useRuntimeConfig() @@ -937,13 +938,20 @@ function onMutationRunKeydown(event: KeyboardEvent, runId: string) { handleActivatableKeydown(event, () => selectMutationLogRun(runId)) } +function 
applySessionThinkingRecent(recent: string[]) { + sessionActivityLines.value = applyThinkingRecentUpdate(sessionActivityLines.value, recent) +} + function syncActivityLinesFromSession() { if (runtimeWarming.value || showRuntimeWarmupProgress.value) return const context = extractionSession.value?.runtime_context ?? {} const candidate = context.activity_lines ?? context.ndjson_activity_lines ?? context.thinking_lines if (Array.isArray(candidate)) { - sessionActivityLines.value = candidate.filter( - (line): line is string => typeof line === 'string' && line.trim().length > 0, + sessionActivityLines.value = applyThinkingRecentUpdate( + [], + candidate.filter( + (line): line is string => typeof line === 'string' && line.trim().length > 0, + ), ) } else if (!runtimeWarming.value) { sessionActivityLines.value = [] @@ -974,16 +982,13 @@ async function warmupAssistantRuntime() { })) { if (generation !== runtimeWarmupGeneration) return if (event.type === 'thinking' && Array.isArray(event.recent)) { - const recent = event.recent.filter(Boolean) - sessionActivityLines.value = recent.length > 0 - ? recent - : sessionActivityLines.value + applySessionThinkingRecent(event.recent) } if (event.type === 'wait' && event.message) { - sessionActivityLines.value = [event.message] + applySessionThinkingRecent([event.message]) } if (event.type === 'ready') { - sessionActivityLines.value = ['Assistant container ready'] + applySessionThinkingRecent(['Assistant container ready']) } if (event.type === 'done') { if (event.ok !== true) { @@ -1041,6 +1046,7 @@ async function sendChatMessage(message: string) { updated_at: new Date().toISOString(), } + let chatSucceeded = false try { for await (const event of streamExtractionChatTurn({ apiBaseUrl: String(runtimeConfig.public.apiBaseUrl ?? 
''), @@ -1052,29 +1058,32 @@ async function sendChatMessage(message: string) { message: trimmed, })) { if (event.type === 'thinking' && Array.isArray(event.recent)) { - const recent = event.recent.filter(Boolean) - sessionActivityLines.value = recent.length > 0 - ? recent - : sessionActivityLines.value + applySessionThinkingRecent(event.recent) } if (event.type === 'wait') { - sessionActivityLines.value = event.message - ? [event.message] - : ['Waiting for JobPackage ingestion context…'] + sessionActivityLines.value = applyThinkingRecentUpdate( + [], + event.message ? [event.message] : ['Waiting for JobPackage ingestion context…'], + ) } if (event.type === 'done' && event.ok !== true) { throw new Error(event.error?.message ?? 'Graph Management Assistant returned an error.') } } + chatSucceeded = true await loadExtractionSession() } catch (err) { + const message = extractErrorMessage(err) + applySessionThinkingRecent([`Error: ${message}`]) toast.error('Failed to send message', { - description: extractErrorMessage(err), + description: message, }) await loadExtractionSession() } finally { sendingChat.value = false - syncActivityLinesFromSession() + if (chatSucceeded) { + syncActivityLinesFromSession() + } } } diff --git a/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts b/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts index 19b93d99c..956ba833f 100644 --- a/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts +++ b/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts @@ -368,6 +368,8 @@ describe('Shared conversation panel - extraction UX contract', () => { it('renders bubble chat, thinking state, and auto-scroll', () => { expect(sharedConversationPanelVue).toContain('thinkingDisplayLines') + expect(sharedConversationPanelVue).toContain('normalizeThinkingActivityLines') + expect(sharedConversationPanelVue).toContain('THINKING_DISPLAY_LINE_COUNT') expect(sharedConversationPanelVue).toContain('chatScrollRef') 
expect(sharedConversationPanelVue).toContain('renderAssistantHtml') expect(sharedConversationPanelVue).toContain('scrollToBottom') diff --git a/src/dev-ui/app/tests/thinking-activity-lines.test.ts b/src/dev-ui/app/tests/thinking-activity-lines.test.ts new file mode 100644 index 000000000..00b9805fe --- /dev/null +++ b/src/dev-ui/app/tests/thinking-activity-lines.test.ts @@ -0,0 +1,28 @@ +import { describe, expect, it } from 'vitest' +import { + applyThinkingRecentUpdate, + normalizeThinkingActivityLines, + THINKING_DISPLAY_LINE_COUNT, +} from '../utils/thinkingActivityLines' + +describe('thinkingActivityLines', () => { + it('pads to three display slots with newest thoughts at the bottom', () => { + expect(normalizeThinkingActivityLines(['Alpha', 'Beta'])).toEqual(['', 'Alpha', 'Beta']) + }) + + it('keeps only the last three lines', () => { + expect( + normalizeThinkingActivityLines(['one', 'two', 'three', 'four']), + ).toEqual(['two', 'three', 'four']) + }) + + it('replaces activity from authoritative recent payloads', () => { + expect( + applyThinkingRecentUpdate(['stale'], ['Reading schema', 'Running Grep…']), + ).toEqual(['', 'Reading schema', 'Running Grep…']) + }) + + it('uses the shared three-line contract', () => { + expect(THINKING_DISPLAY_LINE_COUNT).toBe(3) + }) +}) diff --git a/src/dev-ui/app/utils/thinkingActivityLines.ts b/src/dev-ui/app/utils/thinkingActivityLines.ts new file mode 100644 index 000000000..d7bd57032 --- /dev/null +++ b/src/dev-ui/app/utils/thinkingActivityLines.ts @@ -0,0 +1,26 @@ +/** Rolling thinking-line panel contract for Graph Management Assistant streams. 
*/ + +export const THINKING_DISPLAY_LINE_COUNT = 3 + +export function normalizeThinkingActivityLines( + lines: string[], + slotCount: number = THINKING_DISPLAY_LINE_COUNT, +): string[] { + const recent = lines.filter((line) => typeof line === 'string' && line.trim().length > 0) + const tail = recent.slice(-slotCount) + while (tail.length < slotCount) { + tail.unshift('') + } + return tail +} + +export function applyThinkingRecentUpdate( + current: string[], + recent: string[], + slotCount: number = THINKING_DISPLAY_LINE_COUNT, +): string[] { + if (recent.length === 0) { + return normalizeThinkingActivityLines(current, slotCount) + } + return normalizeThinkingActivityLines(recent, slotCount) +} From 8babd8d959afc13d2af1590ba30d3146b8d6f4c8 Mon Sep 17 00:00:00 2001 From: aredenba-rh <aredenba@redhat.com> Date: Wed, 3 Jun 2026 01:07:06 -0400 Subject: [PATCH 081/153] fix(extraction): flush live thinking stream and raise sticky agent max turns Improve incremental NDJSON delivery, SDK thinking dispatch, and error handling; default max_turns to 500 so graph management turns are not capped at 8. 
Co-authored-by: Cursor <cursoragent@cursor.com> --- compose.dev.yaml | 1 + .../kartograph_agent_runtime/executor.py | 29 ++- .../kartograph_agent_runtime/server.py | 2 + .../kartograph_agent_runtime/settings.py | 1 + .../thinking_stream.py | 196 +++++++++++++++--- .../tests/test_thinking_stream.py | 29 +++ .../container_workload_runtime.py | 3 + .../remote_sticky_container_chat_agent.py | 2 + .../workload_runtime_factory.py | 1 + .../workload_runtime_settings.py | 1 + src/api/extraction/presentation/routes.py | 3 + ...test_sticky_session_container_bootstrap.py | 2 + 12 files changed, 234 insertions(+), 36 deletions(-) diff --git a/compose.dev.yaml b/compose.dev.yaml index e77368646..aaf089020 100644 --- a/compose.dev.yaml +++ b/compose.dev.yaml @@ -24,6 +24,7 @@ services: KARTOGRAPH_EXTRACTION_RUNTIME_CONTAINER_RUN_UID: ${HOST_UID} KARTOGRAPH_EXTRACTION_RUNTIME_CONTAINER_RUN_GID: ${HOST_GID} KARTOGRAPH_EXTRACTION_RUNTIME_STICKY_TURN_TIMEOUT_SECONDS: "600" + KARTOGRAPH_EXTRACTION_RUNTIME_STICKY_MAX_TURNS: "500" # Vertex AI for Claude Agent SDK in sticky assistant containers CLAUDE_CODE_USE_VERTEX: "1" ANTHROPIC_VERTEX_PROJECT_ID: itpc-gcp-hcm-pe-eng-claude diff --git a/src/agent-runtime/kartograph_agent_runtime/executor.py b/src/agent-runtime/kartograph_agent_runtime/executor.py index 8151f6a04..9eb5429e2 100644 --- a/src/agent-runtime/kartograph_agent_runtime/executor.py +++ b/src/agent-runtime/kartograph_agent_runtime/executor.py @@ -11,6 +11,7 @@ from kartograph_agent_runtime.thinking_stream import ( initial_sdk_thinking_lines, push_thinking, + replace_last_thinking, thinking_events_from_sdk_message, ) from kartograph_agent_runtime.tools import RuntimeTooling @@ -262,6 +263,7 @@ async def _stream_with_claude_sdk( turn_timeout_seconds: float, ) -> AsyncIterator[dict[str, Any]]: from claude_agent_sdk import ClaudeAgentOptions, query + from claude_agent_sdk.types import ResultMessage system_prompt = _build_system_prompt( agent_configuration, @@ -285,7 +287,7 @@ 
async def _stream_with_claude_sdk( system_prompt=system_prompt, env=sdk_env, permission_mode="bypassPermissions", - max_turns=8, + max_turns=settings.max_turns, setting_sources=[], cwd=workspace_dir, add_dirs=[workspace_dir], @@ -308,12 +310,14 @@ async def _stream_with_claude_sdk( break except TimeoutError: elapsed_seconds += int(_SDK_HEARTBEAT_SECONDS) - heartbeat = push_thinking( + heartbeat = replace_last_thinking( recent, f"Waiting for model response… ({elapsed_seconds}s)", + prefix="Waiting for model response", ) if heartbeat: yield heartbeat + await asyncio.sleep(0) continue thinking_events, last_compose_at = thinking_events_from_sdk_message( @@ -324,6 +328,27 @@ async def _stream_with_claude_sdk( ) for event in thinking_events: yield event + await asyncio.sleep(0) + + if isinstance(sdk_message, ResultMessage): + if sdk_message.is_error: + error_text = str(sdk_message.result or "").strip() + if not error_text and sdk_message.errors: + error_text = "; ".join(str(item) for item in sdk_message.errors) + if error_text: + error_thinking = push_thinking(recent, f"Error · {error_text}") + if error_thinking: + yield error_thinking + await asyncio.sleep(0) + yield { + "type": "done", + "ok": False, + "error": { + "code": "AGENT_SDK_ERROR", + "message": error_text or "Claude Agent SDK returned an error.", + }, + } + return extracted = _extract_sdk_reply(sdk_message) if extracted: diff --git a/src/agent-runtime/kartograph_agent_runtime/server.py b/src/agent-runtime/kartograph_agent_runtime/server.py index 462b3326f..ccbeb96ce 100644 --- a/src/agent-runtime/kartograph_agent_runtime/server.py +++ b/src/agent-runtime/kartograph_agent_runtime/server.py @@ -2,6 +2,7 @@ from __future__ import annotations +import asyncio import json import logging from collections.abc import AsyncIterator @@ -73,6 +74,7 @@ async def event_stream() -> AsyncIterator[str]: event.get("ok"), ) yield json.dumps(event) + "\n" + await asyncio.sleep(0) except Exception: logger.exception( 
"agent_runtime_turn_failed session_id=%s", diff --git a/src/agent-runtime/kartograph_agent_runtime/settings.py b/src/agent-runtime/kartograph_agent_runtime/settings.py index 2eb7778b9..02d899ce4 100644 --- a/src/agent-runtime/kartograph_agent_runtime/settings.py +++ b/src/agent-runtime/kartograph_agent_runtime/settings.py @@ -29,6 +29,7 @@ class AgentRuntimeSettings(BaseSettings): google_application_credentials: str = Field(default="", alias="GOOGLE_APPLICATION_CREDENTIALS") home_dir: str = Field(default="/tmp", alias="HOME") turn_timeout_seconds: float = Field(default=600.0, ge=30.0, le=900.0, alias="KARTOGRAPH_AGENT_TURN_TIMEOUT_SECONDS") + max_turns: int = Field(default=500, ge=1, le=1000, alias="KARTOGRAPH_AGENT_MAX_TURNS") def vertex_enabled(self) -> bool: return vertex_enabled_from_env() diff --git a/src/agent-runtime/kartograph_agent_runtime/thinking_stream.py b/src/agent-runtime/kartograph_agent_runtime/thinking_stream.py index 8cae3eabe..977194b50 100644 --- a/src/agent-runtime/kartograph_agent_runtime/thinking_stream.py +++ b/src/agent-runtime/kartograph_agent_runtime/thinking_stream.py @@ -27,6 +27,32 @@ def push_thinking(recent: list[str], line: str) -> dict[str, Any] | None: return {"type": "thinking", "recent": list(recent)} +def replace_last_thinking( + recent: list[str], + line: str, + *, + prefix: str | None = None, +) -> dict[str, Any] | None: + """Replace the last matching (or final) thinking line — used for heartbeats.""" + normalized = normalize_activity_line(line) + if not normalized: + return None + if prefix: + for index in range(len(recent) - 1, -1, -1): + if str(recent[index]).startswith(prefix): + if recent[index] == normalized: + return None + recent[index] = normalized + return {"type": "thinking", "recent": list(recent)} + if recent: + if recent[-1] == normalized: + return None + recent[-1] = normalized + else: + recent.append(normalized) + return {"type": "thinking", "recent": list(recent)} + + def update_composing_line(recent: 
list[str], preview_tail: str) -> dict[str, Any] | None: preview_tail = normalize_activity_line(preview_tail.replace("\n", " ")) line = normalize_activity_line( @@ -82,20 +108,63 @@ def _stream_event_line(event: dict[str, Any]) -> str | None: return None -def thinking_events_from_sdk_message( - sdk_message: Any, +def _append_task_progress_events( + events: list[dict[str, Any]], + recent: list[str], + *, + description: str, + last_tool_name: str | None, + started: bool, +) -> None: + progress_description = description.strip() + last_tool = str(last_tool_name or "").strip() + if progress_description: + prefix = "Task started · " if started else "" + event = push_thinking(recent, f"{prefix}{progress_description}".strip()) + if event: + events.append(event) + if last_tool: + event = push_thinking(recent, f"Running {last_tool}…") + if event: + events.append(event) + + +def _thinking_events_from_assistant_content( + content: list[Any], *, recent: list[str], reply_parts: list[str], last_compose_at: int, - compose_step: int = 120, + compose_step: int, ) -> tuple[list[dict[str, Any]], int]: - """Return thinking NDJSON events and updated compose offset for one SDK message.""" - events: list[dict[str, Any]] = [] + from claude_agent_sdk.types import TextBlock, ThinkingBlock, ToolUseBlock - content = getattr(sdk_message, "content", None) - if isinstance(content, list): - for block in content: + events: list[dict[str, Any]] = [] + for block in content: + if isinstance(block, ThinkingBlock): + thinking = normalize_activity_line(block.thinking or "") + if thinking: + event = push_thinking(recent, f"Reasoning · {thinking}") + if event: + events.append(event) + elif isinstance(block, ToolUseBlock): + tool_input = block.input if isinstance(block.input, dict) else {} + event = push_thinking(recent, _tool_use_line(block.name, tool_input)) + if event: + events.append(event) + elif isinstance(block, TextBlock): + text = str(block.text or "") + if text.strip(): + reply_parts.append(text) + 
blob = "".join(reply_parts) + plain = text.replace("\n", "").strip() + if plain and len(blob) - last_compose_at >= compose_step: + tail = blob[-88:].replace("\n", " ").strip() + event = update_composing_line(recent, tail) + if event: + events.append(event) + last_compose_at = len(blob) + else: block_type = type(block).__name__ if block_type == "ThinkingBlock" or hasattr(block, "thinking"): thinking = normalize_activity_line(getattr(block, "thinking", "") or "") @@ -123,44 +192,103 @@ def thinking_events_from_sdk_message( if event: events.append(event) last_compose_at = len(blob) + return events, last_compose_at + + +def thinking_events_from_sdk_message( + sdk_message: Any, + *, + recent: list[str], + reply_parts: list[str], + last_compose_at: int, + compose_step: int = 120, +) -> tuple[list[dict[str, Any]], int]: + """Return thinking NDJSON events and updated compose offset for one SDK message.""" + from claude_agent_sdk.types import ( + AssistantMessage, + StreamEvent, + TaskNotificationMessage, + TaskProgressMessage, + TaskStartedMessage, + ) + + events: list[dict[str, Any]] = [] + + if isinstance(sdk_message, AssistantMessage): + if isinstance(sdk_message.content, list): + return _thinking_events_from_assistant_content( + sdk_message.content, + recent=recent, + reply_parts=reply_parts, + last_compose_at=last_compose_at, + compose_step=compose_step, + ) return events, last_compose_at - task_id = getattr(sdk_message, "task_id", None) - description = str(getattr(sdk_message, "description", "") or "").strip() - if task_id and description: - last_tool = str(getattr(sdk_message, "last_tool_name", "") or "").strip() - usage = getattr(sdk_message, "usage", None) - prefix = "Task started ·" if usage is None and not last_tool else "" - line = f"{prefix}{description}".strip() - event = push_thinking(recent, line) - if event: - events.append(event) - if last_tool: - event = push_thinking(recent, f"Running {last_tool}…") + if isinstance(sdk_message, TaskStartedMessage): + 
_append_task_progress_events( + events, + recent, + description=str(sdk_message.description or ""), + last_tool_name=None, + started=True, + ) + return events, last_compose_at + + if isinstance(sdk_message, TaskProgressMessage): + _append_task_progress_events( + events, + recent, + description=str(sdk_message.description or ""), + last_tool_name=sdk_message.last_tool_name, + started=False, + ) + return events, last_compose_at + + if isinstance(sdk_message, TaskNotificationMessage): + summary = str(sdk_message.summary or "").strip() + if summary: + event = push_thinking(recent, summary) if event: events.append(event) return events, last_compose_at - payload = getattr(sdk_message, "event", None) - if isinstance(payload, dict): - line = _stream_event_line(payload) + if isinstance(sdk_message, StreamEvent): + line = _stream_event_line(sdk_message.event) if line: event = push_thinking(recent, line) if event: events.append(event) return events, last_compose_at - subtype = str(getattr(sdk_message, "subtype", "") or "").strip() - data = getattr(sdk_message, "data", None) or {} - if subtype == "task_progress" and isinstance(data, dict): - progress_description = str(data.get("description") or "").strip() - last_tool = str(data.get("last_tool_name") or "").strip() - if progress_description: - event = push_thinking(recent, progress_description) - if event: - events.append(event) - if last_tool: - event = push_thinking(recent, f"Running {last_tool}…") + content = getattr(sdk_message, "content", None) + if isinstance(content, list): + return _thinking_events_from_assistant_content( + content, + recent=recent, + reply_parts=reply_parts, + last_compose_at=last_compose_at, + compose_step=compose_step, + ) + + task_id = getattr(sdk_message, "task_id", None) + description = str(getattr(sdk_message, "description", "") or "").strip() + if task_id and description: + _append_task_progress_events( + events, + recent, + description=description, + last_tool_name=getattr(sdk_message, 
"last_tool_name", None), + started=getattr(sdk_message, "usage", None) is None + and not getattr(sdk_message, "last_tool_name", None), + ) + return events, last_compose_at + + payload = getattr(sdk_message, "event", None) + if isinstance(payload, dict): + line = _stream_event_line(payload) + if line: + event = push_thinking(recent, line) if event: events.append(event) diff --git a/src/agent-runtime/tests/test_thinking_stream.py b/src/agent-runtime/tests/test_thinking_stream.py index 4d7e719f8..915f09255 100644 --- a/src/agent-runtime/tests/test_thinking_stream.py +++ b/src/agent-runtime/tests/test_thinking_stream.py @@ -7,6 +7,7 @@ from kartograph_agent_runtime.thinking_stream import ( initial_sdk_thinking_lines, push_thinking, + replace_last_thinking, thinking_events_from_sdk_message, ) @@ -47,6 +48,14 @@ def test_initial_sdk_thinking_lines_include_connected_message() -> None: assert any("Connected" in line for line in lines) +def test_agent_runtime_settings_default_max_turns() -> None: + from kartograph_agent_runtime.settings import AgentRuntimeSettings + + settings = AgentRuntimeSettings() + + assert settings.max_turns == 500 + + def test_push_thinking_deduplicates_and_caps_recent_lines() -> None: recent: list[str] = [] for index in range(5): @@ -54,6 +63,26 @@ def test_push_thinking_deduplicates_and_caps_recent_lines() -> None: assert recent == ["line-2", "line-3", "line-4"] +def test_replace_last_thinking_updates_matching_prefix_in_place() -> None: + recent = initial_sdk_thinking_lines(auth_mode="Vertex AI", ui_mode="initial-schema-design") + + first = replace_last_thinking( + recent, + "Waiting for model response… (8s)", + prefix="Waiting for model response", + ) + second = replace_last_thinking( + recent, + "Waiting for model response… (16s)", + prefix="Waiting for model response", + ) + + assert first is not None + assert second is not None + assert recent[-1] == "Waiting for model response… (16s)" + assert len(recent) == 3 + + def 
test_thinking_events_from_assistant_message_tool_and_reasoning_blocks() -> None: recent = initial_sdk_thinking_lines(auth_mode="Vertex AI", ui_mode="initial-schema-design") message = FakeAssistantMessage( diff --git a/src/api/extraction/infrastructure/container_workload_runtime.py b/src/api/extraction/infrastructure/container_workload_runtime.py index 691da865f..765f56617 100644 --- a/src/api/extraction/infrastructure/container_workload_runtime.py +++ b/src/api/extraction/infrastructure/container_workload_runtime.py @@ -63,6 +63,7 @@ def __init__( container_run_uid: int | None = None, container_run_gid: int | None = None, agent_turn_timeout_seconds: float = 600.0, + agent_max_turns: int = 500, ) -> None: self._container_runtime = container_runtime self._sticky_image = sticky_image @@ -80,6 +81,7 @@ def __init__( self._container_run_uid = container_run_uid self._container_run_gid = container_run_gid self._agent_turn_timeout_seconds = agent_turn_timeout_seconds + self._agent_max_turns = agent_max_turns self._leases: dict[str, StickySessionRuntimeLease] = {} def get_or_start_runtime( @@ -284,6 +286,7 @@ def _start_runtime( "KARTOGRAPH_SKILLS_DIR": self._container_skills_mount, "KARTOGRAPH_WORKSPACE_DIR": self._container_work_mount, "KARTOGRAPH_AGENT_TURN_TIMEOUT_SECONDS": str(int(self._agent_turn_timeout_seconds)), + "KARTOGRAPH_AGENT_MAX_TURNS": str(int(self._agent_max_turns)), } binds: list[str] = [] if bootstrap is not None: diff --git a/src/api/extraction/infrastructure/remote_sticky_container_chat_agent.py b/src/api/extraction/infrastructure/remote_sticky_container_chat_agent.py index 34957bf45..b5f9d9eee 100644 --- a/src/api/extraction/infrastructure/remote_sticky_container_chat_agent.py +++ b/src/api/extraction/infrastructure/remote_sticky_container_chat_agent.py @@ -2,6 +2,7 @@ from __future__ import annotations +import asyncio import json from collections.abc import AsyncIterator from typing import Any @@ -76,6 +77,7 @@ async def stream_turn( if not trimmed: 
continue yield json.loads(trimmed) + await asyncio.sleep(0) except httpx.HTTPError as exc: yield { "type": "done", diff --git a/src/api/extraction/infrastructure/workload_runtime_factory.py b/src/api/extraction/infrastructure/workload_runtime_factory.py index 3d227a655..1a88410ee 100644 --- a/src/api/extraction/infrastructure/workload_runtime_factory.py +++ b/src/api/extraction/infrastructure/workload_runtime_factory.py @@ -77,6 +77,7 @@ def create_sticky_session_runtime_manager( container_run_uid=resolved.container_run_uid, container_run_gid=resolved.container_run_gid, agent_turn_timeout_seconds=resolved.sticky_turn_timeout_seconds, + agent_max_turns=resolved.sticky_max_turns, ) diff --git a/src/api/extraction/infrastructure/workload_runtime_settings.py b/src/api/extraction/infrastructure/workload_runtime_settings.py index bcd5567d2..57a5db007 100644 --- a/src/api/extraction/infrastructure/workload_runtime_settings.py +++ b/src/api/extraction/infrastructure/workload_runtime_settings.py @@ -44,6 +44,7 @@ class ExtractionWorkloadRuntimeSettings(BaseSettings): api_base_url: str = Field(default="http://api:8000") sticky_health_timeout_seconds: float = Field(default=90.0, ge=5.0, le=600.0) sticky_turn_timeout_seconds: float = Field(default=600.0, ge=30.0, le=900.0) + sticky_max_turns: int = Field(default=500, ge=1, le=1000) vertex_project_id: str = Field(default="") vertex_region: str = Field(default="us-east5") gcloud_config_mount: str | None = Field(default=None) diff --git a/src/api/extraction/presentation/routes.py b/src/api/extraction/presentation/routes.py index d75057314..cf32f492b 100644 --- a/src/api/extraction/presentation/routes.py +++ b/src/api/extraction/presentation/routes.py @@ -2,6 +2,7 @@ from __future__ import annotations +import asyncio import json from typing import Annotated @@ -197,6 +198,7 @@ async def event_stream(): ui_mode=request.graph_management_ui_mode, ): yield json.dumps(event) + "\n" + await asyncio.sleep(0) return StreamingResponse( 
event_stream(), @@ -232,6 +234,7 @@ async def event_stream(): message=request.message, ): yield json.dumps(event) + "\n" + await asyncio.sleep(0) return StreamingResponse( event_stream(), diff --git a/src/api/tests/unit/extraction/infrastructure/test_sticky_session_container_bootstrap.py b/src/api/tests/unit/extraction/infrastructure/test_sticky_session_container_bootstrap.py index 75a0c45fb..a4087605d 100644 --- a/src/api/tests/unit/extraction/infrastructure/test_sticky_session_container_bootstrap.py +++ b/src/api/tests/unit/extraction/infrastructure/test_sticky_session_container_bootstrap.py @@ -28,6 +28,7 @@ def test_start_runtime_mounts_skills_workspace_and_injects_token() -> None: gcloud_config_container_path="/gcloud/config", container_run_uid=1000, container_run_gid=1000, + agent_max_turns=500, ) issuer = ScopedWorkloadCredentialIssuer(default_ttl=timedelta(minutes=10)) credentials = issuer.issue_for_sticky_session(tenant_id="tenant-1", knowledge_graph_id="kg-1") @@ -59,5 +60,6 @@ def test_start_runtime_mounts_skills_workspace_and_injects_token() -> None: "/gcloud/config/application_default_credentials.json" ) assert spec.env["HOME"] == "/tmp" + assert spec.env["KARTOGRAPH_AGENT_MAX_TURNS"] == "500" assert spec.user == "1000:1000" assert lease.runtime_base_url.startswith("http://kartograph-sticky-") From dd0be37bafa779e50a17aafc999bfee95553b4af Mon Sep 17 00:00:00 2001 From: aredenba-rh <aredenba@redhat.com> Date: Wed, 3 Jun 2026 01:09:53 -0400 Subject: [PATCH 082/153] fix(agent-runtime): capture streamed SDK replies after tool-heavy turns Accumulate StreamEvent text deltas, join assistant text blocks, and finalize turn replies from result metadata so tool-only completions are not reported as empty. 
Co-authored-by: Cursor <cursoragent@cursor.com> --- .../kartograph_agent_runtime/executor.py | 73 +++++++++++++++++-- .../thinking_stream.py | 61 ++++++++++++---- src/agent-runtime/tests/test_executor.py | 49 +++++++++++++ .../tests/test_thinking_stream.py | 44 +++++++++++ 4 files changed, 205 insertions(+), 22 deletions(-) diff --git a/src/agent-runtime/kartograph_agent_runtime/executor.py b/src/agent-runtime/kartograph_agent_runtime/executor.py index 9eb5429e2..e489f1937 100644 --- a/src/agent-runtime/kartograph_agent_runtime/executor.py +++ b/src/agent-runtime/kartograph_agent_runtime/executor.py @@ -3,6 +3,7 @@ from __future__ import annotations import asyncio +import json import os from collections.abc import AsyncIterator from typing import Any @@ -132,6 +133,15 @@ def _extract_sdk_reply(message: Any) -> str | None: if isinstance(result, str) and result.strip(): return result.strip() + structured = getattr(message, "structured_output", None) + if structured is not None: + if isinstance(structured, str) and structured.strip(): + return structured.strip() + try: + return json.dumps(structured, indent=2) + except TypeError: + return str(structured) + content = getattr(message, "content", None) if isinstance(content, str) and content.strip(): return content.strip() @@ -140,12 +150,50 @@ def _extract_sdk_reply(message: Any) -> str | None: for block in content: text = getattr(block, "text", None) if isinstance(text, str) and text.strip(): - parts.append(text.strip()) + parts.append(text) if parts: - return parts[-1] + return "".join(parts).strip() return None +def finalize_sdk_turn_reply( + *, + reply: str | None, + reply_parts: list[str], + last_result: Any | None, + notification_summaries: list[str], +) -> str: + """Build the best available assistant reply after an SDK turn completes.""" + if isinstance(reply, str) and reply.strip(): + return reply.strip() + + streamed = "".join(reply_parts).strip() + if streamed: + return streamed + + if last_result is not None: 
+ extracted = _extract_sdk_reply(last_result) + if extracted: + return extracted + + if notification_summaries: + return notification_summaries[-1] + + num_turns = int(getattr(last_result, "num_turns", 0) or 0) + if num_turns > 0: + return ( + f"**Assistant completed** ({num_turns} turn(s))\n\n" + "The agent finished tool work without a final written reply. " + "Review workspace artifacts or graph mutations, or ask the assistant " + "to summarize what it changed." + ) + + return ( + "Claude Agent SDK completed without a textual response. " + "Retry with a more specific graph-management request." + ) + + def _build_sdk_env(settings: AgentRuntimeSettings) -> dict[str, str]: env = build_claude_agent_env(settings) if settings.gcloud_config_dir.strip(): @@ -263,7 +311,7 @@ async def _stream_with_claude_sdk( turn_timeout_seconds: float, ) -> AsyncIterator[dict[str, Any]]: from claude_agent_sdk import ClaudeAgentOptions, query - from claude_agent_sdk.types import ResultMessage + from claude_agent_sdk.types import ResultMessage, TaskNotificationMessage system_prompt = _build_system_prompt( agent_configuration, @@ -295,6 +343,8 @@ async def _stream_with_claude_sdk( reply: str | None = None reply_parts: list[str] = [] + notification_summaries: list[str] = [] + last_result: ResultMessage | None = None last_compose_at = 0 elapsed_seconds = 0 try: @@ -330,6 +380,11 @@ async def _stream_with_claude_sdk( yield event await asyncio.sleep(0) + if isinstance(sdk_message, TaskNotificationMessage): + summary = str(sdk_message.summary or "").strip() + if summary: + notification_summaries.append(summary) + if isinstance(sdk_message, ResultMessage): if sdk_message.is_error: error_text = str(sdk_message.result or "").strip() @@ -349,6 +404,7 @@ async def _stream_with_claude_sdk( }, } return + last_result = sdk_message extracted = _extract_sdk_reply(sdk_message) if extracted: @@ -380,9 +436,10 @@ async def _stream_with_claude_sdk( } return - if not reply: - reply = ( - "Claude Agent SDK 
completed without a textual response. " - "Retry with a more specific graph-management request." - ) + reply = finalize_sdk_turn_reply( + reply=reply, + reply_parts=reply_parts, + last_result=last_result, + notification_summaries=notification_summaries, + ) yield {"type": "done", "ok": True, "reply": reply} diff --git a/src/agent-runtime/kartograph_agent_runtime/thinking_stream.py b/src/agent-runtime/kartograph_agent_runtime/thinking_stream.py index 977194b50..6545c1c37 100644 --- a/src/agent-runtime/kartograph_agent_runtime/thinking_stream.py +++ b/src/agent-runtime/kartograph_agent_runtime/thinking_stream.py @@ -95,17 +95,47 @@ def _stream_event_line(event: dict[str, Any]) -> str | None: return f"Running {name}…" if block_type == "thinking": return "Reasoning…" + if block_type == "text": + return "Composing reply…" if event_type == "content_block_delta": delta = event.get("delta") or {} if delta.get("type") == "thinking_delta": thinking = str(delta.get("thinking") or "").strip() if thinking: return f"Reasoning · {normalize_activity_line(thinking)}" + return None + + +def _thinking_events_from_stream_event( + event: dict[str, Any], + *, + recent: list[str], + reply_parts: list[str], + last_compose_at: int, + compose_step: int, +) -> tuple[list[dict[str, Any]], int]: + events: list[dict[str, Any]] = [] + if event.get("type") == "content_block_delta": + delta = event.get("delta") or {} if delta.get("type") == "text_delta": - text = str(delta.get("text") or "").strip() + text = str(delta.get("text") or "") if text: - return None # handled via composing line from accumulated text - return None + reply_parts.append(text) + blob = "".join(reply_parts) + if len(blob.strip()) and len(blob) - last_compose_at >= compose_step: + tail = blob[-88:].replace("\n", " ").strip() + compose_event = update_composing_line(recent, tail) + if compose_event: + events.append(compose_event) + last_compose_at = len(blob) + return events, last_compose_at + + line = _stream_event_line(event) 
+ if line: + compose_event = push_thinking(recent, line) + if compose_event: + events.append(compose_event) + return events, last_compose_at def _append_task_progress_events( @@ -254,12 +284,13 @@ def thinking_events_from_sdk_message( return events, last_compose_at if isinstance(sdk_message, StreamEvent): - line = _stream_event_line(sdk_message.event) - if line: - event = push_thinking(recent, line) - if event: - events.append(event) - return events, last_compose_at + return _thinking_events_from_stream_event( + sdk_message.event, + recent=recent, + reply_parts=reply_parts, + last_compose_at=last_compose_at, + compose_step=compose_step, + ) content = getattr(sdk_message, "content", None) if isinstance(content, list): @@ -286,11 +317,13 @@ def thinking_events_from_sdk_message( payload = getattr(sdk_message, "event", None) if isinstance(payload, dict): - line = _stream_event_line(payload) - if line: - event = push_thinking(recent, line) - if event: - events.append(event) + return _thinking_events_from_stream_event( + payload, + recent=recent, + reply_parts=reply_parts, + last_compose_at=last_compose_at, + compose_step=compose_step, + ) return events, last_compose_at diff --git a/src/agent-runtime/tests/test_executor.py b/src/agent-runtime/tests/test_executor.py index 1af437dd2..1224d252e 100644 --- a/src/agent-runtime/tests/test_executor.py +++ b/src/agent-runtime/tests/test_executor.py @@ -10,6 +10,8 @@ from kartograph_agent_runtime.executor import ( _build_system_prompt, _build_workspace_prompt_appendix, + _extract_sdk_reply, + finalize_sdk_turn_reply, stream_turn_events, ) from kartograph_agent_runtime.settings import AgentRuntimeSettings @@ -73,6 +75,53 @@ def test_build_system_prompt_includes_workspace_appendix() -> None: assert "Files here" in prompt +def test_extract_sdk_reply_joins_multiple_text_blocks() -> None: + from dataclasses import dataclass + + @dataclass + class Block: + text: str + + @dataclass + class Message: + content: list + + message = 
Message(content=[Block(text="Part one. "), Block(text="Part two.")]) + + assert _extract_sdk_reply(message) == "Part one. Part two." + + +def test_finalize_sdk_turn_reply_prefers_streamed_text() -> None: + reply = finalize_sdk_turn_reply( + reply=None, + reply_parts=["Designed ", "entity types."], + last_result=None, + notification_summaries=[], + ) + + assert reply == "Designed entity types." + + +def test_finalize_sdk_turn_reply_uses_tool_only_completion_summary() -> None: + from dataclasses import dataclass + + @dataclass + class Result: + num_turns: int + result: str | None = None + is_error: bool = False + + reply = finalize_sdk_turn_reply( + reply=None, + reply_parts=[], + last_result=Result(num_turns=4), + notification_summaries=[], + ) + + assert "4 turn(s)" in reply + assert "without a final written reply" in reply + + @pytest.mark.asyncio async def test_stream_turn_events_without_api_key_returns_done_reply( monkeypatch: pytest.MonkeyPatch, diff --git a/src/agent-runtime/tests/test_thinking_stream.py b/src/agent-runtime/tests/test_thinking_stream.py index 915f09255..aef213a28 100644 --- a/src/agent-runtime/tests/test_thinking_stream.py +++ b/src/agent-runtime/tests/test_thinking_stream.py @@ -41,6 +41,11 @@ class FakeTaskProgressMessage: usage: dict | None = None +@dataclass +class FakeStreamEvent: + event: dict + + def test_initial_sdk_thinking_lines_include_connected_message() -> None: lines = initial_sdk_thinking_lines(auth_mode="Vertex AI", ui_mode="initial-schema-design") @@ -126,3 +131,42 @@ def test_thinking_events_from_task_progress_message() -> None: joined = "\n".join(events[-1]["recent"]) assert "Inspecting repository files" in joined assert "Running Grep" in joined + + +def test_stream_event_text_delta_accumulates_reply_parts() -> None: + recent = initial_sdk_thinking_lines(auth_mode="Vertex AI", ui_mode="initial-schema-design") + reply_parts: list[str] = [] + message = FakeStreamEvent( + event={ + "type": "content_block_delta", + "delta": 
{"type": "text_delta", "text": "Designed three entity types."}, + }, + ) + + thinking_events_from_sdk_message( + message, + recent=recent, + reply_parts=reply_parts, + last_compose_at=0, + compose_step=10, + ) + + assert reply_parts == ["Designed three entity types."] + + +def test_assistant_message_text_accumulates_reply_parts() -> None: + recent = initial_sdk_thinking_lines(auth_mode="Vertex AI", ui_mode="initial-schema-design") + reply_parts: list[str] = [] + message = FakeAssistantMessage( + content=[FakeTextBlock(text="Here is the proposed schema.")], + ) + + thinking_events_from_sdk_message( + message, + recent=recent, + reply_parts=reply_parts, + last_compose_at=0, + compose_step=120, + ) + + assert reply_parts == ["Here is the proposed schema."] From 60565189644db622a794d923c51d34e1f6d2b617 Mon Sep 17 00:00:00 2001 From: aredenba-rh <aredenba@redhat.com> Date: Wed, 3 Jun 2026 01:12:41 -0400 Subject: [PATCH 083/153] fix(agent-runtime): fail empty SDK turns instead of fake success reply Return AGENT_NO_TEXTUAL_REPLY when no reply can be extracted rather than surfacing a placeholder string as an assistant message. 
Co-authored-by: Cursor <cursoragent@cursor.com> --- .../kartograph_agent_runtime/executor.py | 20 ++++++++++++++----- src/agent-runtime/tests/test_executor.py | 11 ++++++++++ 2 files changed, 26 insertions(+), 5 deletions(-) diff --git a/src/agent-runtime/kartograph_agent_runtime/executor.py b/src/agent-runtime/kartograph_agent_runtime/executor.py index e489f1937..145cc8330 100644 --- a/src/agent-runtime/kartograph_agent_runtime/executor.py +++ b/src/agent-runtime/kartograph_agent_runtime/executor.py @@ -162,7 +162,7 @@ def finalize_sdk_turn_reply( reply_parts: list[str], last_result: Any | None, notification_summaries: list[str], -) -> str: +) -> str | None: """Build the best available assistant reply after an SDK turn completes.""" if isinstance(reply, str) and reply.strip(): return reply.strip() @@ -188,10 +188,7 @@ def finalize_sdk_turn_reply( "to summarize what it changed." ) - return ( - "Claude Agent SDK completed without a textual response. " - "Retry with a more specific graph-management request." - ) + return None def _build_sdk_env(settings: AgentRuntimeSettings) -> dict[str, str]: @@ -442,4 +439,17 @@ async def _stream_with_claude_sdk( last_result=last_result, notification_summaries=notification_summaries, ) + if not reply: + yield { + "type": "done", + "ok": False, + "error": { + "code": "AGENT_NO_TEXTUAL_REPLY", + "message": ( + "The Graph Management Assistant finished without a reply. " + "Check sticky container logs for SDK output, then retry." 
+ ), + }, + } + return yield {"type": "done", "ok": True, "reply": reply} diff --git a/src/agent-runtime/tests/test_executor.py b/src/agent-runtime/tests/test_executor.py index 1224d252e..33f4828a0 100644 --- a/src/agent-runtime/tests/test_executor.py +++ b/src/agent-runtime/tests/test_executor.py @@ -122,6 +122,17 @@ class Result: assert "without a final written reply" in reply +def test_finalize_sdk_turn_reply_returns_none_when_nothing_available() -> None: + reply = finalize_sdk_turn_reply( + reply=None, + reply_parts=[], + last_result=None, + notification_summaries=[], + ) + + assert reply is None + + @pytest.mark.asyncio async def test_stream_turn_events_without_api_key_returns_done_reply( monkeypatch: pytest.MonkeyPatch, From d3895d46c09c1ca1c1fbf9e26c9c2ca3d6cbdcf3 Mon Sep 17 00:00:00 2001 From: aredenba-rh <aredenba@redhat.com> Date: Wed, 3 Jun 2026 01:18:12 -0400 Subject: [PATCH 084/153] fix(agent-runtime): stop heartbeat from cancelling SDK message reads asyncio.wait_for on query().__anext__() cancelled pending reads after 8s, breaking the Claude Agent SDK stream before ResultMessage and reply text arrived. 
Co-authored-by: Cursor <cursoragent@cursor.com> --- .../kartograph_agent_runtime/executor.py | 48 +++++++++++++++---- src/agent-runtime/tests/test_executor.py | 21 ++++++++ 2 files changed, 60 insertions(+), 9 deletions(-) diff --git a/src/agent-runtime/kartograph_agent_runtime/executor.py b/src/agent-runtime/kartograph_agent_runtime/executor.py index 145cc8330..9a8445035 100644 --- a/src/agent-runtime/kartograph_agent_runtime/executor.py +++ b/src/agent-runtime/kartograph_agent_runtime/executor.py @@ -3,6 +3,7 @@ from __future__ import annotations import asyncio +import contextlib import json import os from collections.abc import AsyncIterator @@ -244,6 +245,39 @@ def _timeout_error_message( return " ".join(parts) +async def _iter_sdk_messages_with_heartbeat( + sdk_iter: AsyncIterator[Any], + *, + heartbeat_seconds: float, +) -> AsyncIterator[Any | None]: + """Yield SDK messages, or ``None`` when a heartbeat tick is due. + + Unlike ``asyncio.wait_for`` on ``__anext__()``, this never cancels a pending + SDK read — cancelling mid-stream drops messages and prevents ResultMessage delivery. 
+ """ + pending = asyncio.create_task(sdk_iter.__anext__()) + try: + while True: + done, _ = await asyncio.wait( + {pending}, + timeout=heartbeat_seconds, + return_when=asyncio.FIRST_COMPLETED, + ) + if pending in done: + try: + yield pending.result() + except StopAsyncIteration: + return + pending = asyncio.create_task(sdk_iter.__anext__()) + else: + yield None + finally: + if not pending.done(): + pending.cancel() + with contextlib.suppress(asyncio.CancelledError, StopAsyncIteration): + await pending + + async def stream_turn_events( *, settings: AgentRuntimeSettings, @@ -347,15 +381,11 @@ async def _stream_with_claude_sdk( try: async with asyncio.timeout(turn_timeout_seconds): sdk_iter = query(prompt=prompt, options=options).__aiter__() - while True: - try: - sdk_message = await asyncio.wait_for( - sdk_iter.__anext__(), - timeout=_SDK_HEARTBEAT_SECONDS, - ) - except StopAsyncIteration: - break - except TimeoutError: + async for sdk_message in _iter_sdk_messages_with_heartbeat( + sdk_iter, + heartbeat_seconds=_SDK_HEARTBEAT_SECONDS, + ): + if sdk_message is None: elapsed_seconds += int(_SDK_HEARTBEAT_SECONDS) heartbeat = replace_last_thinking( recent, diff --git a/src/agent-runtime/tests/test_executor.py b/src/agent-runtime/tests/test_executor.py index 33f4828a0..80e8fc71f 100644 --- a/src/agent-runtime/tests/test_executor.py +++ b/src/agent-runtime/tests/test_executor.py @@ -2,6 +2,7 @@ from __future__ import annotations +import asyncio import json from pathlib import Path @@ -11,6 +12,7 @@ _build_system_prompt, _build_workspace_prompt_appendix, _extract_sdk_reply, + _iter_sdk_messages_with_heartbeat, finalize_sdk_turn_reply, stream_turn_events, ) @@ -133,6 +135,25 @@ def test_finalize_sdk_turn_reply_returns_none_when_nothing_available() -> None: assert reply is None +@pytest.mark.asyncio +async def test_sdk_message_heartbeat_does_not_cancel_pending_read() -> None: + async def delayed_messages(): + await asyncio.sleep(0.01) + yield "first" + await 
asyncio.sleep(0.05) + yield "second" + + collected: list[str] = [] + async for item in _iter_sdk_messages_with_heartbeat( + delayed_messages().__aiter__(), + heartbeat_seconds=0.02, + ): + if item is not None: + collected.append(str(item)) + + assert collected == ["first", "second"] + + @pytest.mark.asyncio async def test_stream_turn_events_without_api_key_returns_done_reply( monkeypatch: pytest.MonkeyPatch, From 8f084bf24395e39d5457d9668714e5acee9c4de3 Mon Sep 17 00:00:00 2001 From: aredenba-rh <aredenba@redhat.com> Date: Wed, 3 Jun 2026 01:40:12 -0400 Subject: [PATCH 085/153] feat(extraction): add schema authoring tools and skills for graph assistant Wire Kartograph MCP tools for ontology read/save and JSONL mutations, expose workload schema endpoints, and teach the assistant how to author entity and relationship types instead of probing undocumented HTTP routes. Co-authored-by: Cursor <cursoragent@cursor.com> --- .../kartograph_agent_runtime/executor.py | 13 ++ .../kartograph_agent_runtime/schema_tools.py | 138 ++++++++++++++ .../thinking_stream.py | 5 +- .../kartograph_agent_runtime/tools.py | 52 +++++- src/agent-runtime/tests/test_schema_tools.py | 29 +++ .../tests/test_thinking_stream.py | 2 +- .../application/schema_authoring_guide.py | 88 +++++++++ .../application/skill_resolution_service.py | 38 +++- src/api/extraction/ports/workload_schema.py | 30 +++ .../presentation/workload_routes.py | 171 +++++++++++++----- .../extraction_workload/dependencies.py | 11 ++ .../extraction_workload/schema_service.py | 45 +++++ .../test_skill_resolution_service.py | 5 + .../presentation/test_workload_routes.py | 117 ++++++++++++ 14 files changed, 691 insertions(+), 53 deletions(-) create mode 100644 src/agent-runtime/kartograph_agent_runtime/schema_tools.py create mode 100644 src/agent-runtime/tests/test_schema_tools.py create mode 100644 src/api/extraction/application/schema_authoring_guide.py create mode 100644 src/api/extraction/ports/workload_schema.py create mode 
100644 src/api/infrastructure/extraction_workload/schema_service.py create mode 100644 src/api/tests/unit/extraction/presentation/test_workload_routes.py diff --git a/src/agent-runtime/kartograph_agent_runtime/executor.py b/src/agent-runtime/kartograph_agent_runtime/executor.py index 9a8445035..12a02168b 100644 --- a/src/agent-runtime/kartograph_agent_runtime/executor.py +++ b/src/agent-runtime/kartograph_agent_runtime/executor.py @@ -362,6 +362,18 @@ async def _stream_with_claude_sdk( sdk_env = _build_sdk_env(settings) workspace_dir = settings.workspace_dir.strip() or "/workspace" + tooling = RuntimeTooling(settings=settings) + options_kwargs: dict[str, Any] = {} + if settings.workload_token.strip(): + from kartograph_agent_runtime.schema_tools import ( + KARTOGRAPH_SCHEMA_TOOL_NAMES, + build_kartograph_schema_mcp_server, + ) + + options_kwargs["mcp_servers"] = { + "kartograph": build_kartograph_schema_mcp_server(tooling), + } + options_kwargs["allowed_tools"] = list(KARTOGRAPH_SCHEMA_TOOL_NAMES) options = ClaudeAgentOptions( system_prompt=system_prompt, env=sdk_env, @@ -370,6 +382,7 @@ async def _stream_with_claude_sdk( setting_sources=[], cwd=workspace_dir, add_dirs=[workspace_dir], + **options_kwargs, ) reply: str | None = None diff --git a/src/agent-runtime/kartograph_agent_runtime/schema_tools.py b/src/agent-runtime/kartograph_agent_runtime/schema_tools.py new file mode 100644 index 000000000..212e78773 --- /dev/null +++ b/src/agent-runtime/kartograph_agent_runtime/schema_tools.py @@ -0,0 +1,138 @@ +"""In-process MCP tools for Kartograph schema authoring.""" + +from __future__ import annotations + +from typing import Any + +from claude_agent_sdk import create_sdk_mcp_server, tool + +from kartograph_agent_runtime.tools import RuntimeTooling + +KARTOGRAPH_SCHEMA_TOOL_NAMES = ( + "kartograph_get_schema_authoring_guide", + "kartograph_get_schema_ontology", + "kartograph_save_schema_ontology", + "kartograph_apply_graph_mutations", + 
"kartograph_search_graph_by_slug", +) + + +def build_kartograph_schema_mcp_server(tooling: RuntimeTooling): + """Register Kartograph schema tools on an SDK MCP server.""" + + @tool( + "kartograph_get_schema_authoring_guide", + "Return instructions for authoring entity types, relationship types, and instances in Kartograph.", + {}, + ) + async def get_schema_authoring_guide(_args: dict[str, Any]) -> dict[str, Any]: + try: + payload = await tooling.get_schema_authoring_guide() + guide = str(payload.get("guide") or "") + return {"content": [{"type": "text", "text": guide}]} + except Exception as exc: # noqa: BLE001 + return { + "content": [{"type": "text", "text": f"Failed to load schema guide: {exc}"}], + "is_error": True, + } + + @tool( + "kartograph_get_schema_ontology", + "Read the current canonical ontology (node_types and edge_types) for this knowledge graph.", + {}, + ) + async def get_schema_ontology(_args: dict[str, Any]) -> dict[str, Any]: + try: + return RuntimeTooling.format_tool_result(await tooling.get_schema_ontology()) + except Exception as exc: # noqa: BLE001 + return { + "content": [{"type": "text", "text": f"Failed to read ontology: {exc}"}], + "is_error": True, + } + + @tool( + "kartograph_save_schema_ontology", + "Replace the canonical ontology. 
Pass full node_types and edge_types arrays.", + { + "node_types": list, + "edge_types": list, + "approved_at": str, + }, + ) + async def save_schema_ontology(args: dict[str, Any]) -> dict[str, Any]: + ontology = { + "node_types": args.get("node_types") or [], + "edge_types": args.get("edge_types") or [], + } + approved_at = args.get("approved_at") + if isinstance(approved_at, str) and approved_at.strip(): + ontology["approved_at"] = approved_at.strip() + try: + return RuntimeTooling.format_tool_result( + await tooling.save_schema_ontology(ontology=ontology), + ) + except Exception as exc: # noqa: BLE001 + return { + "content": [{"type": "text", "text": f"Failed to save ontology: {exc}"}], + "is_error": True, + } + + @tool( + "kartograph_apply_graph_mutations", + "Apply JSONL mutation lines to create/update/delete entity or relationship instances.", + {"jsonl": str}, + ) + async def apply_graph_mutations(args: dict[str, Any]) -> dict[str, Any]: + jsonl = str(args.get("jsonl") or "").strip() + if not jsonl: + return { + "content": [{"type": "text", "text": "jsonl must not be empty."}], + "is_error": True, + } + try: + return RuntimeTooling.format_tool_result( + await tooling.apply_graph_mutations(jsonl=jsonl), + ) + except Exception as exc: # noqa: BLE001 + return { + "content": [{"type": "text", "text": f"Failed to apply mutations: {exc}"}], + "is_error": True, + } + + @tool( + "kartograph_search_graph_by_slug", + "Search existing graph nodes by slug within the active knowledge graph.", + {"slug": str, "entity_type": str}, + ) + async def search_graph_by_slug(args: dict[str, Any]) -> dict[str, Any]: + slug = str(args.get("slug") or "").strip() + if not slug: + return { + "content": [{"type": "text", "text": "slug must not be empty."}], + "is_error": True, + } + entity_type = args.get("entity_type") + try: + return RuntimeTooling.format_tool_result( + await tooling.search_graph_by_slug( + slug=slug, + entity_type=str(entity_type).strip() if entity_type else None, + 
), + ) + except Exception as exc: # noqa: BLE001 + return { + "content": [{"type": "text", "text": f"Graph search failed: {exc}"}], + "is_error": True, + } + + return create_sdk_mcp_server( + name="kartograph", + version="1.0.0", + tools=[ + get_schema_authoring_guide, + get_schema_ontology, + save_schema_ontology, + apply_graph_mutations, + search_graph_by_slug, + ], + ) diff --git a/src/agent-runtime/kartograph_agent_runtime/thinking_stream.py b/src/agent-runtime/kartograph_agent_runtime/thinking_stream.py index 6545c1c37..555148019 100644 --- a/src/agent-runtime/kartograph_agent_runtime/thinking_stream.py +++ b/src/agent-runtime/kartograph_agent_runtime/thinking_stream.py @@ -82,6 +82,9 @@ def _tool_use_line(name: str, tool_input: dict[str, Any]) -> str: if name == "Bash": command = tool_input.get("command") or "" return f"Running {command}" if command else "Running shell command…" + if name.startswith("kartograph_"): + readable = name.removeprefix("kartograph_").replace("_", " ") + return f"Schema tool · {readable}" return f"Running {name}…" @@ -332,5 +335,5 @@ def initial_sdk_thinking_lines(*, auth_mode: str, ui_mode: str) -> list[str]: return [ f"Claude Agent SDK query started ({auth_mode})…", f"Mode overlay: {ui_mode}", - "Connected — working on your message…", + "Schema tools: ontology read/save, JSONL mutations, graph search", ] diff --git a/src/agent-runtime/kartograph_agent_runtime/tools.py b/src/agent-runtime/kartograph_agent_runtime/tools.py index 1b544fcce..8729aa9e2 100644 --- a/src/agent-runtime/kartograph_agent_runtime/tools.py +++ b/src/agent-runtime/kartograph_agent_runtime/tools.py @@ -2,6 +2,7 @@ from __future__ import annotations +import json from dataclasses import dataclass from typing import Any @@ -16,14 +17,52 @@ class RuntimeTooling: settings: AgentRuntimeSettings + def _headers(self) -> dict[str, str]: + return {"X-Workload-Token": self.settings.workload_token} + + def _base_url(self) -> str: + return 
self.settings.api_base_url.rstrip("/") + + async def get_schema_authoring_guide(self) -> dict[str, Any]: + url = f"{self._base_url()}/extraction/workloads/schema/authoring-guide" + async with httpx.AsyncClient(timeout=30.0) as client: + response = await client.get(url, headers=self._headers()) + response.raise_for_status() + return response.json() + + async def get_schema_ontology(self) -> dict[str, Any]: + url = f"{self._base_url()}/extraction/workloads/schema/ontology" + async with httpx.AsyncClient(timeout=30.0) as client: + response = await client.get(url, headers=self._headers()) + response.raise_for_status() + return response.json() + + async def save_schema_ontology(self, *, ontology: dict[str, Any]) -> dict[str, Any]: + url = f"{self._base_url()}/extraction/workloads/schema/ontology" + async with httpx.AsyncClient(timeout=60.0) as client: + response = await client.put(url, headers=self._headers(), json=ontology) + response.raise_for_status() + return response.json() + + async def apply_graph_mutations(self, *, jsonl: str) -> dict[str, Any]: + url = f"{self._base_url()}/extraction/workloads/mutations/apply" + async with httpx.AsyncClient(timeout=120.0) as client: + response = await client.post( + url, + headers=self._headers(), + json={"jsonl": jsonl}, + ) + response.raise_for_status() + return response.json() + async def search_graph_by_slug( self, *, slug: str, entity_type: str | None = None ) -> dict[str, Any]: - headers = {"X-Workload-Token": self.settings.workload_token} + headers = self._headers() params: dict[str, str] = {"slug": slug} if entity_type: params["entity_type"] = entity_type - url = f"{self.settings.api_base_url.rstrip('/')}/extraction/workloads/graph/search-by-slug" + url = f"{self._base_url()}/extraction/workloads/graph/search-by-slug" async with httpx.AsyncClient(timeout=30.0) as client: response = await client.get(url, headers=headers, params=params) response.raise_for_status() @@ -32,8 +71,8 @@ async def search_graph_by_slug( async 
def propose_mutation( self, *, operation: str, summary: str, payload: dict[str, Any] | None = None ) -> dict[str, Any]: - headers = {"X-Workload-Token": self.settings.workload_token} - url = f"{self.settings.api_base_url.rstrip('/')}/extraction/workloads/mutations/propose" + headers = self._headers() + url = f"{self._base_url()}/extraction/workloads/mutations/propose" body = { "operation": operation, "summary": summary, @@ -43,3 +82,8 @@ async def propose_mutation( response = await client.post(url, headers=headers, json=body) response.raise_for_status() return response.json() + + @staticmethod + def format_tool_result(payload: dict[str, Any]) -> dict[str, Any]: + text = json.dumps(payload, indent=2) + return {"content": [{"type": "text", "text": text}]} diff --git a/src/agent-runtime/tests/test_schema_tools.py b/src/agent-runtime/tests/test_schema_tools.py new file mode 100644 index 000000000..18b19ebff --- /dev/null +++ b/src/agent-runtime/tests/test_schema_tools.py @@ -0,0 +1,29 @@ +"""Unit tests for Kartograph schema MCP tools.""" + +from __future__ import annotations + +from kartograph_agent_runtime.schema_tools import ( + KARTOGRAPH_SCHEMA_TOOL_NAMES, + build_kartograph_schema_mcp_server, +) +from kartograph_agent_runtime.settings import AgentRuntimeSettings +from kartograph_agent_runtime.tools import RuntimeTooling + + +def test_schema_tool_names_cover_authoring_surface() -> None: + assert "kartograph_get_schema_authoring_guide" in KARTOGRAPH_SCHEMA_TOOL_NAMES + assert "kartograph_get_schema_ontology" in KARTOGRAPH_SCHEMA_TOOL_NAMES + assert "kartograph_save_schema_ontology" in KARTOGRAPH_SCHEMA_TOOL_NAMES + assert "kartograph_apply_graph_mutations" in KARTOGRAPH_SCHEMA_TOOL_NAMES + + +def test_build_kartograph_schema_mcp_server_registers_tools() -> None: + tooling = RuntimeTooling( + settings=AgentRuntimeSettings( + KARTOGRAPH_WORKLOAD_TOKEN="token", + KARTOGRAPH_API_BASE_URL="http://api:8000", + ) + ) + server = build_kartograph_schema_mcp_server(tooling) + 
assert server["type"] == "sdk" + assert server["name"] == "kartograph" diff --git a/src/agent-runtime/tests/test_thinking_stream.py b/src/agent-runtime/tests/test_thinking_stream.py index aef213a28..c5da65b8a 100644 --- a/src/agent-runtime/tests/test_thinking_stream.py +++ b/src/agent-runtime/tests/test_thinking_stream.py @@ -50,7 +50,7 @@ def test_initial_sdk_thinking_lines_include_connected_message() -> None: lines = initial_sdk_thinking_lines(auth_mode="Vertex AI", ui_mode="initial-schema-design") assert any("Claude Agent SDK query started" in line for line in lines) - assert any("Connected" in line for line in lines) + assert any("Schema tools" in line for line in lines) def test_agent_runtime_settings_default_max_turns() -> None: diff --git a/src/api/extraction/application/schema_authoring_guide.py b/src/api/extraction/application/schema_authoring_guide.py new file mode 100644 index 000000000..7c9c5f314 --- /dev/null +++ b/src/api/extraction/application/schema_authoring_guide.py @@ -0,0 +1,88 @@ +"""Schema authoring guide shared by API workload tools and agent runtime skills.""" + +SCHEMA_AUTHORING_GUIDE = """ +# Kartograph schema authoring (Graph Management Assistant) + +Use the Kartograph schema tools — never probe undocumented HTTP routes. + +## Workflow + +1. Call `kartograph_get_schema_authoring_guide` (this document). +2. Call `kartograph_get_schema_ontology` to read the current entity/relationship types. +3. Edit the ontology JSON (full replace) and call `kartograph_save_schema_ontology`. +4. For instances, call `kartograph_apply_graph_mutations` with JSONL lines. + +## Entity type (node type) shape + +Each entry in `node_types`: + +```json +{ + "label": "service", + "description": "Deployable software service", + "required_properties": ["name"], + "optional_properties": ["team"], + "prepopulated": false, + "prepopulated_instance_count": 0 +} +``` + +- `label`: lowercase snake_case type name (required). 
+- `prepopulated`: when true, bootstrap transition requires at least one instance. +- Saving replaces the entire ontology — read first, merge your edits, then save. + +## Relationship type (edge type) shape + +Each entry in `edge_types`: + +```json +{ + "label": "depends_on", + "description": "Service dependency", + "source_labels": ["service"], + "target_labels": ["service"], + "properties": [] +} +``` + +- `source_labels` / `target_labels`: allowed node type labels for edge endpoints. + +## Instance mutations (JSONL) + +Apply after types exist. One JSON object per line. + +Define-only line (usually handled by save_schema_ontology instead): + +```json +{"op":"DEFINE","type":"node","label":"service","description":"A service","required_properties":["name"]} +``` + +Create entity instance: + +```json +{"op":"CREATE","type":"node","id":"service:0123456789abcdef","label":"service","set_properties":{"name":"api-gateway","slug":"api-gateway","data_source_id":"schema-bootstrap","source_path":"graph-management-assistant"}} +``` + +Create relationship instance: + +```json +{"op":"CREATE","type":"edge","id":"depends_on:0123456789abc001","label":"depends_on","start_id":"service:0123456789abcdef","end_id":"service:fedcba9876543210","set_properties":{"data_source_id":"schema-bootstrap","source_path":"graph-management-assistant"}} +``` + +Rules: +- `id` format: `{label}:{16 lowercase hex chars}`. +- CREATE requires `data_source_id` and `source_path` in `set_properties`. +- Node CREATE requires `slug` in `set_properties`. +- `knowledge_graph_id` is stamped by the platform — do not set it. + +## Readiness checklist + +Bootstrap transition needs: +- At least one entity type and one relationship type. +- Every `prepopulated=true` entity type must have instances (use CREATE lines). 
+ +## Repository context + +Use Read/Grep/Glob on prepared JobPackage files under `repository-files/<job_package_id>/` +to infer domain concepts — then model them as ontology types, not as ad-hoc API discoveries. +""".strip() diff --git a/src/api/extraction/application/skill_resolution_service.py b/src/api/extraction/application/skill_resolution_service.py index 11e420157..e3f665c88 100644 --- a/src/api/extraction/application/skill_resolution_service.py +++ b/src/api/extraction/application/skill_resolution_service.py @@ -21,8 +21,11 @@ class ResolvedExtractionSkillPack: _GLOBAL_PROMPT_SETTINGS: dict[ExtractionSessionMode, dict[str, object]] = { ExtractionSessionMode.SCHEMA_BOOTSTRAP: { "system_prompt": ( - "You are the schema bootstrap guide. Start by understanding the user's " - "capabilities, goals, and domain intent before proposing a graph model." + "You are the Graph Management Assistant for schema bootstrap. " + "Use Kartograph schema tools to read and write entity/relationship types " + "and instances — do not discover or call raw HTTP API routes. " + "Start by understanding user goals, then model the ontology and apply changes " + "with kartograph_get_schema_ontology and kartograph_save_schema_ontology." ), "prompt_hierarchy": ( "platform_security_constraints", @@ -34,6 +37,7 @@ class ResolvedExtractionSkillPack: "Prefer mutation-log compatible schema guidance over ad-hoc writes.", "Never fabricate repository content or credentials.", "Keep recommendations scoped to the active knowledge graph.", + "Use kartograph_* schema tools for ontology and JSONL mutations; never probe /management or /graph HTTP routes manually.", ), }, ExtractionSessionMode.EXTRACTION_OPERATIONS: { @@ -62,13 +66,36 @@ class ResolvedExtractionSkillPack: "want a first-pass schema attempt or guided co-design." ), "schema_modeling": ( - "Guide the user to define complete entity and relationship types " - "with clear labels, constraints, and required properties." 
+ "Author entity types (node_types) and relationship types (edge_types) via " + "kartograph_get_schema_ontology → edit → kartograph_save_schema_ontology. " + "Each entity type needs label, description, required_properties, optional " + "prepopulated flag. Each relationship type needs source_labels and " + "target_labels. Call kartograph_get_schema_authoring_guide before the first save." + ), + "entity_type_authoring": ( + "Create or edit entity types by updating node_types entries: label " + "(lowercase snake_case), description, required_properties, optional_properties, " + "prepopulated, prepopulated_instance_count. Always read the current ontology first." + ), + "relationship_type_authoring": ( + "Create or edit relationship types by updating edge_types entries: label, " + "description, source_labels, target_labels, properties. Ensure endpoints " + "reference existing entity type labels." + ), + "instance_authoring": ( + "Create entity or relationship instances with kartograph_apply_graph_mutations " + "JSONL CREATE lines after types exist. Nodes require slug, data_source_id, " + "and source_path in set_properties. Use ids like label:16hex." ), "prepopulation_validation": ( "Prioritize prepopulated type coverage and highlight any missing " "instances required before extraction-mode transition." ), + "schema_tools": ( + "Available tools: kartograph_get_schema_authoring_guide, " + "kartograph_get_schema_ontology, kartograph_save_schema_ontology, " + "kartograph_apply_graph_mutations, kartograph_search_graph_by_slug." + ), }, ExtractionSessionMode.EXTRACTION_OPERATIONS: { "job_setup": ( @@ -91,7 +118,8 @@ class ResolvedExtractionSkillPack: GraphManagementUiMode.INITIAL_SCHEMA_DESIGN: { "ui_mode_framing": ( "Focus on schema bootstrap: entity/relationship modeling, intake, and " - "prepopulation guidance before extraction jobs." + "prepopulation guidance before extraction jobs. Use Kartograph schema tools " + "to persist types — do not guess API endpoints." 
), }, GraphManagementUiMode.EXTRACTION_JOBS: { diff --git a/src/api/extraction/ports/workload_schema.py b/src/api/extraction/ports/workload_schema.py new file mode 100644 index 000000000..bda32bfa7 --- /dev/null +++ b/src/api/extraction/ports/workload_schema.py @@ -0,0 +1,30 @@ +"""Port for schema reads and writes performed by sticky session workload tokens.""" + +from __future__ import annotations + +from typing import Protocol + +from management.domain.value_objects import OntologyConfig + + +class IWorkloadSchemaService(Protocol): + """Canonical schema access scoped to a workload-authenticated knowledge graph.""" + + async def get_ontology(self, *, knowledge_graph_id: str) -> OntologyConfig | None: + """Return the canonical ontology for one knowledge graph.""" + + async def replace_ontology( + self, + *, + knowledge_graph_id: str, + config: OntologyConfig, + ) -> OntologyConfig: + """Replace the canonical ontology via DEFINE mutation-log operations.""" + + async def apply_mutation_jsonl( + self, + *, + knowledge_graph_id: str, + jsonl: str, + ) -> dict[str, object]: + """Apply JSONL mutation lines (CREATE/UPDATE/DELETE instances, additive DEFINE).""" diff --git a/src/api/extraction/presentation/workload_routes.py b/src/api/extraction/presentation/workload_routes.py index e95fd1f51..f436f928a 100644 --- a/src/api/extraction/presentation/workload_routes.py +++ b/src/api/extraction/presentation/workload_routes.py @@ -2,21 +2,34 @@ from __future__ import annotations -from typing import Annotated +from typing import Annotated, Any from fastapi import APIRouter, Depends, HTTPException, Query, status from pydantic import BaseModel, Field from extraction.ports.workload_graph import IWorkloadGraphReader +from extraction.ports.workload_schema import IWorkloadSchemaService from extraction.presentation.workload_auth import ( WorkloadAuthContext, get_workload_auth_context, ) -from infrastructure.extraction_workload.dependencies import get_workload_graph_reader +from 
infrastructure.extraction_workload.dependencies import ( + get_workload_graph_reader, + get_workload_schema_service, +) +from management.domain.value_objects import OntologyConfig router = APIRouter(prefix="/workloads", tags=["extraction-workloads"]) +def _require_chat_scope(auth: WorkloadAuthContext) -> None: + if "workload:chat" not in auth.credentials.scopes: + raise HTTPException( + status_code=status.HTTP_403_FORBIDDEN, + detail="Workload token is not authorized for chat graph operations", + ) + + class WorkloadGraphSearchResponse(BaseModel): """Graph read response for sticky session agent tools.""" @@ -24,20 +37,123 @@ class WorkloadGraphSearchResponse(BaseModel): count: int -class WorkloadMutationProposalRequest(BaseModel): - """Mutation emitter payload from sticky session agent tools.""" +class WorkloadOntologyResponse(BaseModel): + """Canonical schema ontology for one knowledge graph.""" + + knowledge_graph_id: str + node_types: list[dict[str, Any]] + edge_types: list[dict[str, Any]] + approved_at: str | None = None + + +class WorkloadOntologySaveRequest(BaseModel): + """Full ontology replace payload matching Management OntologyConfig.""" + + node_types: list[dict[str, Any]] = Field(default_factory=list) + edge_types: list[dict[str, Any]] = Field(default_factory=list) + approved_at: str | None = None + + +class WorkloadMutationApplyRequest(BaseModel): + """JSONL mutation batch for instance authoring or additive schema changes.""" + + jsonl: str = Field(min_length=1) + + +class WorkloadMutationApplyResponse(BaseModel): + """Result of applying a JSONL mutation batch.""" - operation: str = Field(min_length=1) - summary: str = Field(min_length=1) - payload: dict = Field(default_factory=dict) + applied: bool + errors: list[str] = Field(default_factory=list) -class WorkloadMutationProposalResponse(BaseModel): - """Acknowledgement for a proposed mutation (not yet applied).""" +class WorkloadSchemaAuthoringGuideResponse(BaseModel): + """Authoring instructions 
surfaced to the Graph Management Assistant.""" - accepted: bool - proposal_id: str - message: str + guide: str + + +@router.get( + "/schema/authoring-guide", + response_model=WorkloadSchemaAuthoringGuideResponse, +) +async def workload_schema_authoring_guide( + auth: Annotated[WorkloadAuthContext, Depends(get_workload_auth_context)] = ..., +) -> WorkloadSchemaAuthoringGuideResponse: + _require_chat_scope(auth) + from extraction.application.schema_authoring_guide import SCHEMA_AUTHORING_GUIDE + + return WorkloadSchemaAuthoringGuideResponse(guide=SCHEMA_AUTHORING_GUIDE) + + +@router.get( + "/schema/ontology", + response_model=WorkloadOntologyResponse, +) +async def workload_get_schema_ontology( + auth: Annotated[WorkloadAuthContext, Depends(get_workload_auth_context)] = ..., + schema_service: Annotated[IWorkloadSchemaService, Depends(get_workload_schema_service)] = ..., +) -> WorkloadOntologyResponse: + _require_chat_scope(auth) + config = await schema_service.get_ontology(knowledge_graph_id=auth.knowledge_graph_id) + if config is None: + return WorkloadOntologyResponse( + knowledge_graph_id=auth.knowledge_graph_id, + node_types=[], + edge_types=[], + approved_at=None, + ) + payload = config.to_dict() + return WorkloadOntologyResponse( + knowledge_graph_id=auth.knowledge_graph_id, + node_types=list(payload.get("node_types", [])), + edge_types=list(payload.get("edge_types", [])), + approved_at=payload.get("approved_at"), + ) + + +@router.put( + "/schema/ontology", + response_model=WorkloadOntologyResponse, +) +async def workload_save_schema_ontology( + request: WorkloadOntologySaveRequest, + auth: Annotated[WorkloadAuthContext, Depends(get_workload_auth_context)] = ..., + schema_service: Annotated[IWorkloadSchemaService, Depends(get_workload_schema_service)] = ..., +) -> WorkloadOntologyResponse: + _require_chat_scope(auth) + config = OntologyConfig.from_dict(request.model_dump()) + saved = await schema_service.replace_ontology( + 
knowledge_graph_id=auth.knowledge_graph_id, + config=config, + ) + payload = saved.to_dict() + return WorkloadOntologyResponse( + knowledge_graph_id=auth.knowledge_graph_id, + node_types=list(payload.get("node_types", [])), + edge_types=list(payload.get("edge_types", [])), + approved_at=payload.get("approved_at"), + ) + + +@router.post( + "/mutations/apply", + response_model=WorkloadMutationApplyResponse, +) +async def workload_apply_mutations( + request: WorkloadMutationApplyRequest, + auth: Annotated[WorkloadAuthContext, Depends(get_workload_auth_context)] = ..., + schema_service: Annotated[IWorkloadSchemaService, Depends(get_workload_schema_service)] = ..., +) -> WorkloadMutationApplyResponse: + _require_chat_scope(auth) + result = await schema_service.apply_mutation_jsonl( + knowledge_graph_id=auth.knowledge_graph_id, + jsonl=request.jsonl, + ) + return WorkloadMutationApplyResponse( + applied=bool(result.get("applied")), + errors=[str(item) for item in result.get("errors", [])], + ) @router.get( @@ -50,11 +166,7 @@ async def workload_search_graph_by_slug( auth: Annotated[WorkloadAuthContext, Depends(get_workload_auth_context)] = ..., reader: Annotated[IWorkloadGraphReader, Depends(get_workload_graph_reader)] = ..., ) -> WorkloadGraphSearchResponse: - if "workload:chat" not in auth.credentials.scopes: - raise HTTPException( - status_code=status.HTTP_403_FORBIDDEN, - detail="Workload token is not authorized for chat graph reads", - ) + _require_chat_scope(auth) nodes = await reader.search_by_slug( tenant_id=auth.tenant_id, @@ -72,28 +184,3 @@ async def workload_search_graph_by_slug( for node in nodes ] return WorkloadGraphSearchResponse(nodes=serialized, count=len(serialized)) - - -@router.post( - "/mutations/propose", - response_model=WorkloadMutationProposalResponse, -) -async def workload_propose_mutation( - request: WorkloadMutationProposalRequest, - auth: Annotated[WorkloadAuthContext, Depends(get_workload_auth_context)] = ..., -) -> 
WorkloadMutationProposalResponse: - if "workload:chat" not in auth.credentials.scopes: - raise HTTPException( - status_code=status.HTTP_403_FORBIDDEN, - detail="Workload token is not authorized for chat mutation proposals", - ) - - proposal_id = f"proposal-{request.operation}-{auth.knowledge_graph_id}" - return WorkloadMutationProposalResponse( - accepted=True, - proposal_id=proposal_id, - message=( - "Mutation proposal recorded for audit. Apply via mutation log pipeline " - "in a follow-up change." - ), - ) diff --git a/src/api/infrastructure/extraction_workload/dependencies.py b/src/api/infrastructure/extraction_workload/dependencies.py index a74594e5b..fa768666c 100644 --- a/src/api/infrastructure/extraction_workload/dependencies.py +++ b/src/api/infrastructure/extraction_workload/dependencies.py @@ -10,10 +10,15 @@ from extraction.infrastructure.workload_runtime import ScopedWorkloadCredentialIssuer from extraction.infrastructure.workload_runtime_factory import get_workload_credential_issuer from extraction.ports.workload_graph import IWorkloadGraphReader +from extraction.ports.workload_schema import IWorkloadSchemaService from infrastructure.database.connection_pool import ConnectionPool from infrastructure.dependencies import get_age_connection_pool from infrastructure.extraction_workload.graph_reader import GraphWorkloadGraphReader +from infrastructure.extraction_workload.schema_service import GraphWorkloadSchemaService from infrastructure.settings import get_database_settings +from sqlalchemy.ext.asyncio import AsyncSession + +from infrastructure.database.dependencies import get_write_session @lru_cache @@ -29,3 +34,9 @@ def get_workload_graph_reader( pool: Annotated[ConnectionPool, Depends(get_age_connection_pool)], ) -> IWorkloadGraphReader: return GraphWorkloadGraphReader(pool=pool, settings=get_database_settings()) + + +def get_workload_schema_service( + session: Annotated[AsyncSession, Depends(get_write_session)], +) -> IWorkloadSchemaService: + return 
GraphWorkloadSchemaService(session=session) diff --git a/src/api/infrastructure/extraction_workload/schema_service.py b/src/api/infrastructure/extraction_workload/schema_service.py new file mode 100644 index 000000000..57bff8a08 --- /dev/null +++ b/src/api/infrastructure/extraction_workload/schema_service.py @@ -0,0 +1,45 @@ +"""Graph-backed schema service for extraction workload runtimes.""" + +from __future__ import annotations + +from sqlalchemy.ext.asyncio import AsyncSession + +from infrastructure.canonical_schema.graph_canonical_schema_repository import ( + GraphCanonicalSchemaRepository, +) +from management.domain.value_objects import OntologyConfig +from management.ports.exceptions import CanonicalSchemaMutationError + + +class GraphWorkloadSchemaService: + """Read and write canonical schema using the Management graph-native store.""" + + def __init__(self, session: AsyncSession) -> None: + self._session = session + self._repository = GraphCanonicalSchemaRepository(session) + + async def get_ontology(self, *, knowledge_graph_id: str) -> OntologyConfig | None: + return await self._repository.get_ontology(knowledge_graph_id) + + async def replace_ontology( + self, + *, + knowledge_graph_id: str, + config: OntologyConfig, + ) -> OntologyConfig: + await self._repository.replace_ontology(knowledge_graph_id, config) + await self._session.commit() + return config + + async def apply_mutation_jsonl( + self, + *, + knowledge_graph_id: str, + jsonl: str, + ) -> dict[str, object]: + try: + await self._repository.apply_mutation_log(knowledge_graph_id, jsonl) + except CanonicalSchemaMutationError as exc: + return {"applied": False, "errors": [str(exc)]} + await self._session.commit() + return {"applied": True, "errors": []} diff --git a/src/api/tests/unit/extraction/application/test_skill_resolution_service.py b/src/api/tests/unit/extraction/application/test_skill_resolution_service.py index fa5167b54..0d8e06983 100644 --- 
a/src/api/tests/unit/extraction/application/test_skill_resolution_service.py +++ b/src/api/tests/unit/extraction/application/test_skill_resolution_service.py @@ -35,6 +35,11 @@ async def test_bootstrap_mode_uses_bootstrap_defaults(self): ) assert "schema_modeling" in resolved.skills + assert "entity_type_authoring" in resolved.skills + assert "relationship_type_authoring" in resolved.skills + assert "instance_authoring" in resolved.skills + assert "schema_tools" in resolved.skills + assert "kartograph_get_schema_ontology" in resolved.skills["schema_tools"] assert "prepopulation_validation" in resolved.skills assert "capabilities_intake" in resolved.skills assert "goal" in resolved.system_prompt.lower() diff --git a/src/api/tests/unit/extraction/presentation/test_workload_routes.py b/src/api/tests/unit/extraction/presentation/test_workload_routes.py new file mode 100644 index 000000000..0a811f31b --- /dev/null +++ b/src/api/tests/unit/extraction/presentation/test_workload_routes.py @@ -0,0 +1,117 @@ +"""Unit tests for extraction workload schema routes.""" + +from __future__ import annotations + +import pytest +from fastapi import FastAPI +from fastapi.testclient import TestClient + +from extraction.infrastructure.workload_runtime import ScopedWorkloadCredentialIssuer +from extraction.presentation import workload_routes +from extraction.presentation.workload_auth import WorkloadAuthContext, get_workload_auth_context +from infrastructure.extraction_workload.dependencies import get_workload_schema_service +from management.domain.value_objects import OntologyConfig + + +class _FakeSchemaService: + def __init__(self) -> None: + self.saved: OntologyConfig | None = None + self.applied_jsonl: str | None = None + + async def get_ontology(self, *, knowledge_graph_id: str) -> OntologyConfig | None: + return self.saved + + async def replace_ontology( + self, + *, + knowledge_graph_id: str, + config: OntologyConfig, + ) -> OntologyConfig: + self.saved = config + return config + 
+ async def apply_mutation_jsonl( + self, + *, + knowledge_graph_id: str, + jsonl: str, + ) -> dict[str, object]: + self.applied_jsonl = jsonl + return {"applied": True, "errors": []} + + +@pytest.fixture +def workload_client() -> tuple[TestClient, _FakeSchemaService, str]: + fake = _FakeSchemaService() + issuer = ScopedWorkloadCredentialIssuer(default_ttl=__import__("datetime").timedelta(minutes=10)) + credentials = issuer.issue_for_sticky_session( + tenant_id="tenant-1", + knowledge_graph_id="kg-1", + ) + + app = FastAPI() + app.include_router(workload_routes.router, prefix="/extraction") + app.dependency_overrides[get_workload_schema_service] = lambda: fake + app.dependency_overrides[get_workload_auth_context] = lambda: WorkloadAuthContext( + credentials=credentials, + tenant_id="tenant-1", + knowledge_graph_id="kg-1", + ) + + client = TestClient(app) + return client, fake, credentials.token + + +def test_workload_get_schema_authoring_guide(workload_client: tuple[TestClient, _FakeSchemaService, str]) -> None: + client, _fake, token = workload_client + response = client.get( + "/extraction/workloads/schema/authoring-guide", + headers={"X-Workload-Token": token}, + ) + assert response.status_code == 200 + assert "kartograph_get_schema_ontology" in response.json()["guide"] + + +def test_workload_save_schema_ontology(workload_client: tuple[TestClient, _FakeSchemaService, str]) -> None: + client, fake, token = workload_client + response = client.put( + "/extraction/workloads/schema/ontology", + headers={"X-Workload-Token": token}, + json={ + "node_types": [ + { + "label": "service", + "description": "Service entity", + "required_properties": ["name"], + "optional_properties": [], + "prepopulated": False, + "prepopulated_instance_count": 0, + } + ], + "edge_types": [ + { + "label": "depends_on", + "description": "Dependency", + "source_labels": ["service"], + "target_labels": ["service"], + "properties": [], + } + ], + }, + ) + assert response.status_code == 200 + 
assert fake.saved is not None + assert fake.saved.node_types[0].label == "service" + assert fake.saved.edge_types[0].label == "depends_on" + + +def test_workload_apply_graph_mutations(workload_client: tuple[TestClient, _FakeSchemaService, str]) -> None: + client, fake, token = workload_client + response = client.post( + "/extraction/workloads/mutations/apply", + headers={"X-Workload-Token": token}, + json={"jsonl": '{"op":"CREATE","type":"node","id":"service:0123456789abcdef","label":"service","set_properties":{"name":"api","slug":"api","data_source_id":"bootstrap","source_path":"assistant"}}'}, + ) + assert response.status_code == 200 + assert response.json()["applied"] is True + assert fake.applied_jsonl is not None From db44c1008d83a3baf3c27ca44e8b151d19ee6773 Mon Sep 17 00:00:00 2001 From: aredenba-rh <aredenba@redhat.com> Date: Wed, 3 Jun 2026 02:07:19 -0400 Subject: [PATCH 086/153] feat(manage-kg): load design artifacts from DB and persist agent graph writes Add design-artifacts API and k-extract-style panels with refresh, route workload CREATE mutations to tenant AGE, and enrich the sticky agent system prompt with skills and tools guidance. 
Co-authored-by: Cursor <cursoragent@cursor.com> --- .../kartograph_agent_runtime/agent_prompt.py | 99 ++++++ .../kartograph_agent_runtime/executor.py | 19 +- src/agent-runtime/tests/test_agent_prompt.py | 46 +++ src/agent-runtime/tests/test_executor.py | 2 + .../application/skill_resolution_service.py | 11 +- src/api/extraction/ports/workload_schema.py | 1 + .../presentation/workload_routes.py | 1 + .../extraction_workload/dependencies.py | 13 +- .../graph_mutation_writer.py | 146 +++++++++ .../extraction_workload/graph_reader.py | 2 +- .../extraction_workload/schema_service.py | 52 ++- .../management/design_artifacts_service.py | 55 ++++ .../application/design_artifacts.py | 223 +++++++++++++ .../dependencies/design_artifacts.py | 27 ++ .../presentation/knowledge_graphs/models.py | 70 ++++ .../presentation/knowledge_graphs/routes.py | 34 ++ .../test_workload_graph_mutations.py | 120 +++++++ .../presentation/test_workload_routes.py | 1 + .../test_graph_mutation_writer.py | 47 +++ .../test_schema_service.py | 68 ++++ .../application/test_design_artifacts.py | 81 +++++ .../GraphDesignEntitiesPanel.vue | 301 ++++++++++++++++++ .../GraphDesignRelationshipsPanel.vue | 243 ++++++++++++++ .../pages/knowledge-graphs/[kgId]/manage.vue | 155 ++++----- .../app/tests/kg-design-artifacts.test.ts | 25 ++ .../knowledge-graph-manage-workspace.test.ts | 11 +- src/dev-ui/app/utils/kgDesignArtifacts.ts | 104 ++++++ 27 files changed, 1849 insertions(+), 108 deletions(-) create mode 100644 src/agent-runtime/kartograph_agent_runtime/agent_prompt.py create mode 100644 src/agent-runtime/tests/test_agent_prompt.py create mode 100644 src/api/infrastructure/extraction_workload/graph_mutation_writer.py create mode 100644 src/api/infrastructure/management/design_artifacts_service.py create mode 100644 src/api/management/application/design_artifacts.py create mode 100644 src/api/management/dependencies/design_artifacts.py create mode 100644 
src/api/tests/integration/extraction/test_workload_graph_mutations.py create mode 100644 src/api/tests/unit/infrastructure/extraction_workload/test_graph_mutation_writer.py create mode 100644 src/api/tests/unit/infrastructure/extraction_workload/test_schema_service.py create mode 100644 src/api/tests/unit/management/application/test_design_artifacts.py create mode 100644 src/dev-ui/app/components/graph-management/GraphDesignEntitiesPanel.vue create mode 100644 src/dev-ui/app/components/graph-management/GraphDesignRelationshipsPanel.vue create mode 100644 src/dev-ui/app/tests/kg-design-artifacts.test.ts create mode 100644 src/dev-ui/app/utils/kgDesignArtifacts.ts diff --git a/src/agent-runtime/kartograph_agent_runtime/agent_prompt.py b/src/agent-runtime/kartograph_agent_runtime/agent_prompt.py new file mode 100644 index 000000000..5c64bd37f --- /dev/null +++ b/src/agent-runtime/kartograph_agent_runtime/agent_prompt.py @@ -0,0 +1,99 @@ +"""System prompt assembly for the Graph Management Assistant.""" + +from __future__ import annotations + +from typing import Any + +from kartograph_agent_runtime.schema_tools import KARTOGRAPH_SCHEMA_TOOL_NAMES +from kartograph_agent_runtime.settings import AgentRuntimeSettings + +_TOOLS_QUICK_REFERENCE = """ +## Kartograph schema tools (always use these — never probe HTTP routes) + +| Tool | Purpose | +|------|---------| +| `kartograph_get_schema_authoring_guide` | Full JSON shapes and mutation rules — call first on schema tasks | +| `kartograph_get_schema_ontology` | Read current `node_types` and `edge_types` before every save | +| `kartograph_save_schema_ontology` | Replace canonical ontology (read → merge edits → save full payload) | +| `kartograph_apply_graph_mutations` | Apply JSONL CREATE/UPDATE/DELETE instance lines to the official graph DB | +| `kartograph_search_graph_by_slug` | Find existing nodes by slug to avoid duplicates | + +### Quick workflow + +1. `kartograph_get_schema_authoring_guide` +2. 
`kartograph_get_schema_ontology` +3. Model entity/relationship types → `kartograph_save_schema_ontology` +4. Create instances → `kartograph_apply_graph_mutations` (one JSON object per line) +5. Verify → `kartograph_search_graph_by_slug` + +Writes persist to the platform database for the active knowledge graph. Use Read/Grep/Glob +only for repository files under the session workspace — not for API discovery. +""".strip() + + +def build_agent_system_prompt( + agent_configuration: dict[str, Any], + *, + settings: AgentRuntimeSettings | None = None, + workspace_appendix: str = "", + include_tools_manifest: bool = True, +) -> str: + """Build the full system prompt with skills, guardrails, tools, and session scope.""" + system_prompt = str(agent_configuration.get("system_prompt") or "").strip() + guardrails = agent_configuration.get("guardrails") or [] + skills = agent_configuration.get("skills") or {} + prompt_hierarchy = agent_configuration.get("prompt_hierarchy") or [] + ui_mode = str(agent_configuration.get("graph_management_ui_mode") or "").strip() + + guardrail_lines = "\n".join(f"- {item}" for item in guardrails if str(item).strip()) + + skill_sections: list[str] = [] + if prompt_hierarchy: + hierarchy_line = " → ".join(str(item) for item in prompt_hierarchy if str(item).strip()) + if hierarchy_line: + skill_sections.append(f"Prompt hierarchy: {hierarchy_line}") + if ui_mode: + skill_sections.append(f"UI mode: {ui_mode}") + + for key, value in sorted(skills.items()): + text = str(value).strip() + if text: + skill_sections.append(f"**{key}**: {text}") + + skills_block = "" + if skill_sections: + skills_block = "## Skills\n\n" + "\n\n".join(skill_sections) + + tools_block = "" + if include_tools_manifest and settings is not None and settings.workload_token.strip(): + tool_list = ", ".join(f"`{name}`" for name in KARTOGRAPH_SCHEMA_TOOL_NAMES) + tools_block = f"{_TOOLS_QUICK_REFERENCE}\n\nRegistered tools: {tool_list}." 
+ + session_block = "" + if settings is not None: + kg_id = settings.knowledge_graph_id.strip() + tenant_id = settings.tenant_id.strip() + if kg_id or tenant_id: + lines = ["## Session scope"] + if kg_id: + lines.append(f"- Knowledge graph: `{kg_id}`") + if tenant_id: + lines.append(f"- Tenant: `{tenant_id}`") + lines.append( + "- All Kartograph schema tool writes target this knowledge graph automatically." + ) + session_block = "\n".join(lines) + + sections = [ + section + for section in ( + system_prompt, + guardrail_lines, + skills_block, + tools_block, + session_block, + workspace_appendix.strip(), + ) + if section + ] + return "\n\n".join(sections) or "You are the Graph Management Assistant." diff --git a/src/agent-runtime/kartograph_agent_runtime/executor.py b/src/agent-runtime/kartograph_agent_runtime/executor.py index 12a02168b..0d4593e96 100644 --- a/src/agent-runtime/kartograph_agent_runtime/executor.py +++ b/src/agent-runtime/kartograph_agent_runtime/executor.py @@ -9,6 +9,7 @@ from collections.abc import AsyncIterator from typing import Any +from kartograph_agent_runtime.agent_prompt import build_agent_system_prompt from kartograph_agent_runtime.settings import AgentRuntimeSettings from kartograph_agent_runtime.thinking_stream import ( initial_sdk_thinking_lines, @@ -26,19 +27,14 @@ def _build_system_prompt( agent_configuration: dict[str, Any], *, + settings: AgentRuntimeSettings | None = None, workspace_appendix: str = "", ) -> str: - system_prompt = str(agent_configuration.get("system_prompt") or "").strip() - guardrails = agent_configuration.get("guardrails") or [] - skills = agent_configuration.get("skills") or {} - skill_lines = "\n".join(f"- {key}: {value}" for key, value in sorted(skills.items())) - guardrail_lines = "\n".join(f"- {item}" for item in guardrails if str(item).strip()) - sections = [ - section - for section in (system_prompt, guardrail_lines, skill_lines, workspace_appendix.strip()) - if section - ] - return "\n\n".join(sections) or 
"You are the Graph Management Assistant." + return build_agent_system_prompt( + agent_configuration, + settings=settings, + workspace_appendix=workspace_appendix, + ) def _build_workspace_prompt_appendix(settings: AgentRuntimeSettings) -> str: @@ -346,6 +342,7 @@ async def _stream_with_claude_sdk( system_prompt = _build_system_prompt( agent_configuration, + settings=settings, workspace_appendix=_build_workspace_prompt_appendix(settings), ) history_lines = [ diff --git a/src/agent-runtime/tests/test_agent_prompt.py b/src/agent-runtime/tests/test_agent_prompt.py new file mode 100644 index 000000000..9cdffef84 --- /dev/null +++ b/src/agent-runtime/tests/test_agent_prompt.py @@ -0,0 +1,46 @@ +"""Unit tests for agent system prompt assembly.""" + +from __future__ import annotations + +from kartograph_agent_runtime.agent_prompt import build_agent_system_prompt +from kartograph_agent_runtime.settings import AgentRuntimeSettings + + +def test_build_agent_system_prompt_includes_skills_tools_and_session_scope() -> None: + prompt = build_agent_system_prompt( + { + "system_prompt": "You are the Graph Management Assistant.", + "prompt_hierarchy": ["platform_security_constraints", "mode_specific_skill_pack"], + "guardrails": ["Use Kartograph schema tools only."], + "skills": { + "schema_modeling": "Read ontology before save.", + "schema_tools": "Five kartograph_* tools available.", + }, + "graph_management_ui_mode": "initial-schema-design", + }, + settings=AgentRuntimeSettings( + KARTOGRAPH_WORKLOAD_TOKEN="token", + KARTOGRAPH_KNOWLEDGE_GRAPH_ID="kg-123", + KARTOGRAPH_TENANT_ID="tenant-456", + ), + workspace_appendix="## Session workspace\nFiles here", + ) + + assert "Graph Management Assistant" in prompt + assert "Use Kartograph schema tools only." 
in prompt + assert "**schema_modeling**" in prompt + assert "kartograph_get_schema_ontology" in prompt + assert "Quick workflow" in prompt + assert "kg-123" in prompt + assert "tenant-456" in prompt + assert "Files here" in prompt + + +def test_build_agent_system_prompt_omits_tools_without_workload_token() -> None: + prompt = build_agent_system_prompt( + {"system_prompt": "Base"}, + settings=AgentRuntimeSettings(KARTOGRAPH_WORKLOAD_TOKEN=""), + ) + + assert "Quick workflow" not in prompt + assert "Base" in prompt diff --git a/src/agent-runtime/tests/test_executor.py b/src/agent-runtime/tests/test_executor.py index 80e8fc71f..f95fb4d2f 100644 --- a/src/agent-runtime/tests/test_executor.py +++ b/src/agent-runtime/tests/test_executor.py @@ -70,11 +70,13 @@ def test_build_workspace_prompt_appendix_lists_materialized_repository_files( def test_build_system_prompt_includes_workspace_appendix() -> None: prompt = _build_system_prompt( {"system_prompt": "Base prompt"}, + settings=AgentRuntimeSettings(KARTOGRAPH_WORKLOAD_TOKEN="token"), workspace_appendix="## Session workspace\nFiles here", ) assert "Base prompt" in prompt assert "Files here" in prompt + assert "kartograph_get_schema_ontology" in prompt def test_extract_sdk_reply_joins_multiple_text_blocks() -> None: diff --git a/src/api/extraction/application/skill_resolution_service.py b/src/api/extraction/application/skill_resolution_service.py index e3f665c88..d461a0055 100644 --- a/src/api/extraction/application/skill_resolution_service.py +++ b/src/api/extraction/application/skill_resolution_service.py @@ -92,9 +92,16 @@ class ResolvedExtractionSkillPack: "instances required before extraction-mode transition." ), "schema_tools": ( - "Available tools: kartograph_get_schema_authoring_guide, " + "Available MCP tools (call by exact name): kartograph_get_schema_authoring_guide, " "kartograph_get_schema_ontology, kartograph_save_schema_ontology, " - "kartograph_apply_graph_mutations, kartograph_search_graph_by_slug." 
+ "kartograph_apply_graph_mutations, kartograph_search_graph_by_slug. " + "Always start with get_schema_authoring_guide, then get_schema_ontology before saves." + ), + "tools_quickstart": ( + "Workflow: (1) kartograph_get_schema_authoring_guide → (2) kartograph_get_schema_ontology " + "→ (3) kartograph_save_schema_ontology for types → (4) kartograph_apply_graph_mutations " + "for instances → (5) kartograph_search_graph_by_slug to verify. " + "Never call /management or /graph HTTP routes." ), }, ExtractionSessionMode.EXTRACTION_OPERATIONS: { diff --git a/src/api/extraction/ports/workload_schema.py b/src/api/extraction/ports/workload_schema.py index bda32bfa7..10b19a84f 100644 --- a/src/api/extraction/ports/workload_schema.py +++ b/src/api/extraction/ports/workload_schema.py @@ -24,6 +24,7 @@ async def replace_ontology( async def apply_mutation_jsonl( self, *, + tenant_id: str, knowledge_graph_id: str, jsonl: str, ) -> dict[str, object]: diff --git a/src/api/extraction/presentation/workload_routes.py b/src/api/extraction/presentation/workload_routes.py index f436f928a..61650091c 100644 --- a/src/api/extraction/presentation/workload_routes.py +++ b/src/api/extraction/presentation/workload_routes.py @@ -147,6 +147,7 @@ async def workload_apply_mutations( ) -> WorkloadMutationApplyResponse: _require_chat_scope(auth) result = await schema_service.apply_mutation_jsonl( + tenant_id=auth.tenant_id, knowledge_graph_id=auth.knowledge_graph_id, jsonl=request.jsonl, ) diff --git a/src/api/infrastructure/extraction_workload/dependencies.py b/src/api/infrastructure/extraction_workload/dependencies.py index fa768666c..4f34f15fd 100644 --- a/src/api/infrastructure/extraction_workload/dependencies.py +++ b/src/api/infrastructure/extraction_workload/dependencies.py @@ -13,6 +13,9 @@ from extraction.ports.workload_schema import IWorkloadSchemaService from infrastructure.database.connection_pool import ConnectionPool from infrastructure.dependencies import get_age_connection_pool +from 
infrastructure.extraction_workload.graph_mutation_writer import ( + GraphWorkloadGraphMutationWriter, +) from infrastructure.extraction_workload.graph_reader import GraphWorkloadGraphReader from infrastructure.extraction_workload.schema_service import GraphWorkloadSchemaService from infrastructure.settings import get_database_settings @@ -38,5 +41,13 @@ def get_workload_graph_reader( def get_workload_schema_service( session: Annotated[AsyncSession, Depends(get_write_session)], + pool: Annotated[ConnectionPool, Depends(get_age_connection_pool)], ) -> IWorkloadSchemaService: - return GraphWorkloadSchemaService(session=session) + return GraphWorkloadSchemaService( + session=session, + mutation_writer=GraphWorkloadGraphMutationWriter( + pool=pool, + settings=get_database_settings(), + session=session, + ), + ) diff --git a/src/api/infrastructure/extraction_workload/graph_mutation_writer.py b/src/api/infrastructure/extraction_workload/graph_mutation_writer.py new file mode 100644 index 000000000..619e0b9e9 --- /dev/null +++ b/src/api/infrastructure/extraction_workload/graph_mutation_writer.py @@ -0,0 +1,146 @@ +"""Graph-backed adapter for extraction workload instance mutations.""" + +from __future__ import annotations + +import asyncio +import json +from typing import Any + +from pydantic import ValidationError +from sqlalchemy.ext.asyncio import AsyncSession + +from graph.application.services.graph_mutation_service import GraphMutationService +from graph.domain.value_objects import MutationOperation, MutationOperationType +from graph.infrastructure.age_bulk_loading import AgeBulkLoadingStrategy +from graph.infrastructure.age_client import AgeGraphClient +from graph.infrastructure.mutation_applier import MutationApplier +from graph.infrastructure.postgres_kg_type_definition_store import ( + PostgresKnowledgeGraphTypeDefinitionStore, +) +from graph.infrastructure.type_definition_repository import InMemoryTypeDefinitionRepository +from infrastructure.database.connection 
import ConnectionFactory +from infrastructure.database.connection_pool import ConnectionPool +from infrastructure.settings import DatabaseSettings +from management.ports.exceptions import CanonicalSchemaMutationError + +_INSTANCE_OPS = frozenset( + { + MutationOperationType.CREATE, + MutationOperationType.UPDATE, + MutationOperationType.DELETE, + } +) + + +class GraphWorkloadGraphMutationWriter: + """Apply CREATE/UPDATE/DELETE mutations to the tenant AGE graph.""" + + def __init__( + self, + *, + pool: ConnectionPool, + settings: DatabaseSettings, + session: AsyncSession, + ) -> None: + self._pool = pool + self._settings = settings + self._session = session + self._type_store = PostgresKnowledgeGraphTypeDefinitionStore(session) + + @staticmethod + def parse_jsonl(jsonl_content: str) -> list[MutationOperation]: + operations: list[MutationOperation] = [] + for line_num, line in enumerate(jsonl_content.strip().split("\n"), start=1): + stripped = line.strip() + if not stripped: + continue + try: + operations.append(MutationOperation(**json.loads(stripped))) + except json.JSONDecodeError as exc: + raise CanonicalSchemaMutationError( + f"JSON parse error on line {line_num}: {exc}" + ) from exc + except ValidationError as exc: + raise CanonicalSchemaMutationError( + f"Validation error on line {line_num}: {exc}" + ) from exc + return operations + + @staticmethod + def split_operations( + operations: list[MutationOperation], + ) -> tuple[list[MutationOperation], list[MutationOperation]]: + define_ops: list[MutationOperation] = [] + instance_ops: list[MutationOperation] = [] + for operation in operations: + if operation.op == MutationOperationType.DEFINE: + define_ops.append(operation) + elif operation.op in _INSTANCE_OPS: + instance_ops.append(operation) + else: + raise CanonicalSchemaMutationError( + f"Unsupported mutation operation: {operation.op}" + ) + return define_ops, instance_ops + + async def apply_instance_operations( + self, + *, + tenant_id: str, + 
knowledge_graph_id: str, + operations: list[MutationOperation], + ) -> dict[str, Any]: + if not operations: + return {"applied": True, "errors": [], "operations_applied": 0} + + type_repo = InMemoryTypeDefinitionRepository() + for row in await self._type_store.list_for_kg(knowledge_graph_id): + type_repo.save(self._type_store.to_type_definition(row)) + + return await asyncio.to_thread( + self._apply_sync, + tenant_id=tenant_id, + knowledge_graph_id=knowledge_graph_id, + operations=operations, + type_repo=type_repo, + ) + + def _apply_sync( + self, + *, + tenant_id: str, + knowledge_graph_id: str, + operations: list[MutationOperation], + type_repo: InMemoryTypeDefinitionRepository, + ) -> dict[str, Any]: + graph_name = f"tenant_{tenant_id}" + factory = ConnectionFactory(self._settings, pool=self._pool) + client = AgeGraphClient( + self._settings, + connection_factory=factory, + graph_name=graph_name, + ) + client.connect() + try: + applier = MutationApplier( + client=client, + bulk_loading_strategy=AgeBulkLoadingStrategy(), + ) + service = GraphMutationService( + mutation_applier=applier, + type_definition_repository=type_repo, + ) + result = service.apply_mutations( + operations, + knowledge_graph_id=knowledge_graph_id, + ) + if not result.success: + errors = list(result.errors or ["mutation failed"]) + return {"applied": False, "errors": errors, "operations_applied": 0} + return { + "applied": True, + "errors": [], + "operations_applied": result.operations_applied, + } + finally: + client.disconnect() diff --git a/src/api/infrastructure/extraction_workload/graph_reader.py b/src/api/infrastructure/extraction_workload/graph_reader.py index 6ff902aea..52ab33a90 100644 --- a/src/api/infrastructure/extraction_workload/graph_reader.py +++ b/src/api/infrastructure/extraction_workload/graph_reader.py @@ -40,7 +40,7 @@ async def search_by_slug( try: repository = GraphExtractionReadOnlyRepository( client=client, - graph_id=knowledge_graph_id, + graph_id=graph_name, ) 
service = GraphQueryService(repository=repository, probe=DefaultGraphServiceProbe()) nodes = service.search_by_slug( diff --git a/src/api/infrastructure/extraction_workload/schema_service.py b/src/api/infrastructure/extraction_workload/schema_service.py index 57bff8a08..75e005577 100644 --- a/src/api/infrastructure/extraction_workload/schema_service.py +++ b/src/api/infrastructure/extraction_workload/schema_service.py @@ -2,11 +2,16 @@ from __future__ import annotations +import json + from sqlalchemy.ext.asyncio import AsyncSession from infrastructure.canonical_schema.graph_canonical_schema_repository import ( GraphCanonicalSchemaRepository, ) +from infrastructure.extraction_workload.graph_mutation_writer import ( + GraphWorkloadGraphMutationWriter, +) from management.domain.value_objects import OntologyConfig from management.ports.exceptions import CanonicalSchemaMutationError @@ -14,9 +19,15 @@ class GraphWorkloadSchemaService: """Read and write canonical schema using the Management graph-native store.""" - def __init__(self, session: AsyncSession) -> None: + def __init__( + self, + session: AsyncSession, + *, + mutation_writer: GraphWorkloadGraphMutationWriter, + ) -> None: self._session = session self._repository = GraphCanonicalSchemaRepository(session) + self._mutation_writer = mutation_writer async def get_ontology(self, *, knowledge_graph_id: str) -> OntologyConfig | None: return await self._repository.get_ontology(knowledge_graph_id) @@ -34,12 +45,47 @@ async def replace_ontology( async def apply_mutation_jsonl( self, *, + tenant_id: str, knowledge_graph_id: str, jsonl: str, ) -> dict[str, object]: try: - await self._repository.apply_mutation_log(knowledge_graph_id, jsonl) + operations = GraphWorkloadGraphMutationWriter.parse_jsonl(jsonl) + define_ops, instance_ops = GraphWorkloadGraphMutationWriter.split_operations( + operations + ) except CanonicalSchemaMutationError as exc: return {"applied": False, "errors": [str(exc)]} + + if not define_ops and not 
instance_ops: + return {"applied": True, "errors": [], "operations_applied": 0} + + errors: list[str] = [] + operations_applied = 0 + + if define_ops: + define_jsonl = "\n".join( + json.dumps(operation.model_dump(mode="json")) for operation in define_ops + ) + try: + await self._repository.apply_mutation_log(knowledge_graph_id, define_jsonl) + except CanonicalSchemaMutationError as exc: + errors.append(str(exc)) + + if instance_ops and not errors: + instance_result = await self._mutation_writer.apply_instance_operations( + tenant_id=tenant_id, + knowledge_graph_id=knowledge_graph_id, + operations=instance_ops, + ) + if not instance_result.get("applied"): + errors.extend(str(item) for item in instance_result.get("errors", [])) + else: + operations_applied = int(instance_result.get("operations_applied", 0)) + + if errors: + await self._session.rollback() + return {"applied": False, "errors": errors} + await self._session.commit() - return {"applied": True, "errors": []} + return {"applied": True, "errors": [], "operations_applied": operations_applied} diff --git a/src/api/infrastructure/management/design_artifacts_service.py b/src/api/infrastructure/management/design_artifacts_service.py new file mode 100644 index 000000000..a92950a5c --- /dev/null +++ b/src/api/infrastructure/management/design_artifacts_service.py @@ -0,0 +1,55 @@ +"""Load design artifacts from canonical schema and tenant AGE graph.""" + +from __future__ import annotations + +from starlette.concurrency import run_in_threadpool + +from graph.infrastructure.bulk_data_reader import fetch_bulk_graph_data +from infrastructure.database.connection_pool import ConnectionPool +from management.application.design_artifacts import build_design_artifacts +from management.application.services.knowledge_graph_service import KnowledgeGraphService + + +class DesignArtifactsService: + """Compose ontology definitions with live graph instances for the Dev UI.""" + + def __init__( + self, + *, + knowledge_graph_service: 
KnowledgeGraphService, + connection_pool: ConnectionPool, + tenant_id: str, + ) -> None: + self._knowledge_graph_service = knowledge_graph_service + self._connection_pool = connection_pool + self._tenant_id = tenant_id + + async def get_design_artifacts( + self, + *, + user_id: str, + kg_id: str, + limit: int = 500, + ) -> dict[str, object] | None: + ontology = await self._knowledge_graph_service.get_ontology( + user_id=user_id, + kg_id=kg_id, + ) + if ontology is None: + kg = await self._knowledge_graph_service.get(user_id=user_id, kg_id=kg_id) + if kg is None: + return None + + graph_name = f"tenant_{self._tenant_id}" + graph_data = await run_in_threadpool( + fetch_bulk_graph_data, + self._connection_pool, + graph_name, + ) + bounded_limit = max(1, min(limit, 3000)) + return build_design_artifacts( + knowledge_graph_id=kg_id, + ontology=ontology, + graph_data=graph_data, + limit=bounded_limit, + ) diff --git a/src/api/management/application/design_artifacts.py b/src/api/management/application/design_artifacts.py new file mode 100644 index 000000000..f46787069 --- /dev/null +++ b/src/api/management/application/design_artifacts.py @@ -0,0 +1,223 @@ +"""Pure builders for knowledge graph design artifact views.""" + +from __future__ import annotations + +from collections import defaultdict +from typing import Any + +from management.domain.value_objects import OntologyConfig + +_SYSTEM_NODE_PROPERTIES = frozenset( + { + "id", + "slug", + "data_source_id", + "source_path", + "knowledge_graph_id", + "graph_id", + "name", + } +) + + +def _instance_properties(raw: dict[str, Any]) -> dict[str, Any]: + return { + key: value + for key, value in raw.items() + if key not in _SYSTEM_NODE_PROPERTIES and not key.startswith("_") + } + + +def build_design_artifacts( + *, + knowledge_graph_id: str, + ontology: OntologyConfig | None, + graph_data: dict[str, Any], + limit: int, +) -> dict[str, Any]: + """Merge canonical ontology with live AGE graph instances for the Dev UI.""" + nodes 
= [ + node + for node in graph_data.get("nodes", []) + if node.get("knowledge_graph_id") == knowledge_graph_id and not node.get("_redacted") + ] + edges = [ + edge + for edge in graph_data.get("edges", []) + if edge.get("knowledge_graph_id") == knowledge_graph_id and not edge.get("_redacted") + ] + + node_by_age_id = {str(node.get("id")): node for node in nodes if node.get("id")} + + instances_by_type: dict[str, list[dict[str, Any]]] = defaultdict(list) + sorted_nodes = sorted( + nodes, + key=lambda node: ( + str(node.get("type") or ""), + str(node.get("slug") or node.get("domainId") or node.get("id") or ""), + ), + ) + truncated_nodes = sorted_nodes[:limit] + + for node in truncated_nodes: + entity_type = str(node.get("type") or "unknown") + slug = str(node.get("slug") or node.get("domainId") or node.get("id") or "") + instances_by_type[entity_type].append( + { + "slug": slug, + "properties": _instance_properties(node), + } + ) + + entities: dict[str, dict[str, Any]] = {} + if ontology is not None: + for node_type in ontology.node_types: + required = list(node_type.required_properties) + optional = list(node_type.optional_properties) + property_definitions = { + prop: prop.replace("_", " ").strip() or prop + for prop in (*required, *optional) + } + type_instances = instances_by_type.get(node_type.label, []) + entities[node_type.label] = { + "type": node_type.label, + "description": node_type.description, + "required_properties": required, + "optional_properties": optional, + "property_definitions": property_definitions, + "prepopulated_instances": node_type.prepopulated, + "instance_count": len(instances_by_type.get(node_type.label, [])), + "instances": type_instances, + } + + for entity_type, type_instances in instances_by_type.items(): + if entity_type in entities: + continue + entities[entity_type] = { + "type": entity_type, + "description": "", + "required_properties": [], + "optional_properties": [], + "property_definitions": {}, + "prepopulated_instances": 
False, + "instance_count": len(type_instances), + "instances": type_instances, + } + + relationship_instances: dict[str, list[dict[str, Any]]] = defaultdict(list) + sorted_edges = sorted( + edges, + key=lambda edge: ( + str(edge.get("type") or ""), + str(edge.get("source") or ""), + str(edge.get("target") or ""), + ), + ) + truncated_edges = sorted_edges[:limit] + + for edge in truncated_edges: + source_node = node_by_age_id.get(str(edge.get("source"))) + target_node = node_by_age_id.get(str(edge.get("target"))) + if source_node is None or target_node is None: + continue + source_type = str(source_node.get("type") or "unknown") + target_type = str(target_node.get("type") or "unknown") + relationship_type = str(edge.get("type") or "unknown") + composite_key = f"{source_type}|{relationship_type}|{target_type}" + relationship_instances[composite_key].append( + { + "source_slug": str( + source_node.get("slug") + or source_node.get("domainId") + or source_node.get("id") + or "" + ), + "target_slug": str( + target_node.get("slug") + or target_node.get("domainId") + or target_node.get("id") + or "" + ), + "properties": _instance_properties(edge), + } + ) + + relationships: list[dict[str, Any]] = [] + if ontology is not None: + for edge_type in ontology.edge_types: + source_label = edge_type.source_labels[0] if edge_type.source_labels else "" + target_label = edge_type.target_labels[0] if edge_type.target_labels else "" + composite_key = f"{source_label}|{edge_type.label}|{target_label}" + type_instances = relationship_instances.get(composite_key, []) + if not type_instances: + for key, instances in relationship_instances.items(): + parts = key.split("|") + if len(parts) == 3 and parts[1] == edge_type.label: + composite_key = key + type_instances = instances + break + relationships.append( + { + "key": composite_key, + "source_entity_type": source_label, + "target_entity_type": target_label, + "relationship_type": edge_type.label, + "reverse_relationship_type": None, + 
"reverse_relationship_description": None, + "prepopulated_instances": False, + "description": edge_type.description or None, + "instance_count": len(type_instances), + "instances": type_instances, + "required_parameters": list(edge_type.properties), + "optional_parameters": [], + "parameter_definitions": { + prop: prop.replace("_", " ").strip() or prop + for prop in edge_type.properties + }, + } + ) + + seen_relationship_keys = {row["key"] for row in relationships} + for composite_key, type_instances in relationship_instances.items(): + if composite_key in seen_relationship_keys: + continue + parts = composite_key.split("|") + if len(parts) != 3: + continue + relationships.append( + { + "key": composite_key, + "source_entity_type": parts[0], + "target_entity_type": parts[2], + "relationship_type": parts[1], + "reverse_relationship_type": None, + "reverse_relationship_description": None, + "prepopulated_instances": False, + "description": None, + "instance_count": len(type_instances), + "instances": type_instances, + "required_parameters": [], + "optional_parameters": [], + "parameter_definitions": {}, + } + ) + + return { + "found": ontology is not None or bool(entities) or bool(relationships), + "knowledge_graph_id": knowledge_graph_id, + "entities": entities, + "relationships": relationships, + "counts": { + "entity_types": len(entities), + "relationship_types": len(relationships), + "entity_instances": len(nodes), + "relationship_instances": len(edges), + }, + "limits": { + "requested": limit, + "entity_instances_returned": len(truncated_nodes), + "relationship_instances_returned": len(truncated_edges), + "entity_instances_truncated": len(nodes) > len(truncated_nodes), + "relationship_instances_truncated": len(edges) > len(truncated_edges), + }, + } diff --git a/src/api/management/dependencies/design_artifacts.py b/src/api/management/dependencies/design_artifacts.py new file mode 100644 index 000000000..3f0fb4e3e --- /dev/null +++ 
b/src/api/management/dependencies/design_artifacts.py @@ -0,0 +1,27 @@ +"""Dependencies for design artifact endpoints.""" + +from __future__ import annotations + +from typing import Annotated + +from fastapi import Depends + +from iam.application.value_objects import CurrentUser +from iam.dependencies.user import get_current_user +from infrastructure.database.connection_pool import ConnectionPool +from infrastructure.dependencies import get_age_connection_pool +from infrastructure.management.design_artifacts_service import DesignArtifactsService +from management.dependencies.knowledge_graph import get_knowledge_graph_service +from management.application.services.knowledge_graph_service import KnowledgeGraphService + + +def get_design_artifacts_service( + kg_service: Annotated[KnowledgeGraphService, Depends(get_knowledge_graph_service)], + pool: Annotated[ConnectionPool, Depends(get_age_connection_pool)], + current_user: Annotated[CurrentUser, Depends(get_current_user)], +) -> DesignArtifactsService: + return DesignArtifactsService( + knowledge_graph_service=kg_service, + connection_pool=pool, + tenant_id=current_user.tenant_id.value, + ) diff --git a/src/api/management/presentation/knowledge_graphs/models.py b/src/api/management/presentation/knowledge_graphs/models.py index f79dc2c76..5126506c8 100644 --- a/src/api/management/presentation/knowledge_graphs/models.py +++ b/src/api/management/presentation/knowledge_graphs/models.py @@ -383,3 +383,73 @@ def from_domain(cls, config: OntologyConfig) -> OntologyConfigResponse: ], approved_at=config.approved_at, ) + + +class DesignArtifactInstanceModel(BaseModel): + """One entity or relationship instance surfaced in design artifacts.""" + + slug: str | None = None + source_slug: str | None = None + target_slug: str | None = None + properties: dict[str, object] = Field(default_factory=dict) + + +class DesignArtifactEntityTypeModel(BaseModel): + """Entity type definition merged with live instance counts.""" + + type: str + 
description: str = "" + required_properties: list[str] = Field(default_factory=list) + optional_properties: list[str] = Field(default_factory=list) + property_definitions: dict[str, str] = Field(default_factory=dict) + prepopulated_instances: bool | str = False + instance_count: int = 0 + instances: list[DesignArtifactInstanceModel] = Field(default_factory=list) + + +class DesignArtifactRelationshipTypeModel(BaseModel): + """Relationship type definition merged with live instance counts.""" + + key: str + source_entity_type: str + target_entity_type: str + relationship_type: str + reverse_relationship_type: str | None = None + reverse_relationship_description: str | None = None + prepopulated_instances: bool | str = False + description: str | None = None + instance_count: int = 0 + instances: list[DesignArtifactInstanceModel] = Field(default_factory=list) + required_parameters: list[str] = Field(default_factory=list) + optional_parameters: list[str] = Field(default_factory=list) + parameter_definitions: dict[str, str] = Field(default_factory=dict) + + +class DesignArtifactsCountsModel(BaseModel): + """Aggregate counts for design artifact navigation.""" + + entity_types: int = 0 + relationship_types: int = 0 + entity_instances: int = 0 + relationship_instances: int = 0 + + +class DesignArtifactsLimitsModel(BaseModel): + """Truncation metadata for instance payloads.""" + + requested: int + entity_instances_returned: int + relationship_instances_returned: int + entity_instances_truncated: bool + relationship_instances_truncated: bool + + +class DesignArtifactsResponse(BaseModel): + """Canonical schema plus live graph instances for Graph Management UI.""" + + found: bool + knowledge_graph_id: str + entities: dict[str, DesignArtifactEntityTypeModel] + relationships: list[DesignArtifactRelationshipTypeModel] + counts: DesignArtifactsCountsModel + limits: DesignArtifactsLimitsModel diff --git a/src/api/management/presentation/knowledge_graphs/routes.py 
b/src/api/management/presentation/knowledge_graphs/routes.py index ba24b7e0e..b384238c3 100644 --- a/src/api/management/presentation/knowledge_graphs/routes.py +++ b/src/api/management/presentation/knowledge_graphs/routes.py @@ -28,8 +28,11 @@ MaintenanceScheduleUpsertRequest, OntologyConfigRequest, OntologyConfigResponse, + DesignArtifactsResponse, UpdateKnowledgeGraphRequest, ) +from infrastructure.management.design_artifacts_service import DesignArtifactsService +from management.dependencies.design_artifacts import get_design_artifacts_service from shared_kernel.authorization.types import Permission router = APIRouter(tags=["knowledge-graphs"]) @@ -593,6 +596,37 @@ async def update_knowledge_graph( ) +@router.get( + "/knowledge-graphs/{kg_id}/design-artifacts", + response_model=DesignArtifactsResponse, + summary="Get design artifacts for a knowledge graph", + description=""" +Return canonical schema definitions merged with live graph instances from the tenant AGE database. + +Used by the Graph Management workspace to render k-extract-style design artifact panels. +Requires `view` permission on the knowledge graph. 
+""", +) +async def get_knowledge_graph_design_artifacts( + kg_id: str, + current_user: Annotated[CurrentUser, Depends(get_current_user)], + service: Annotated[DesignArtifactsService, Depends(get_design_artifacts_service)], + limit: Annotated[int, Query(ge=1, le=3000)] = 500, +) -> DesignArtifactsResponse: + """Get merged ontology and graph instance artifacts for one knowledge graph.""" + payload = await service.get_design_artifacts( + user_id=current_user.user_id.value, + kg_id=kg_id, + limit=limit, + ) + if payload is None: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail=f"Knowledge graph {kg_id} not found or not accessible", + ) + return DesignArtifactsResponse.model_validate(payload) + + @router.get( "/knowledge-graphs/{kg_id}/ontology", response_model=OntologyConfigResponse, diff --git a/src/api/tests/integration/extraction/test_workload_graph_mutations.py b/src/api/tests/integration/extraction/test_workload_graph_mutations.py new file mode 100644 index 000000000..cc8a690e7 --- /dev/null +++ b/src/api/tests/integration/extraction/test_workload_graph_mutations.py @@ -0,0 +1,120 @@ +"""Integration tests for workload graph instance mutations.""" + +from __future__ import annotations + +import json + +import pytest +from sqlalchemy import text + +from graph.domain.value_objects import EntityType, MutationOperationType +from graph.infrastructure.tenant_graph_handler import AGEGraphProvisioner +from infrastructure.database.connection import ConnectionFactory +from infrastructure.extraction_workload.graph_mutation_writer import ( + GraphWorkloadGraphMutationWriter, +) +from infrastructure.extraction_workload.graph_reader import GraphWorkloadGraphReader +from infrastructure.extraction_workload.schema_service import GraphWorkloadSchemaService +from management.domain.value_objects import EdgeTypeDefinition, NodeTypeDefinition, OntologyConfig + +pytestmark = pytest.mark.integration + + +async def _table_exists(async_session, table_name: str) -> 
bool: + result = await async_session.execute( + text( + """ + SELECT 1 + FROM information_schema.tables + WHERE table_name = :table_name + """ + ), + {"table_name": table_name}, + ) + return result.scalar_one_or_none() is not None + + +@pytest.mark.asyncio +async def test_workload_apply_mutation_jsonl_writes_instance_to_age_graph( + async_session, + clean_management_data: None, + test_tenant: str, + integration_connection_pool, + integration_db_settings, +) -> None: + """CREATE mutations via workload schema service persist nodes in tenant AGE graph.""" + if not await _table_exists(async_session, "knowledge_graph_type_definitions"): + pytest.skip("knowledge_graph_type_definitions table is missing") + + await async_session.rollback() + + graph_name = f"tenant_{test_tenant}" + factory = ConnectionFactory(integration_db_settings, pool=integration_connection_pool) + AGEGraphProvisioner(connection_factory=factory).ensure_graph_exists(graph_name) + + knowledge_graph_id = "kg-workload-mutation-001" + mutation_writer = GraphWorkloadGraphMutationWriter( + pool=integration_connection_pool, + settings=integration_db_settings, + session=async_session, + ) + schema_service = GraphWorkloadSchemaService( + session=async_session, + mutation_writer=mutation_writer, + ) + reader = GraphWorkloadGraphReader( + pool=integration_connection_pool, + settings=integration_db_settings, + ) + + ontology = OntologyConfig( + node_types=( + NodeTypeDefinition( + label="service", + description="Deployable service", + required_properties=("name", "slug"), + ), + ), + edge_types=( + EdgeTypeDefinition( + label="depends_on", + source_labels=("service",), + target_labels=("service",), + ), + ), + ) + await schema_service.replace_ontology( + knowledge_graph_id=knowledge_graph_id, + config=ontology, + ) + + create_line = { + "op": MutationOperationType.CREATE.value, + "type": EntityType.NODE.value, + "id": "service:0123456789abcdef", + "label": "service", + "set_properties": { + "name": "api-gateway", + 
"slug": "api-gateway", + "data_source_id": "schema-bootstrap", + "source_path": "graph-management-assistant", + }, + } + result = await schema_service.apply_mutation_jsonl( + tenant_id=test_tenant, + knowledge_graph_id=knowledge_graph_id, + jsonl=json.dumps(create_line), + ) + + assert result["applied"] is True, result.get("errors") + assert result.get("operations_applied") == 1 + + nodes = await reader.search_by_slug( + tenant_id=test_tenant, + knowledge_graph_id=knowledge_graph_id, + slug="api-gateway", + entity_type="service", + ) + assert len(nodes) == 1 + assert nodes[0].slug == "api-gateway" + assert nodes[0].properties.get("name") == "api-gateway" diff --git a/src/api/tests/unit/extraction/presentation/test_workload_routes.py b/src/api/tests/unit/extraction/presentation/test_workload_routes.py index 0a811f31b..3a97182e8 100644 --- a/src/api/tests/unit/extraction/presentation/test_workload_routes.py +++ b/src/api/tests/unit/extraction/presentation/test_workload_routes.py @@ -33,6 +33,7 @@ async def replace_ontology( async def apply_mutation_jsonl( self, *, + tenant_id: str, knowledge_graph_id: str, jsonl: str, ) -> dict[str, object]: diff --git a/src/api/tests/unit/infrastructure/extraction_workload/test_graph_mutation_writer.py b/src/api/tests/unit/infrastructure/extraction_workload/test_graph_mutation_writer.py new file mode 100644 index 000000000..bbf169442 --- /dev/null +++ b/src/api/tests/unit/infrastructure/extraction_workload/test_graph_mutation_writer.py @@ -0,0 +1,47 @@ +"""Unit tests for workload graph mutation writer.""" + +from __future__ import annotations + +import pytest + +from graph.domain.value_objects import EntityType, MutationOperation, MutationOperationType +from infrastructure.extraction_workload.graph_mutation_writer import ( + GraphWorkloadGraphMutationWriter, +) +from management.ports.exceptions import CanonicalSchemaMutationError + + +def test_split_operations_separates_define_and_instance_ops() -> None: + operations = [ + 
MutationOperation( + op=MutationOperationType.DEFINE, + type=EntityType.NODE, + label="service", + description="Service", + required_properties=["name"], + ), + MutationOperation( + op=MutationOperationType.CREATE, + type=EntityType.NODE, + id="service:0123456789abcdef", + label="service", + set_properties={ + "name": "api", + "slug": "api", + "data_source_id": "bootstrap", + "source_path": "assistant", + }, + ), + ] + + define_ops, instance_ops = GraphWorkloadGraphMutationWriter.split_operations(operations) + + assert len(define_ops) == 1 + assert define_ops[0].op == MutationOperationType.DEFINE + assert len(instance_ops) == 1 + assert instance_ops[0].op == MutationOperationType.CREATE + + +def test_parse_jsonl_rejects_invalid_json() -> None: + with pytest.raises(CanonicalSchemaMutationError, match="JSON parse error"): + GraphWorkloadGraphMutationWriter.parse_jsonl("{not-json") diff --git a/src/api/tests/unit/infrastructure/extraction_workload/test_schema_service.py b/src/api/tests/unit/infrastructure/extraction_workload/test_schema_service.py new file mode 100644 index 000000000..acf6b0593 --- /dev/null +++ b/src/api/tests/unit/infrastructure/extraction_workload/test_schema_service.py @@ -0,0 +1,68 @@ +"""Unit tests for workload schema service mutation routing.""" + +from __future__ import annotations + +from unittest.mock import AsyncMock, MagicMock + +import pytest + +from graph.domain.value_objects import MutationOperationType +from infrastructure.extraction_workload.schema_service import GraphWorkloadSchemaService + + +@pytest.mark.asyncio +async def test_apply_mutation_jsonl_routes_instance_ops_to_graph_writer() -> None: + session = MagicMock() + session.commit = AsyncMock() + session.rollback = AsyncMock() + mutation_writer = MagicMock() + mutation_writer.apply_instance_operations = AsyncMock( + return_value={"applied": True, "errors": [], "operations_applied": 1} + ) + service = GraphWorkloadSchemaService(session=session, mutation_writer=mutation_writer) + 
service._repository = MagicMock() + service._repository.apply_mutation_log = AsyncMock() + + jsonl = ( + '{"op":"CREATE","type":"node","id":"service:0123456789abcdef","label":"service",' + '"set_properties":{"name":"api","slug":"api","data_source_id":"bootstrap","source_path":"assistant"}}' + ) + result = await service.apply_mutation_jsonl( + tenant_id="tenant-1", + knowledge_graph_id="kg-1", + jsonl=jsonl, + ) + + assert result["applied"] is True + service._repository.apply_mutation_log.assert_not_called() + mutation_writer.apply_instance_operations.assert_awaited_once() + await_args = mutation_writer.apply_instance_operations.await_args + assert await_args.kwargs["tenant_id"] == "tenant-1" + assert await_args.kwargs["knowledge_graph_id"] == "kg-1" + assert await_args.kwargs["operations"][0].op == MutationOperationType.CREATE + + +@pytest.mark.asyncio +async def test_apply_mutation_jsonl_routes_define_ops_to_canonical_repo() -> None: + session = MagicMock() + session.commit = AsyncMock() + session.rollback = AsyncMock() + mutation_writer = MagicMock() + mutation_writer.apply_instance_operations = AsyncMock() + service = GraphWorkloadSchemaService(session=session, mutation_writer=mutation_writer) + service._repository = MagicMock() + service._repository.apply_mutation_log = AsyncMock() + + jsonl = ( + '{"op":"DEFINE","type":"node","label":"service","description":"Service",' + '"required_properties":["name"]}' + ) + result = await service.apply_mutation_jsonl( + tenant_id="tenant-1", + knowledge_graph_id="kg-1", + jsonl=jsonl, + ) + + assert result["applied"] is True + service._repository.apply_mutation_log.assert_awaited_once() + mutation_writer.apply_instance_operations.assert_not_called() diff --git a/src/api/tests/unit/management/application/test_design_artifacts.py b/src/api/tests/unit/management/application/test_design_artifacts.py new file mode 100644 index 000000000..64ae79634 --- /dev/null +++ 
b/src/api/tests/unit/management/application/test_design_artifacts.py @@ -0,0 +1,81 @@ +"""Unit tests for design artifact builders.""" + +from __future__ import annotations + +from management.application.design_artifacts import build_design_artifacts +from management.domain.value_objects import EdgeTypeDefinition, NodeTypeDefinition, OntologyConfig + + +def test_build_design_artifacts_merges_ontology_with_graph_instances() -> None: + ontology = OntologyConfig( + node_types=( + NodeTypeDefinition( + label="service", + description="Deployable service", + required_properties=("name", "slug"), + prepopulated=True, + ), + ), + edge_types=( + EdgeTypeDefinition( + label="depends_on", + source_labels=("service",), + target_labels=("service",), + ), + ), + ) + graph_data = { + "nodes": [ + { + "id": "age-1", + "type": "service", + "slug": "api-gateway", + "knowledge_graph_id": "kg-1", + "name": "api-gateway", + "data_source_id": "bootstrap", + "source_path": "assistant", + } + ], + "edges": [ + { + "id": "edge-1", + "type": "depends_on", + "source": "age-1", + "target": "age-1", + "knowledge_graph_id": "kg-1", + "data_source_id": "bootstrap", + "source_path": "assistant", + } + ], + } + + payload = build_design_artifacts( + knowledge_graph_id="kg-1", + ontology=ontology, + graph_data=graph_data, + limit=500, + ) + + assert payload["found"] is True + assert payload["entities"]["service"]["instance_count"] == 1 + assert payload["entities"]["service"]["instances"][0]["slug"] == "api-gateway" + assert payload["relationships"][0]["instance_count"] == 1 + assert payload["relationships"][0]["instances"][0]["source_slug"] == "api-gateway" + + +def test_build_design_artifacts_filters_other_knowledge_graphs() -> None: + payload = build_design_artifacts( + knowledge_graph_id="kg-1", + ontology=None, + graph_data={ + "nodes": [ + {"id": "1", "type": "service", "slug": "a", "knowledge_graph_id": "kg-2"}, + {"id": "2", "type": "service", "slug": "b", "knowledge_graph_id": "kg-1"}, + ], + 
"edges": [], + }, + limit=500, + ) + + assert payload["counts"]["entity_instances"] == 1 + assert payload["entities"]["service"]["instance_count"] == 1 diff --git a/src/dev-ui/app/components/graph-management/GraphDesignEntitiesPanel.vue b/src/dev-ui/app/components/graph-management/GraphDesignEntitiesPanel.vue new file mode 100644 index 000000000..5496a5a5a --- /dev/null +++ b/src/dev-ui/app/components/graph-management/GraphDesignEntitiesPanel.vue @@ -0,0 +1,301 @@ +<script setup lang="ts"> +import { computed, ref, watch } from 'vue' +import { toast } from 'vue-sonner' +import { ChevronDown, Loader2, RefreshCw, Search } from 'lucide-vue-next' +import { Card, CardContent, CardDescription, CardHeader, CardTitle } from '@/components/ui/card' +import { Badge } from '@/components/ui/badge' +import { Button } from '@/components/ui/button' +import { + type DesignArtifactEntityType, + type DesignArtifactsResponse, + pageSlice, + prepopulationBadgeClass, + prepopulationCardClass, + prepopulationLabel, +} from '@/utils/kgDesignArtifacts' + +const props = withDefaults( + defineProps<{ + kgId: string + reloadNonce?: number + embedded?: boolean + }>(), + { reloadNonce: 0, embedded: true }, +) + +const { apiFetch } = useApiClient() + +const loading = ref(true) +const data = ref<DesignArtifactsResponse | null>(null) +const filterText = ref('') +const instancePage = ref<Record<string, number>>({}) + +async function fetchEntities() { + if (!props.kgId) { + data.value = null + loading.value = false + return + } + loading.value = true + try { + data.value = await apiFetch<DesignArtifactsResponse>( + `/management/knowledge-graphs/${props.kgId}/design-artifacts`, + { query: { limit: 500 } }, + ) + instancePage.value = {} + } catch (err: unknown) { + toast.error('Failed to load entity design artifacts', { + description: err instanceof Error ? 
err.message : 'Request failed', + }) + data.value = null + } finally { + loading.value = false + } +} + +const entityRows = computed((): DesignArtifactEntityType[] => { + if (!data.value?.entities) return [] + return Object.entries(data.value.entities).map(([type, def]) => ({ + type, + ...def, + })) +}) + +const filteredRows = computed(() => { + const query = filterText.value.trim().toLowerCase() + if (!query) return entityRows.value + return entityRows.value.filter((row) => row.type.toLowerCase().includes(query)) +}) + +function setInstancePage(typeKey: string, page: number) { + instancePage.value = { ...instancePage.value, [typeKey]: page } +} + +watch(filterText, () => { + instancePage.value = {} +}) + +watch( + () => [props.kgId, props.reloadNonce] as const, + () => { + void fetchEntities() + }, + { immediate: true }, +) + +defineExpose({ refresh: fetchEntities }) +</script> + +<template> + <div :class="embedded ? 'space-y-4' : 'mx-auto max-w-4xl space-y-6'"> + <div v-if="embedded" class="flex flex-wrap items-start justify-between gap-2 border-b pb-3"> + <div> + <h2 class="text-lg font-semibold tracking-tight">Entity ontology</h2> + <p class="text-xs text-muted-foreground"> + Canonical schema and live instances from the platform database for this knowledge graph. 
+ </p> + </div> + <div class="flex items-center gap-2"> + <Badge v-if="data?.counts.entity_types" variant="secondary" class="shrink-0"> + {{ data.counts.entity_types }} type(s) + </Badge> + <Button variant="outline" size="sm" :disabled="loading" @click="fetchEntities"> + <Loader2 v-if="loading" class="mr-1.5 size-3.5 animate-spin" /> + <RefreshCw v-else class="mr-1.5 size-3.5" /> + Refresh + </Button> + </div> + </div> + + <div v-if="loading" class="flex items-center justify-center py-16"> + <Loader2 class="size-8 animate-spin text-muted-foreground" /> + </div> + + <template v-else-if="data"> + <Card v-if="!data.found || entityRows.length === 0"> + <CardHeader> + <CardTitle class="text-base"> + <span v-if="!data.found">No ontology saved yet</span> + <span v-else>No entity types yet</span> + </CardTitle> + </CardHeader> + <CardContent class="space-y-3 text-sm text-muted-foreground"> + <p class="text-foreground"> + Use the Graph Management Assistant above to design entity types and instances. When changes + are saved to the database, click Refresh to review them here. + </p> + </CardContent> + </Card> + + <template v-else> + <Card> + <CardHeader class="pb-3"> + <CardTitle class="text-base">Prepopulation strategy color guide</CardTitle> + <CardDescription> + Each entity type is color-coded by its prepopulation requirement. 
+ </CardDescription> + </CardHeader> + <CardContent class="space-y-3 text-sm"> + <div class="flex flex-wrap gap-2"> + <Badge variant="outline" class="border-cyan-500/40 bg-cyan-500/10 text-cyan-700 dark:text-cyan-300"> + true + </Badge> + <Badge variant="outline" class="border-emerald-500/40 bg-emerald-500/10 text-emerald-700 dark:text-emerald-300"> + false + </Badge> + </div> + </CardContent> + </Card> + + <Card> + <CardHeader class="pb-3"> + <CardTitle class="text-base">Filter types</CardTitle> + </CardHeader> + <CardContent> + <div class="relative"> + <Search class="absolute left-3 top-1/2 size-4 -translate-y-1/2 text-muted-foreground" /> + <input + v-model="filterText" + type="search" + placeholder="Search by entity type name…" + class="flex h-10 w-full rounded-md border border-input bg-background pl-9 pr-3 text-sm ring-offset-background placeholder:text-muted-foreground focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring" + /> + </div> + </CardContent> + </Card> + + <div class="space-y-3"> + <p v-if="filteredRows.length === 0" class="py-4 text-center text-sm text-muted-foreground"> + No entity types match your search. + </p> + + <Card + v-for="row in filteredRows" + :key="row.type" + :class="['overflow-hidden', prepopulationCardClass(row.prepopulated_instances)]" + > + <details class="group"> + <summary class="flex cursor-pointer list-none items-start gap-3 p-4 [&::-webkit-details-marker]:hidden"> + <ChevronDown + class="mt-0.5 size-4 shrink-0 text-muted-foreground transition-transform group-open:rotate-180" + /> + <div class="flex min-w-0 flex-1 flex-wrap items-center gap-2"> + <span class="text-base font-semibold">{{ row.type }}</span> + <Badge variant="outline" :class="prepopulationBadgeClass(row.prepopulated_instances)"> + {{ prepopulationLabel(row.prepopulated_instances) }} + </Badge> + <Badge variant="secondary"> + {{ row.instance_count }} instance{{ row.instance_count === 1 ? 
'' : 's' }} + </Badge> + </div> + </summary> + <div class="space-y-4 border-t px-4 pb-4 pt-0"> + <p v-if="row.description" class="pt-3 text-sm text-muted-foreground"> + {{ row.description }} + </p> + <div v-else class="pt-2 text-sm italic text-muted-foreground">No description</div> + + <div class="space-y-2"> + <p class="text-xs font-medium uppercase tracking-wide text-muted-foreground">Properties</p> + <div + v-if="row.property_definitions && Object.keys(row.property_definitions).length > 0" + class="divide-y rounded-md border" + > + <div + v-for="(label, key) in row.property_definitions" + :key="key" + class="flex flex-wrap gap-x-2 gap-y-1 px-3 py-2 text-sm" + > + <code class="rounded bg-muted px-1.5 py-0.5 font-mono text-xs">{{ key }}</code> + <span class="text-muted-foreground">{{ label }}</span> + <Badge + v-if="row.required_properties?.includes(String(key))" + variant="outline" + class="h-5 text-[10px]" + > + required + </Badge> + <Badge + v-else-if="row.optional_properties?.includes(String(key))" + variant="outline" + class="h-5 text-[10px] opacity-70" + > + optional + </Badge> + </div> + </div> + <p v-else class="text-sm text-muted-foreground">No property definitions</p> + </div> + + <details v-if="(row.instances?.length ?? 0) > 0" class="group/inst rounded-lg border"> + <summary + class="flex cursor-pointer list-none items-center gap-2 px-3 py-2 text-sm font-medium hover:bg-muted/50 [&::-webkit-details-marker]:hidden" + > + <ChevronDown + class="size-4 shrink-0 text-muted-foreground transition-transform group-open/inst:rotate-180" + /> + Instances + </summary> + <div class="space-y-3 border-t p-3"> + <ul class="space-y-2 text-sm"> + <li + v-for="(inst, idx) in pageSlice(instancePage, row.type, row.instances || []).items" + :key="inst.slug ?? idx" + class="rounded-md bg-muted/40 px-3 py-2" + > + <div class="mb-1 font-mono text-xs text-muted-foreground"> + {{ inst.slug ?? 
'—' }} + </div> + <pre class="whitespace-pre-wrap break-all text-xs">{{ + JSON.stringify(inst.properties ?? {}, null, 2) + }}</pre> + </li> + </ul> + <div + v-if="pageSlice(instancePage, row.type, row.instances || []).total > 20" + class="flex flex-wrap items-center gap-2 pt-1" + @click.stop + > + <Button + variant="outline" + size="sm" + :disabled="pageSlice(instancePage, row.type, row.instances || []).page <= 0" + @click.stop.prevent="setInstancePage(row.type, pageSlice(instancePage, row.type, row.instances || []).page - 1)" + > + Previous + </Button> + <span class="text-xs text-muted-foreground"> + Page {{ pageSlice(instancePage, row.type, row.instances || []).page + 1 }} / + {{ pageSlice(instancePage, row.type, row.instances || []).totalPages }} + ({{ pageSlice(instancePage, row.type, row.instances || []).total }} total) + </span> + <Button + variant="outline" + size="sm" + :disabled=" + pageSlice(instancePage, row.type, row.instances || []).page + >= pageSlice(instancePage, row.type, row.instances || []).totalPages - 1 + " + @click.stop.prevent="setInstancePage(row.type, pageSlice(instancePage, row.type, row.instances || []).page + 1)" + > + Next + </Button> + </div> + </div> + </details> + </div> + </details> + </Card> + </div> + + <p + v-if="data.limits.entity_instances_truncated" + class="text-xs text-muted-foreground" + > + Showing the first {{ data.limits.entity_instances_returned }} of + {{ data.counts.entity_instances }} entity instances. Increase the API limit to inspect more. 
+ </p> + </template> + </template> + </div> +</template> diff --git a/src/dev-ui/app/components/graph-management/GraphDesignRelationshipsPanel.vue b/src/dev-ui/app/components/graph-management/GraphDesignRelationshipsPanel.vue new file mode 100644 index 000000000..3d6c824e7 --- /dev/null +++ b/src/dev-ui/app/components/graph-management/GraphDesignRelationshipsPanel.vue @@ -0,0 +1,243 @@ +<script setup lang="ts"> +import { computed, ref, watch } from 'vue' +import { toast } from 'vue-sonner' +import { ChevronDown, Loader2, RefreshCw, Search } from 'lucide-vue-next' +import { Card, CardContent, CardDescription, CardHeader, CardTitle } from '@/components/ui/card' +import { Badge } from '@/components/ui/badge' +import { Button } from '@/components/ui/button' +import { + type DesignArtifactRelationshipType, + type DesignArtifactsResponse, + pageSlice, + prepopulationBadgeClass, + prepopulationCardClass, + prepopulationLabel, +} from '@/utils/kgDesignArtifacts' + +const props = withDefaults( + defineProps<{ + kgId: string + reloadNonce?: number + embedded?: boolean + }>(), + { reloadNonce: 0, embedded: true }, +) + +const { apiFetch } = useApiClient() + +const loading = ref(true) +const data = ref<DesignArtifactsResponse | null>(null) +const filterText = ref('') +const instancePage = ref<Record<string, number>>({}) + +async function fetchRelationships() { + if (!props.kgId) { + data.value = null + loading.value = false + return + } + loading.value = true + try { + data.value = await apiFetch<DesignArtifactsResponse>( + `/management/knowledge-graphs/${props.kgId}/design-artifacts`, + { query: { limit: 500 } }, + ) + instancePage.value = {} + } catch (err: unknown) { + toast.error('Failed to load relationship design artifacts', { + description: err instanceof Error ? err.message : 'Request failed', + }) + data.value = null + } finally { + loading.value = false + } +} + +const relationshipRows = computed(() => data.value?.relationships ?? 
[]) + +const filteredRows = computed(() => { + const query = filterText.value.trim().toLowerCase() + if (!query) return relationshipRows.value + return relationshipRows.value.filter((rel) => { + return ( + rel.relationship_type.toLowerCase().includes(query) + || (rel.reverse_relationship_type ?? '').toLowerCase().includes(query) + || rel.source_entity_type.toLowerCase().includes(query) + || rel.target_entity_type.toLowerCase().includes(query) + || rel.key.toLowerCase().includes(query) + ) + }) +}) + +function setInstancePage(key: string, page: number) { + instancePage.value = { ...instancePage.value, [key]: page } +} + +watch(filterText, () => { + instancePage.value = {} +}) + +watch( + () => [props.kgId, props.reloadNonce] as const, + () => { + void fetchRelationships() + }, + { immediate: true }, +) + +defineExpose({ refresh: fetchRelationships }) +</script> + +<template> + <div :class="embedded ? 'space-y-4' : 'mx-auto max-w-4xl space-y-6'"> + <div v-if="embedded" class="flex flex-wrap items-start justify-between gap-2 border-b pb-3"> + <div> + <h2 class="text-lg font-semibold tracking-tight">Relationship ontology</h2> + <p class="text-xs text-muted-foreground"> + Canonical relationship types and live edge instances from the platform database. 
+ </p> + </div> + <div class="flex items-center gap-2"> + <Badge v-if="data?.counts.relationship_types" variant="secondary" class="shrink-0"> + {{ data.counts.relationship_types }} type(s) + </Badge> + <Button variant="outline" size="sm" :disabled="loading" @click="fetchRelationships"> + <Loader2 v-if="loading" class="mr-1.5 size-3.5 animate-spin" /> + <RefreshCw v-else class="mr-1.5 size-3.5" /> + Refresh + </Button> + </div> + </div> + + <div v-if="loading" class="flex items-center justify-center py-16"> + <Loader2 class="size-8 animate-spin text-muted-foreground" /> + </div> + + <template v-else-if="data"> + <Card v-if="relationshipRows.length === 0"> + <CardHeader> + <CardTitle class="text-base">No relationship types yet</CardTitle> + </CardHeader> + <CardContent class="space-y-3 text-sm text-muted-foreground"> + <p class="text-foreground"> + Use the Graph Management Assistant to define relationship types and instances, then click Refresh. + </p> + </CardContent> + </Card> + + <template v-else> + <Card> + <CardHeader class="pb-3"> + <CardTitle class="text-base">Filter types</CardTitle> + </CardHeader> + <CardContent> + <div class="relative"> + <Search class="absolute left-3 top-1/2 size-4 -translate-y-1/2 text-muted-foreground" /> + <input + v-model="filterText" + type="search" + placeholder="Search by relationship, source, or target type…" + class="flex h-10 w-full rounded-md border border-input bg-background pl-9 pr-3 text-sm ring-offset-background placeholder:text-muted-foreground focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring" + /> + </div> + </CardContent> + </Card> + + <div class="space-y-3"> + <p v-if="filteredRows.length === 0" class="py-4 text-center text-sm text-muted-foreground"> + No relationship types match your search. 
+ </p> + + <Card + v-for="rel in filteredRows" + :key="rel.key" + :class="['overflow-hidden', prepopulationCardClass(rel.prepopulated_instances)]" + > + <details class="group"> + <summary class="flex cursor-pointer list-none items-start gap-3 p-4 [&::-webkit-details-marker]:hidden"> + <ChevronDown + class="mt-0.5 size-4 shrink-0 text-muted-foreground transition-transform group-open:rotate-180" + /> + <div class="min-w-0 flex-1 space-y-1"> + <div class="flex min-w-0 flex-wrap items-center gap-2"> + <span class="text-sm font-semibold text-foreground">{{ rel.source_entity_type }}</span> + <Badge variant="secondary" class="font-mono text-xs">{{ rel.relationship_type }}</Badge> + <template v-if="rel.reverse_relationship_type"> + <span class="text-xs text-muted-foreground">/</span> + <Badge variant="outline" class="font-mono text-xs">{{ rel.reverse_relationship_type }}</Badge> + </template> + <Badge variant="outline" :class="prepopulationBadgeClass(rel.prepopulated_instances)"> + {{ prepopulationLabel(rel.prepopulated_instances) }} + </Badge> + <span class="text-sm text-muted-foreground">→</span> + <span class="text-sm font-semibold text-foreground">{{ rel.target_entity_type }}</span> + <Badge variant="outline" class="ml-auto"> + {{ rel.instance_count }} instance{{ rel.instance_count === 1 ? 
'' : 's' }} + </Badge> + </div> + <p class="truncate text-xs text-muted-foreground">{{ rel.key }}</p> + </div> + </summary> + <div class="space-y-4 border-t px-4 pb-4 pt-3"> + <p v-if="rel.description" class="text-sm text-muted-foreground">{{ rel.description }}</p> + + <div class="space-y-2"> + <p class="text-xs font-medium uppercase tracking-wide text-muted-foreground">Parameters</p> + <div + v-if="rel.parameter_definitions && Object.keys(rel.parameter_definitions).length > 0" + class="divide-y rounded-md border" + > + <div + v-for="(label, key) in rel.parameter_definitions" + :key="key" + class="flex flex-wrap gap-x-2 gap-y-1 px-3 py-2 text-sm" + > + <code class="rounded bg-muted px-1.5 py-0.5 font-mono text-xs">{{ key }}</code> + <span class="text-muted-foreground">{{ label }}</span> + </div> + </div> + <p v-else class="text-sm text-muted-foreground">No parameter definitions</p> + </div> + + <details v-if="rel.instances.length > 0" class="group/inst rounded-lg border"> + <summary + class="flex cursor-pointer list-none items-center gap-2 px-3 py-2 text-sm font-medium hover:bg-muted/50 [&::-webkit-details-marker]:hidden" + > + <ChevronDown + class="size-4 shrink-0 text-muted-foreground transition-transform group-open/inst:rotate-180" + /> + Instances + </summary> + <div class="space-y-3 border-t p-3"> + <ul class="space-y-2 text-sm"> + <li + v-for="(inst, idx) in pageSlice(instancePage, rel.key, rel.instances).items" + :key="`${rel.key}-${idx}`" + class="rounded-md bg-muted/40 px-3 py-2" + > + <div class="mb-1 font-mono text-xs text-muted-foreground"> + {{ inst.source_slug }} --{{ rel.relationship_type }}--> {{ inst.target_slug }} + </div> + <pre class="whitespace-pre-wrap break-all text-xs">{{ + JSON.stringify(inst.properties ?? 
{}, null, 2) + }}</pre> + </li> + </ul> + </div> + </details> + </div> + </details> + </Card> + </div> + + <p + v-if="data.limits.relationship_instances_truncated" + class="text-xs text-muted-foreground" + > + Showing the first {{ data.limits.relationship_instances_returned }} of + {{ data.counts.relationship_instances }} relationship instances. + </p> + </template> + </template> + </div> +</template> diff --git a/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue b/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue index c6e1177e5..fb9a5a8d7 100644 --- a/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue +++ b/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue @@ -18,6 +18,7 @@ import { MessageSquare, PencilRuler, PlayCircle, + RefreshCw, ScrollText, ShieldAlert, Trash2, @@ -38,6 +39,8 @@ import { AlertDialogTitle, } from '@/components/ui/alert-dialog' import SharedConversationPanel from '@/components/extraction/SharedConversationPanel.vue' +import GraphDesignEntitiesPanel from '@/components/graph-management/GraphDesignEntitiesPanel.vue' +import GraphDesignRelationshipsPanel from '@/components/graph-management/GraphDesignRelationshipsPanel.vue' import { GRAPH_MANAGEMENT_INPUT_PLACEHOLDERS, GRAPH_MANAGEMENT_MODE_LABELS, @@ -99,6 +102,7 @@ import { import { streamExtractionChatTurn, streamRuntimeWarmup } from '@/utils/kgExtractionChat' import { applyThinkingRecentUpdate } from '@/utils/thinkingActivityLines' import { useGraphApi } from '@/composables/api/useGraphApi' +import type { DesignArtifactsResponse } from '@/utils/kgDesignArtifacts' const runtimeConfig = useRuntimeConfig() const { accessToken } = useAuth() @@ -242,6 +246,8 @@ const inlineRunLogsError = ref<string | null>(null) const inlineMutationJsonl = ref('') const inlineMutationApplying = ref(false) const inlineMutationApplyError = ref<string | null>(null) +const designArtifactsReloadNonce = ref(0) +const designArtifactsRefreshing = ref(false) const activeStep = computed(() => 
parseManageStepQuery(route.query.step)) const showOverview = computed(() => activeStep.value === null) @@ -494,6 +500,31 @@ const nextSteps = computed(() => { const sessionActivityLines = ref<string[]>([]) +async function refreshDesignArtifacts(options: { silent?: boolean } = {}) { + if (!hasTenant.value || !kgId.value) return + designArtifactsRefreshing.value = true + try { + const artifacts = await apiFetch<DesignArtifactsResponse>( + `/management/knowledge-graphs/${kgId.value}/design-artifacts`, + { query: { limit: 500 } }, + ) + entityTypeLabels.value = Object.keys(artifacts.entities ?? {}).sort() + relationshipTypeLabels.value = (artifacts.relationships ?? []).map((rel) => rel.relationship_type) + designArtifactsReloadNonce.value += 1 + if (!options.silent) { + toast.success('Design artifacts refreshed') + } + } catch (err) { + if (!options.silent) { + toast.error('Failed to refresh design artifacts', { + description: extractErrorMessage(err), + }) + } + } finally { + designArtifactsRefreshing.value = false + } +} + async function loadKgIdentity() { if (!hasTenant.value || !kgId.value) return try { @@ -1083,6 +1114,8 @@ async function sendChatMessage(message: string) { sendingChat.value = false if (chatSucceeded) { syncActivityLinesFromSession() + await refreshDesignArtifacts({ silent: true }) + await loadWorkspaceStatus() } } } @@ -1211,6 +1244,7 @@ watch( loadExtractionSession(), loadSessionHistory(), loadGraphManagementDataSources(), + refreshDesignArtifacts({ silent: true }), ]) await warmupAssistantRuntime() } else { @@ -1840,12 +1874,27 @@ watch(selectedOpsDataSourceId, () => { class="graph-management-schema-panel lg:sticky lg:top-4 lg:self-start" > <CardHeader class="pb-2"> - <CardTitle class="text-sm font-semibold">Schema & artifacts</CardTitle> - <CardDescription class="text-xs"> - Workspace signals for - <span class="font-medium text-foreground">{{ graphManagementModeLabel }}</span>. - Select an artifact to open it in the detail panel to the right. 
- </CardDescription> + <div class="flex items-start justify-between gap-2"> + <div> + <CardTitle class="text-sm font-semibold">Design Artifacts</CardTitle> + <CardDescription class="text-xs"> + Live schema and instances from the platform database for + <span class="font-medium text-foreground">{{ graphManagementModeLabel }}</span>. + Select an artifact to open it in the detail panel to the right. + </CardDescription> + </div> + <Button + variant="outline" + size="sm" + class="shrink-0" + :disabled="designArtifactsRefreshing" + @click="refreshDesignArtifacts()" + > + <Loader2 v-if="designArtifactsRefreshing" class="mr-1.5 size-3.5 animate-spin" /> + <RefreshCw v-else class="mr-1.5 size-3.5" /> + Refresh + </Button> + </div> </CardHeader> <CardContent class="space-y-1.5 p-3 pt-0"> <template v-if="schemaRailItems.length > 0"> @@ -1874,87 +1923,21 @@ watch(selectedOpsDataSourceId, () => { </Card> <div id="graph-management-artifact-detail" class="graph-management-detail scroll-mt-6 space-y-6"> - <Card v-if="selectedRailItemId === 'schema-entities'"> - <CardHeader> - <CardTitle class="text-base flex items-center gap-2"> - <Box class="size-4" /> - Schema: Entities - </CardTitle> - <CardDescription> - Entity type coverage snapshot for - <span class="font-medium text-foreground">{{ graphManagementModeLabel }}</span>. - </CardDescription> - </CardHeader> - <CardContent class="space-y-3 text-sm"> - <div class="flex flex-wrap justify-end gap-2"> - <Button variant="outline" size="sm" as-child> - <NuxtLink to="/graph/schema">Open schema browser</NuxtLink> - </Button> - </div> - <div class="rounded-lg border bg-muted/30 p-3"> - <div class="flex items-center justify-between gap-2"> - <p class="text-xs font-medium uppercase tracking-wide text-muted-foreground"> - Entity type inventory - </p> - <Badge :variant="entityTypeLabels.length > 0 ? 
'default' : 'secondary'"> - {{ entityTypeLabels.length }} type(s) - </Badge> - </div> - <p - v-if="entityTypeLabels.length === 0" - class="mt-2 text-xs text-muted-foreground" - > - No entity types defined yet. Add at least one type to satisfy schema readiness. - </p> - <div v-else class="mt-2 flex flex-wrap gap-2"> - <Badge v-for="label in entityTypeLabels" :key="label" variant="outline"> - {{ label }} - </Badge> - </div> - </div> - </CardContent> - </Card> + <div v-if="selectedRailItemId === 'schema-entities'" class="min-w-0 space-y-2"> + <GraphDesignEntitiesPanel + :kg-id="kgId" + :reload-nonce="designArtifactsReloadNonce" + embedded + /> + </div> - <Card v-else-if="selectedRailItemId === 'schema-relationships'"> - <CardHeader> - <CardTitle class="text-base flex items-center gap-2"> - <Link2 class="size-4" /> - Schema: Relationships - </CardTitle> - <CardDescription> - Relationship type coverage snapshot for - <span class="font-medium text-foreground">{{ graphManagementModeLabel }}</span>. - </CardDescription> - </CardHeader> - <CardContent class="space-y-3 text-sm"> - <div class="flex flex-wrap justify-end gap-2"> - <Button variant="outline" size="sm" as-child> - <NuxtLink to="/graph/schema">Open schema browser</NuxtLink> - </Button> - </div> - <div class="rounded-lg border bg-muted/30 p-3"> - <div class="flex items-center justify-between gap-2"> - <p class="text-xs font-medium uppercase tracking-wide text-muted-foreground"> - Relationship type inventory - </p> - <Badge :variant="relationshipTypeLabels.length > 0 ? 'default' : 'secondary'"> - {{ relationshipTypeLabels.length }} type(s) - </Badge> - </div> - <p - v-if="relationshipTypeLabels.length === 0" - class="mt-2 text-xs text-muted-foreground" - > - No relationship types defined yet. Add at least one type to satisfy schema readiness. 
- </p> - <div v-else class="mt-2 flex flex-wrap gap-2"> - <Badge v-for="label in relationshipTypeLabels" :key="label" variant="outline"> - {{ label }} - </Badge> - </div> - </div> - </CardContent> - </Card> + <div v-else-if="selectedRailItemId === 'schema-relationships'" class="min-w-0 space-y-2"> + <GraphDesignRelationshipsPanel + :kg-id="kgId" + :reload-nonce="designArtifactsReloadNonce" + embedded + /> + </div> <Card v-else-if="selectedRailItemId === 'schema-readiness'"> <CardHeader> diff --git a/src/dev-ui/app/tests/kg-design-artifacts.test.ts b/src/dev-ui/app/tests/kg-design-artifacts.test.ts new file mode 100644 index 000000000..7bed4be68 --- /dev/null +++ b/src/dev-ui/app/tests/kg-design-artifacts.test.ts @@ -0,0 +1,25 @@ +/** Tests for design artifact UI helpers. */ + +import { describe, expect, it } from 'vitest' +import { + pageSlice, + prepopulationBadgeClass, + prepopulationLabel, + prepopulationMode, +} from '../utils/kgDesignArtifacts' + +describe('kgDesignArtifacts', () => { + it('maps prepopulation flags to k-extract-style labels', () => { + expect(prepopulationMode(true)).toBe('true') + expect(prepopulationMode(false)).toBe('false') + expect(prepopulationLabel(true)).toContain('prepopulated: true') + expect(prepopulationBadgeClass(true)).toContain('cyan') + }) + + it('pages instance lists consistently', () => { + const items = Array.from({ length: 25 }, (_, index) => index) + const slice = pageSlice({}, 'service', items) + expect(slice.items).toHaveLength(20) + expect(slice.totalPages).toBe(2) + }) +}) diff --git a/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts b/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts index 956ba833f..f6a22d7c2 100644 --- a/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts +++ b/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts @@ -427,7 +427,10 @@ describe('KG-MANAGE-007 - graph management modes', () => { describe('KG-MANAGE-008 - hybrid lower panel shared rail', () => 
{ it('renders artifact navigator and detail panel in k-extract-style layout', () => { expect(manageWorkspaceVue).toContain('graph-management-artifacts') - expect(manageWorkspaceVue).toContain('Schema & artifacts') + expect(manageWorkspaceVue).toContain('Design Artifacts') + expect(manageWorkspaceVue).toContain('refreshDesignArtifacts') + expect(manageWorkspaceVue).toContain('GraphDesignEntitiesPanel') + expect(manageWorkspaceVue).toContain('GraphDesignRelationshipsPanel') expect(manageWorkspaceVue).toContain('graph-management-artifact-detail') expect(manageWorkspaceVue).toContain('graph-management-session-pointers') expect(manageWorkspaceVue).toContain('graphManagementArtifactRowClass') @@ -487,9 +490,9 @@ describe('KG-MANAGE-009 - hybrid lower panel mode-specific detail', () => { describe('KG-MANAGE-010 - schema design parity behavior', () => { it('exposes schema readiness and validation detail in initial schema design mode', () => { - expect(manageWorkspaceVue).toContain('Schema: Entities') - expect(manageWorkspaceVue).toContain('Schema: Relationships') - expect(manageWorkspaceVue).toContain("selectedRailItemId === 'schema-entities'") + expect(manageWorkspaceVue).toContain('GraphDesignEntitiesPanel') + expect(manageWorkspaceVue).toContain('GraphDesignRelationshipsPanel') + expect(manageWorkspaceVue).toContain('refreshDesignArtifacts') expect(manageWorkspaceVue).toContain("selectedRailItemId === 'schema-relationships'") expect(manageWorkspaceVue).toContain('progressChecklist') expect(manageWorkspaceVue).toContain('Bootstrap progress checklist') diff --git a/src/dev-ui/app/utils/kgDesignArtifacts.ts b/src/dev-ui/app/utils/kgDesignArtifacts.ts new file mode 100644 index 000000000..b3edbd6ef --- /dev/null +++ b/src/dev-ui/app/utils/kgDesignArtifacts.ts @@ -0,0 +1,104 @@ +/** Shared helpers for Graph Management design artifact panels. 
*/ + +export type PrepopulationMode = 'true' | 'hard-coded' | 'false' + +export function prepopulationMode(raw: string | boolean | undefined): PrepopulationMode { + if (raw === true) return 'true' + const normalized = String(raw ?? 'false').toLowerCase().trim() + if (normalized === 'true') return 'true' + if (normalized === 'hard-coded') return 'hard-coded' + return 'false' +} + +export function prepopulationLabel(raw: string | boolean | undefined): string { + const mode = prepopulationMode(raw) + if (mode === 'true') return 'prepopulated: true' + if (mode === 'hard-coded') return 'prepopulated: hard-coded' + return 'prepopulated: false' +} + +export function prepopulationBadgeClass(raw: string | boolean | undefined): string { + const mode = prepopulationMode(raw) + if (mode === 'true') return 'border-cyan-500/40 bg-cyan-500/10 text-cyan-700 dark:text-cyan-300' + if (mode === 'hard-coded') return 'border-amber-500/40 bg-amber-500/10 text-amber-700 dark:text-amber-300' + return 'border-emerald-500/40 bg-emerald-500/10 text-emerald-700 dark:text-emerald-300' +} + +export function prepopulationCardClass(raw: string | boolean | undefined): string { + const mode = prepopulationMode(raw) + if (mode === 'true') return 'border-l-4 border-l-cyan-500/70' + if (mode === 'hard-coded') return 'border-l-4 border-l-amber-500/70' + return 'border-l-4 border-l-emerald-500/70' +} + +export interface DesignArtifactInstance { + slug?: string + source_slug?: string + target_slug?: string + properties?: Record<string, unknown> +} + +export interface DesignArtifactEntityType { + type: string + description?: string + required_properties?: string[] + optional_properties?: string[] + property_definitions?: Record<string, string> + prepopulated_instances?: string | boolean + instance_count: number + instances?: DesignArtifactInstance[] +} + +export interface DesignArtifactRelationshipType { + key: string + source_entity_type: string + target_entity_type: string + relationship_type: string + 
reverse_relationship_type: string | null + reverse_relationship_description: string | null + prepopulated_instances?: string | boolean + description: string | null + instance_count: number + instances: DesignArtifactInstance[] + required_parameters: string[] + optional_parameters: string[] + parameter_definitions: Record<string, string> +} + +export interface DesignArtifactsResponse { + found: boolean + knowledge_graph_id: string + entities: Record<string, Omit<DesignArtifactEntityType, 'type'>> + relationships: DesignArtifactRelationshipType[] + counts: { + entity_types: number + relationship_types: number + entity_instances: number + relationship_instances: number + } + limits: { + requested: number + entity_instances_returned: number + relationship_instances_returned: number + entity_instances_truncated: boolean + relationship_instances_truncated: boolean + } +} + +export const DESIGN_ARTIFACTS_PAGE_SIZE = 20 + +export function pageSlice<T>( + pageByKey: Record<string, number>, + key: string, + items: T[], + pageSize = DESIGN_ARTIFACTS_PAGE_SIZE, +) { + const page = pageByKey[key] ?? 0 + const start = page * pageSize + return { + items: items.slice(start, start + pageSize), + page, + totalPages: Math.max(1, Math.ceil(items.length / pageSize)), + total: items.length, + } +} From 9bd5df7f9799d923bc80ab9bac41223011eb6038 Mon Sep 17 00:00:00 2001 From: aredenba-rh <aredenba@redhat.com> Date: Wed, 3 Jun 2026 10:25:06 -0400 Subject: [PATCH 087/153] fix(dev-ui): focus new URL field when adding data source rows Auto-focus the freshly added input on the new data source wizard so users can keep typing without an extra click. 
Co-authored-by: Cursor <cursoragent@cursor.com> --- .../[kgId]/data-sources/new.vue | 29 +++++++++++++++---- 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/src/dev-ui/app/pages/knowledge-graphs/[kgId]/data-sources/new.vue b/src/dev-ui/app/pages/knowledge-graphs/[kgId]/data-sources/new.vue index 331d775e6..7cbb60bce 100644 --- a/src/dev-ui/app/pages/knowledge-graphs/[kgId]/data-sources/new.vue +++ b/src/dev-ui/app/pages/knowledge-graphs/[kgId]/data-sources/new.vue @@ -96,6 +96,24 @@ const syncStepLabel = ref('') const readyForStats = ref(false) const wizardSectionRef = ref<HTMLElement | null>(null) +const urlInputRefs = new Map<string, HTMLInputElement>() + +function setUrlInputRef(id: string, el: unknown) { + if (!el) { + urlInputRefs.delete(id) + return + } + if (el instanceof HTMLInputElement) { + urlInputRefs.set(id, el) + return + } + if (typeof el === 'object' && el !== null && '$el' in el) { + const root = (el as { $el: unknown }).$el + if (root instanceof HTMLInputElement) { + urlInputRefs.set(id, root) + } + } +} const manageUrl = computed(() => buildKgManageUrl(kgId.value)) const operationsUrl = computed(() => buildKgDataSourcesUrl(kgId.value)) @@ -115,11 +133,11 @@ const preparedSourceCount = computed(() => createdSources.value.filter((s) => s.syncStatus === 'ingested').length, ) -function addUrlField() { - sourceUrlInputs.value.push({ - id: `source-${Date.now()}-${sourceUrlInputs.value.length + 1}`, - url: '', - }) +async function addUrlField() { + const id = `source-${Date.now()}-${sourceUrlInputs.value.length + 1}` + sourceUrlInputs.value.push({ id, url: '' }) + await nextTick() + urlInputRefs.get(id)?.focus() } function removeUrlField(id: string) { @@ -469,6 +487,7 @@ onUnmounted(() => { class="flex items-center gap-2" > <Input + :ref="(el) => setUrlInputRef(row.id, el)" v-model="row.url" type="text" placeholder="https://github.com/org/repo" From cf73af82ec6c665e02b7dfd28f7918fa4c9b5ca3 Mon Sep 17 00:00:00 2001 From: aredenba-rh 
<aredenba@redhat.com> Date: Wed, 3 Jun 2026 10:46:35 -0400 Subject: [PATCH 088/153] fix(dev-ui): stop phantom footer scroll on graph management page Restructure the artifact rail and detail panel so long schema content scrolls internally instead of inflating the page with blank space. Co-authored-by: Cursor <cursoragent@cursor.com> --- .../pages/knowledge-graphs/[kgId]/manage.vue | 41 +++++++------------ .../knowledge-graph-manage-workspace.test.ts | 3 +- 2 files changed, 16 insertions(+), 28 deletions(-) diff --git a/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue b/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue index fb9a5a8d7..5db42d01a 100644 --- a/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue +++ b/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue @@ -18,7 +18,6 @@ import { MessageSquare, PencilRuler, PlayCircle, - RefreshCw, ScrollText, ShieldAlert, Trash2, @@ -946,9 +945,9 @@ function setGraphManagementMode(mode: GraphManagementMode) { function selectSchemaRailItem(itemId: GraphManagementRailItemId) { selectedRailItemId.value = itemId void nextTick(() => { - document.getElementById('graph-management-artifact-detail')?.scrollIntoView({ + document.querySelector<HTMLElement>('.graph-management-detail')?.scrollTo({ + top: 0, behavior: 'smooth', - block: 'start', }) }) } @@ -1868,33 +1867,18 @@ watch(selectedOpsDataSourceId, () => { @send-message="sendChatMessage" /> - <div class="graph-management-artifacts grid gap-6 lg:grid-cols-[minmax(0,15.5rem)_minmax(0,1fr)] lg:items-start"> + <div class="graph-management-artifacts flex flex-col gap-6 lg:relative"> <Card id="graph-management-schema-artifacts" - class="graph-management-schema-panel lg:sticky lg:top-4 lg:self-start" + class="graph-management-schema-panel w-full shrink-0 lg:absolute lg:left-0 lg:top-0 lg:z-10 lg:w-[15.5rem]" > <CardHeader class="pb-2"> - <div class="flex items-start justify-between gap-2"> - <div> - <CardTitle class="text-sm font-semibold">Design 
Artifacts</CardTitle> - <CardDescription class="text-xs"> - Live schema and instances from the platform database for - <span class="font-medium text-foreground">{{ graphManagementModeLabel }}</span>. - Select an artifact to open it in the detail panel to the right. - </CardDescription> - </div> - <Button - variant="outline" - size="sm" - class="shrink-0" - :disabled="designArtifactsRefreshing" - @click="refreshDesignArtifacts()" - > - <Loader2 v-if="designArtifactsRefreshing" class="mr-1.5 size-3.5 animate-spin" /> - <RefreshCw v-else class="mr-1.5 size-3.5" /> - Refresh - </Button> - </div> + <CardTitle class="text-sm font-semibold">Design Artifacts</CardTitle> + <CardDescription class="text-xs"> + Live schema and instances from the platform database for + <span class="font-medium text-foreground">{{ graphManagementModeLabel }}</span>. + Select an artifact to open it in the detail panel to the right. + </CardDescription> </CardHeader> <CardContent class="space-y-1.5 p-3 pt-0"> <template v-if="schemaRailItems.length > 0"> @@ -1922,7 +1906,10 @@ watch(selectedOpsDataSourceId, () => { </CardContent> </Card> - <div id="graph-management-artifact-detail" class="graph-management-detail scroll-mt-6 space-y-6"> + <div + id="graph-management-artifact-detail" + class="graph-management-detail min-w-0 space-y-6 overscroll-contain lg:pl-[calc(15.5rem+1.5rem)] lg:max-h-[min(70vh,calc(100vh-18rem))] lg:overflow-y-auto" + > <div v-if="selectedRailItemId === 'schema-entities'" class="min-w-0 space-y-2"> <GraphDesignEntitiesPanel :kg-id="kgId" diff --git a/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts b/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts index f6a22d7c2..e6c5b59b8 100644 --- a/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts +++ b/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts @@ -435,7 +435,8 @@ describe('KG-MANAGE-008 - hybrid lower panel shared rail', () => { 
expect(manageWorkspaceVue).toContain('graph-management-session-pointers') expect(manageWorkspaceVue).toContain('graphManagementArtifactRowClass') expect(manageWorkspaceVue).toContain('schemaRailItems') - expect(manageWorkspaceVue).toContain('lg:grid-cols-[minmax(0,15.5rem)_minmax(0,1fr)]') + expect(manageWorkspaceVue).toContain('lg:absolute lg:left-0 lg:top-0') + expect(manageWorkspaceVue).toContain('lg:overflow-y-auto') }) it('builds rail items with status and last-updated metadata', () => { From 24819189fe7ab28e954f63e522586c719610e23b Mon Sep 17 00:00:00 2001 From: aredenba-rh <aredenba@redhat.com> Date: Wed, 3 Jun 2026 13:19:32 -0400 Subject: [PATCH 089/153] fix(dev-ui): remove outer scroll on graph management step Constrain the step to the viewport so only the assistant chat and artifact detail panel scroll, eliminating the main page scroll and empty footer gap. Co-authored-by: Cursor <cursoragent@cursor.com> --- .../pages/knowledge-graphs/[kgId]/manage.vue | 64 +++++++++++-------- .../knowledge-graph-manage-workspace.test.ts | 4 +- 2 files changed, 42 insertions(+), 26 deletions(-) diff --git a/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue b/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue index 5db42d01a..e681ee1f8 100644 --- a/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue +++ b/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue @@ -1268,7 +1268,14 @@ watch(selectedOpsDataSourceId, () => { </script> <template> - <div class="mx-auto max-w-7xl space-y-6"> + <div + class="mx-auto max-w-7xl" + :class=" + activeStep === 'graph-management' && !showOverview + ? 
'flex flex-col gap-4 lg:min-h-0 lg:h-[calc(100dvh-11.5rem)] lg:max-h-[calc(100dvh-11.5rem)] lg:overflow-hidden' + : 'space-y-6' + " + > <template v-if="showOverview"> <NuxtLink to="/knowledge-graphs" @@ -1280,27 +1287,29 @@ watch(selectedOpsDataSourceId, () => { </template> <template v-else> - <div class="flex items-center justify-between"> - <div class="space-y-1"> - <div class="flex items-center gap-2"> - <h1 class="text-2xl font-semibold tracking-tight">{{ graphHeaderTitle }}</h1> - <Badge variant="secondary">{{ stepBadgeLabel }}</Badge> + <div :class="activeStep === 'graph-management' ? 'shrink-0 space-y-4' : 'space-y-4'"> + <div class="flex items-center justify-between"> + <div class="space-y-1"> + <div class="flex items-center gap-2"> + <h1 class="text-2xl font-semibold tracking-tight">{{ graphHeaderTitle }}</h1> + <Badge variant="secondary">{{ stepBadgeLabel }}</Badge> + </div> + <p class="text-sm text-muted-foreground"> + <template v-if="activeStep === 'graph-management'"> + Conversation-first graph management with shared session and mode-specific workspace panels. + </template> + <template v-else> + Knowledge-graph scoped mutation run visibility and run metrics. + </template> + </p> </div> - <p class="text-sm text-muted-foreground"> - <template v-if="activeStep === 'graph-management'"> - Conversation-first graph management with shared session and mode-specific workspace panels. - </template> - <template v-else> - Knowledge-graph scoped mutation run visibility and run metrics. 
- </template> - </p> + <Button variant="outline" size="sm" @click="returnToWorkspaceOverview()"> + <ArrowLeft class="mr-1.5 size-3.5" /> + Back to workspace overview + </Button> </div> - <Button variant="outline" size="sm" @click="returnToWorkspaceOverview()"> - <ArrowLeft class="mr-1.5 size-3.5" /> - Back to workspace overview - </Button> + <Separator /> </div> - <Separator /> </template> <div v-if="!hasTenant" class="rounded-lg border border-dashed p-6 text-sm text-muted-foreground"> @@ -1767,7 +1776,10 @@ watch(selectedOpsDataSourceId, () => { </Card> </section> - <section v-else-if="activeStep === 'graph-management'" class="space-y-4"> + <section + v-else-if="activeStep === 'graph-management'" + class="flex flex-col gap-4 lg:min-h-0 lg:flex-1 lg:overflow-hidden" + > <div v-if="graphManagementSectionState.phase === 'error'" class="rounded-lg border border-dashed p-4 text-sm" @@ -1780,7 +1792,7 @@ watch(selectedOpsDataSourceId, () => { </Button> </div> - <Card class="graph-management-controls overflow-hidden"> + <Card class="graph-management-controls shrink-0 overflow-hidden"> <CardHeader class="space-y-4 pb-4"> <div class="flex flex-wrap items-start gap-3"> <div @@ -1846,7 +1858,8 @@ watch(selectedOpsDataSourceId, () => { </CardHeader> </Card> - <SharedConversationPanel + <div class="shrink-0"> + <SharedConversationPanel v-model:draft-message="draftMessage" :mode-label="graphManagementModeLabel" :description="graphManagementChatDescription" @@ -1865,9 +1878,10 @@ watch(selectedOpsDataSourceId, () => { @refresh="refreshGraphManagementSession" @clear-chat="clearChat" @send-message="sendChatMessage" - /> + /> + </div> - <div class="graph-management-artifacts flex flex-col gap-6 lg:relative"> + <div class="graph-management-artifacts flex min-h-0 flex-1 flex-col gap-4 overflow-hidden lg:relative"> <Card id="graph-management-schema-artifacts" class="graph-management-schema-panel w-full shrink-0 lg:absolute lg:left-0 lg:top-0 lg:z-10 lg:w-[15.5rem]" @@ -1908,7 +1922,7 
@@ watch(selectedOpsDataSourceId, () => { <div id="graph-management-artifact-detail" - class="graph-management-detail min-w-0 space-y-6 overscroll-contain lg:pl-[calc(15.5rem+1.5rem)] lg:max-h-[min(70vh,calc(100vh-18rem))] lg:overflow-y-auto" + class="graph-management-detail min-h-0 min-w-0 flex-1 space-y-6 overflow-y-auto overscroll-contain lg:pl-[calc(15.5rem+1.5rem)]" > <div v-if="selectedRailItemId === 'schema-entities'" class="min-w-0 space-y-2"> <GraphDesignEntitiesPanel diff --git a/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts b/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts index e6c5b59b8..15d927f12 100644 --- a/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts +++ b/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts @@ -436,7 +436,9 @@ describe('KG-MANAGE-008 - hybrid lower panel shared rail', () => { expect(manageWorkspaceVue).toContain('graphManagementArtifactRowClass') expect(manageWorkspaceVue).toContain('schemaRailItems') expect(manageWorkspaceVue).toContain('lg:absolute lg:left-0 lg:top-0') - expect(manageWorkspaceVue).toContain('lg:overflow-y-auto') + expect(manageWorkspaceVue).toContain('graph-management-detail min-h-0 min-w-0 flex-1') + expect(manageWorkspaceVue).toContain('overflow-y-auto overscroll-contain') + expect(manageWorkspaceVue).toContain('lg:h-[calc(100dvh-11.5rem)]') }) it('builds rail items with status and last-updated metadata', () => { From 376813b0df60b699edd994b31595461843ca44e8 Mon Sep 17 00:00:00 2001 From: aredenba-rh <aredenba@redhat.com> Date: Wed, 3 Jun 2026 13:31:29 -0400 Subject: [PATCH 090/153] fix(extraction): sign workload tokens as JWTs for sticky session auth Replace in-memory ULID lookup with HMAC-signed tokens so Graph Management schema tools keep working after API reloads and hot restarts. 
Co-authored-by: Cursor <cursoragent@cursor.com> --- compose.dev.yaml | 1 + src/api/extraction/infrastructure/__init__.py | 2 +- .../workload_credential_issuer.py | 105 ++++++++++++++++++ .../infrastructure/workload_runtime.py | 51 +-------- .../workload_runtime_factory.py | 19 +++- .../workload_runtime_settings.py | 7 ++ .../test_workload_credential_issuer.py | 64 ++++++++++- .../test_workload_runtime_settings.py | 13 +++ 8 files changed, 207 insertions(+), 55 deletions(-) create mode 100644 src/api/extraction/infrastructure/workload_credential_issuer.py diff --git a/compose.dev.yaml b/compose.dev.yaml index aaf089020..d0823cc2a 100644 --- a/compose.dev.yaml +++ b/compose.dev.yaml @@ -19,6 +19,7 @@ services: KARTOGRAPH_EXTRACTION_RUNTIME_CONTAINER_NETWORK: kartograph_kartograph KARTOGRAPH_EXTRACTION_RUNTIME_STICKY_IMAGE: kartograph-agent-runtime:dev KARTOGRAPH_EXTRACTION_RUNTIME_API_BASE_URL: http://api:8000 + KARTOGRAPH_EXTRACTION_RUNTIME_WORKLOAD_TOKEN_SIGNING_KEY: kartograph-dev-workload-token-signing-key KARTOGRAPH_EXTRACTION_RUNTIME_JOB_PACKAGE_WORK_DIR: /tmp/kartograph/job_packages KARTOGRAPH_EXTRACTION_RUNTIME_SKILLS_DIR: ${PWD}/skills KARTOGRAPH_EXTRACTION_RUNTIME_CONTAINER_RUN_UID: ${HOST_UID} diff --git a/src/api/extraction/infrastructure/__init__.py b/src/api/extraction/infrastructure/__init__.py index f8bfd2360..990ee8092 100644 --- a/src/api/extraction/infrastructure/__init__.py +++ b/src/api/extraction/infrastructure/__init__.py @@ -12,10 +12,10 @@ from extraction.infrastructure.runtime_context_builder import ( FilesystemExtractionRuntimeContextBuilder, ) +from extraction.infrastructure.workload_credential_issuer import ScopedWorkloadCredentialIssuer from extraction.infrastructure.workload_runtime import ( InMemoryEphemeralExtractionWorkerLauncher, InMemoryStickySessionRuntimeManager, - ScopedWorkloadCredentialIssuer, ) from extraction.infrastructure.workload_runtime_factory import ( create_ephemeral_extraction_worker_launcher, diff --git 
a/src/api/extraction/infrastructure/workload_credential_issuer.py b/src/api/extraction/infrastructure/workload_credential_issuer.py new file mode 100644 index 000000000..33a4605e9 --- /dev/null +++ b/src/api/extraction/infrastructure/workload_credential_issuer.py @@ -0,0 +1,105 @@ +"""Stateless JWT workload credentials for extraction runtime containers.""" + +from __future__ import annotations + +from datetime import UTC, datetime, timedelta + +from jose import JWTError, jwt +from jose.exceptions import ExpiredSignatureError +from ulid import ULID + +from extraction.ports.runtime import ScopedWorkloadCredentials + +WORKLOAD_TOKEN_ALGORITHM = "HS256" +DEFAULT_DEV_WORKLOAD_TOKEN_SIGNING_KEY = "kartograph-dev-workload-token-signing-key" + + +class ScopedWorkloadCredentialIssuer: + """Issues and verifies short-lived tenant/KG scoped workload JWTs.""" + + def __init__( + self, + *, + signing_key: str = DEFAULT_DEV_WORKLOAD_TOKEN_SIGNING_KEY, + default_ttl: timedelta = timedelta(minutes=15), + ) -> None: + normalized_key = signing_key.strip() + if not normalized_key: + raise ValueError("workload token signing key must not be empty") + self._signing_key = normalized_key + self._default_ttl = default_ttl + + def issue( + self, + *, + tenant_id: str, + knowledge_graph_id: str, + extra_scopes: tuple[str, ...] 
= (), + ) -> ScopedWorkloadCredentials: + now = datetime.now(UTC) + expires_at = (now + self._default_ttl).replace(microsecond=0) + scopes = ( + f"tenant:{tenant_id}", + f"knowledge_graph:{knowledge_graph_id}", + "workload:extraction", + *extra_scopes, + ) + token = jwt.encode( + { + "sub": "workload", + "jti": str(ULID()), + "scopes": list(scopes), + "iat": int(now.timestamp()), + "exp": int(expires_at.timestamp()), + }, + self._signing_key, + algorithm=WORKLOAD_TOKEN_ALGORITHM, + ) + return ScopedWorkloadCredentials( + token=str(token), + expires_at=expires_at, + scopes=scopes, + ) + + def issue_for_sticky_session( + self, *, tenant_id: str, knowledge_graph_id: str + ) -> ScopedWorkloadCredentials: + """Issue chat-scoped credentials for sticky session agent containers.""" + return self.issue( + tenant_id=tenant_id, + knowledge_graph_id=knowledge_graph_id, + extra_scopes=("workload:chat",), + ) + + def verify(self, token: str) -> ScopedWorkloadCredentials | None: + """Return credentials when the JWT signature and expiry are valid.""" + try: + payload = jwt.decode( + token, + self._signing_key, + algorithms=[WORKLOAD_TOKEN_ALGORITHM], + options={"require_exp": True, "require_iat": True}, + ) + except ExpiredSignatureError: + return None + except JWTError: + return None + + scopes_raw = payload.get("scopes") + if not isinstance(scopes_raw, list) or not scopes_raw: + return None + + exp = payload.get("exp") + if not isinstance(exp, int): + return None + + expires_at = datetime.fromtimestamp(exp, tz=UTC) + if expires_at <= datetime.now(UTC): + return None + + scopes = tuple(str(scope) for scope in scopes_raw) + return ScopedWorkloadCredentials( + token=token, + expires_at=expires_at, + scopes=scopes, + ) diff --git a/src/api/extraction/infrastructure/workload_runtime.py b/src/api/extraction/infrastructure/workload_runtime.py index 7544854f7..5d4e9f658 100644 --- a/src/api/extraction/infrastructure/workload_runtime.py +++ 
b/src/api/extraction/infrastructure/workload_runtime.py @@ -7,6 +7,7 @@ from ulid import ULID +from extraction.infrastructure.workload_credential_issuer import ScopedWorkloadCredentialIssuer from extraction.ports.runtime import ( EphemeralWorkerLaunchRequest, EphemeralWorkerLaunchResult, @@ -132,56 +133,6 @@ def is_runtime_active( ) -class ScopedWorkloadCredentialIssuer: - """Issues short-lived tenant/KG scoped credentials for extraction workloads.""" - - def __init__(self, *, default_ttl: timedelta = timedelta(minutes=15)) -> None: - self._default_ttl = default_ttl - self._issued: dict[str, ScopedWorkloadCredentials] = {} - - def issue( - self, - *, - tenant_id: str, - knowledge_graph_id: str, - extra_scopes: tuple[str, ...] = (), - ) -> ScopedWorkloadCredentials: - now = datetime.now(UTC) - scopes = ( - f"tenant:{tenant_id}", - f"knowledge_graph:{knowledge_graph_id}", - "workload:extraction", - *extra_scopes, - ) - credentials = ScopedWorkloadCredentials( - token=str(ULID()), - expires_at=now + self._default_ttl, - scopes=scopes, - ) - self._issued[credentials.token] = credentials - return credentials - - def issue_for_sticky_session( - self, *, tenant_id: str, knowledge_graph_id: str - ) -> ScopedWorkloadCredentials: - """Issue chat-scoped credentials for sticky session agent containers.""" - return self.issue( - tenant_id=tenant_id, - knowledge_graph_id=knowledge_graph_id, - extra_scopes=("workload:chat",), - ) - - def verify(self, token: str) -> ScopedWorkloadCredentials | None: - """Return credentials when token is known and not expired.""" - credentials = self._issued.get(token) - if credentials is None: - return None - if credentials.expires_at <= datetime.now(UTC): - self._issued.pop(token, None) - return None - return credentials - - class InMemoryEphemeralExtractionWorkerLauncher(IEphemeralExtractionWorkerLauncher): """Ephemeral worker launcher that validates scope and tracks active workers.""" diff --git 
a/src/api/extraction/infrastructure/workload_runtime_factory.py b/src/api/extraction/infrastructure/workload_runtime_factory.py index 1a88410ee..e74d8eae5 100644 --- a/src/api/extraction/infrastructure/workload_runtime_factory.py +++ b/src/api/extraction/infrastructure/workload_runtime_factory.py @@ -13,10 +13,13 @@ from extraction.infrastructure.remote_sticky_container_chat_agent import ( RemoteStickyContainerChatAgent, ) +from extraction.infrastructure.workload_credential_issuer import ( + DEFAULT_DEV_WORKLOAD_TOKEN_SIGNING_KEY, + ScopedWorkloadCredentialIssuer, +) from extraction.infrastructure.workload_runtime import ( InMemoryEphemeralExtractionWorkerLauncher, InMemoryStickySessionRuntimeManager, - ScopedWorkloadCredentialIssuer, ) from extraction.infrastructure.workload_runtime_settings import ( ExtractionWorkloadRuntimeSettings, @@ -30,12 +33,24 @@ from shared_kernel.container_runtime.factory import create_container_runtime +def resolve_workload_token_signing_key( + settings: ExtractionWorkloadRuntimeSettings | None = None, +) -> str: + """Return the HMAC key used to sign and verify workload JWTs.""" + resolved = settings or get_extraction_workload_runtime_settings() + configured = resolved.workload_token_signing_key.strip() + if configured: + return configured + return DEFAULT_DEV_WORKLOAD_TOKEN_SIGNING_KEY + + @lru_cache def get_workload_credential_issuer() -> ScopedWorkloadCredentialIssuer: """Return shared workload credential issuer for runtime containers.""" settings = get_extraction_workload_runtime_settings() return ScopedWorkloadCredentialIssuer( - default_ttl=timedelta(minutes=settings.session_ttl_minutes) + signing_key=resolve_workload_token_signing_key(settings), + default_ttl=timedelta(minutes=settings.session_ttl_minutes), ) diff --git a/src/api/extraction/infrastructure/workload_runtime_settings.py b/src/api/extraction/infrastructure/workload_runtime_settings.py index 57a5db007..045646ac9 100644 --- 
a/src/api/extraction/infrastructure/workload_runtime_settings.py +++ b/src/api/extraction/infrastructure/workload_runtime_settings.py @@ -42,6 +42,13 @@ class ExtractionWorkloadRuntimeSettings(BaseSettings): job_package_work_dir: str = Field(default="/tmp/kartograph/job_packages") skills_dir: str = Field(default="/app/skills") api_base_url: str = Field(default="http://api:8000") + workload_token_signing_key: str = Field( + default="", + description=( + "HMAC secret for signing extraction workload JWTs. Must be stable across " + "API reloads so sticky containers can authenticate after hot reload." + ), + ) sticky_health_timeout_seconds: float = Field(default=90.0, ge=5.0, le=600.0) sticky_turn_timeout_seconds: float = Field(default=600.0, ge=30.0, le=900.0) sticky_max_turns: int = Field(default=500, ge=1, le=1000) diff --git a/src/api/tests/unit/extraction/infrastructure/test_workload_credential_issuer.py b/src/api/tests/unit/extraction/infrastructure/test_workload_credential_issuer.py index 4a72d633f..fd7d8d304 100644 --- a/src/api/tests/unit/extraction/infrastructure/test_workload_credential_issuer.py +++ b/src/api/tests/unit/extraction/infrastructure/test_workload_credential_issuer.py @@ -4,7 +4,10 @@ from datetime import timedelta -from extraction.infrastructure.workload_runtime import ScopedWorkloadCredentialIssuer +from extraction.infrastructure.workload_credential_issuer import ( + DEFAULT_DEV_WORKLOAD_TOKEN_SIGNING_KEY, + ScopedWorkloadCredentialIssuer, +) def test_issue_for_sticky_session_includes_chat_scope() -> None: @@ -20,4 +23,61 @@ def test_issue_for_sticky_session_includes_chat_scope() -> None: def test_verify_rejects_unknown_token() -> None: issuer = ScopedWorkloadCredentialIssuer(default_ttl=timedelta(minutes=5)) - assert issuer.verify("missing-token") is None + assert issuer.verify("not-a-valid-jwt") is None + + +def test_verify_survives_new_issuer_instance_with_same_signing_key() -> None: + signing_key = "shared-test-signing-key" + issuer_a = 
ScopedWorkloadCredentialIssuer( + signing_key=signing_key, + default_ttl=timedelta(minutes=5), + ) + credentials = issuer_a.issue_for_sticky_session( + tenant_id="tenant-1", + knowledge_graph_id="kg-1", + ) + + issuer_b = ScopedWorkloadCredentialIssuer( + signing_key=signing_key, + default_ttl=timedelta(minutes=5), + ) + verified = issuer_b.verify(credentials.token) + + assert verified is not None + assert verified.scopes == credentials.scopes + assert verified.expires_at == credentials.expires_at + + +def test_verify_rejects_token_signed_with_different_key() -> None: + issuer = ScopedWorkloadCredentialIssuer( + signing_key="issuer-a-key", + default_ttl=timedelta(minutes=5), + ) + credentials = issuer.issue(tenant_id="tenant-1", knowledge_graph_id="kg-1") + + other_issuer = ScopedWorkloadCredentialIssuer( + signing_key="issuer-b-key", + default_ttl=timedelta(minutes=5), + ) + + assert other_issuer.verify(credentials.token) is None + + +def test_verify_rejects_expired_token() -> None: + issuer = ScopedWorkloadCredentialIssuer(default_ttl=timedelta(seconds=-60)) + credentials = issuer.issue(tenant_id="tenant-1", knowledge_graph_id="kg-1") + + assert issuer.verify(credentials.token) is None + + +def test_rejects_empty_signing_key() -> None: + try: + ScopedWorkloadCredentialIssuer(signing_key=" ") + except ValueError as exc: + assert "signing key" in str(exc).lower() + else: + raise AssertionError("expected ValueError for empty signing key") + + +def test_default_dev_signing_key_is_stable() -> None: + assert DEFAULT_DEV_WORKLOAD_TOKEN_SIGNING_KEY diff --git a/src/api/tests/unit/extraction/infrastructure/test_workload_runtime_settings.py b/src/api/tests/unit/extraction/infrastructure/test_workload_runtime_settings.py index f03834f1e..e769c1e99 100644 --- a/src/api/tests/unit/extraction/infrastructure/test_workload_runtime_settings.py +++ b/src/api/tests/unit/extraction/infrastructure/test_workload_runtime_settings.py @@ -2,6 +2,7 @@ from __future__ import annotations 
+from extraction.infrastructure.workload_runtime_factory import resolve_workload_token_signing_key from extraction.infrastructure.workload_runtime_settings import ( ExtractionWorkloadRuntimeSettings, ) @@ -21,3 +22,15 @@ def test_parses_command_strings_into_tuple(self) -> None: assert settings.sticky_command == ("sleep", "3600") assert settings.worker_command == ("sleep", "120") + + def test_resolve_workload_token_signing_key_uses_configured_value(self) -> None: + settings = ExtractionWorkloadRuntimeSettings( + workload_token_signing_key="configured-secret", + ) + + assert resolve_workload_token_signing_key(settings) == "configured-secret" + + def test_resolve_workload_token_signing_key_falls_back_to_dev_default(self) -> None: + settings = ExtractionWorkloadRuntimeSettings(workload_token_signing_key="") + + assert resolve_workload_token_signing_key(settings) From 4517fe47ec83e78a64e061a778281a981047f5c5 Mon Sep 17 00:00:00 2001 From: aredenba-rh <aredenba@redhat.com> Date: Wed, 3 Jun 2026 13:36:52 -0400 Subject: [PATCH 091/153] fix(dev-ui): eliminate phantom page scroll below app shell Keep scrolling inside main via flex min-h-0, lock document overflow, and restore graph-management artifacts to a single flowing grid layout. 
Co-authored-by: Cursor <cursoragent@cursor.com> --- src/dev-ui/app/assets/css/main.css | 9 +++ src/dev-ui/app/layouts/default.vue | 4 +- .../pages/knowledge-graphs/[kgId]/manage.vue | 70 ++++++++----------- .../knowledge-graph-manage-workspace.test.ts | 7 +- 4 files changed, 42 insertions(+), 48 deletions(-) diff --git a/src/dev-ui/app/assets/css/main.css b/src/dev-ui/app/assets/css/main.css index a3f97d92c..b07e56d90 100644 --- a/src/dev-ui/app/assets/css/main.css +++ b/src/dev-ui/app/assets/css/main.css @@ -114,6 +114,15 @@ * { @apply border-border outline-ring/50; } + html, + body, + #__nuxt { + height: 100%; + } + html, + body { + overflow: hidden; + } body { @apply bg-background text-foreground; } diff --git a/src/dev-ui/app/layouts/default.vue b/src/dev-ui/app/layouts/default.vue index 82f67dd61..0f3992705 100644 --- a/src/dev-ui/app/layouts/default.vue +++ b/src/dev-ui/app/layouts/default.vue @@ -800,7 +800,7 @@ watch(() => route.path, () => { closeMobile() }) </Sheet> <!-- Main Content Area --> - <div class="flex flex-1 flex-col overflow-hidden"> + <div class="flex min-h-0 flex-1 flex-col overflow-hidden"> <!-- Header --> <header class="flex h-14 items-center gap-4 border-b border-border bg-background px-4"> <!-- Mobile menu button --> @@ -895,7 +895,7 @@ watch(() => route.path, () => { closeMobile() }) </header> <!-- Page Content --> - <main class="flex-1 overflow-y-auto p-6"> + <main class="min-h-0 flex-1 overflow-y-auto p-6"> <slot /> </main> </div> diff --git a/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue b/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue index e681ee1f8..7633a97cb 100644 --- a/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue +++ b/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue @@ -945,9 +945,9 @@ function setGraphManagementMode(mode: GraphManagementMode) { function selectSchemaRailItem(itemId: GraphManagementRailItemId) { selectedRailItemId.value = itemId void nextTick(() => { - 
document.querySelector<HTMLElement>('.graph-management-detail')?.scrollTo({ - top: 0, + document.getElementById('graph-management-artifact-detail')?.scrollIntoView({ behavior: 'smooth', + block: 'start', }) }) } @@ -1268,14 +1268,7 @@ watch(selectedOpsDataSourceId, () => { </script> <template> - <div - class="mx-auto max-w-7xl" - :class=" - activeStep === 'graph-management' && !showOverview - ? 'flex flex-col gap-4 lg:min-h-0 lg:h-[calc(100dvh-11.5rem)] lg:max-h-[calc(100dvh-11.5rem)] lg:overflow-hidden' - : 'space-y-6' - " - > + <div class="mx-auto max-w-7xl space-y-6"> <template v-if="showOverview"> <NuxtLink to="/knowledge-graphs" @@ -1287,29 +1280,27 @@ watch(selectedOpsDataSourceId, () => { </template> <template v-else> - <div :class="activeStep === 'graph-management' ? 'shrink-0 space-y-4' : 'space-y-4'"> - <div class="flex items-center justify-between"> - <div class="space-y-1"> - <div class="flex items-center gap-2"> - <h1 class="text-2xl font-semibold tracking-tight">{{ graphHeaderTitle }}</h1> - <Badge variant="secondary">{{ stepBadgeLabel }}</Badge> - </div> - <p class="text-sm text-muted-foreground"> - <template v-if="activeStep === 'graph-management'"> - Conversation-first graph management with shared session and mode-specific workspace panels. - </template> - <template v-else> - Knowledge-graph scoped mutation run visibility and run metrics. 
- </template> - </p> + <div class="flex items-center justify-between"> + <div class="space-y-1"> + <div class="flex items-center gap-2"> + <h1 class="text-2xl font-semibold tracking-tight">{{ graphHeaderTitle }}</h1> + <Badge variant="secondary">{{ stepBadgeLabel }}</Badge> </div> - <Button variant="outline" size="sm" @click="returnToWorkspaceOverview()"> - <ArrowLeft class="mr-1.5 size-3.5" /> - Back to workspace overview - </Button> + <p class="text-sm text-muted-foreground"> + <template v-if="activeStep === 'graph-management'"> + Conversation-first graph management with shared session and mode-specific workspace panels. + </template> + <template v-else> + Knowledge-graph scoped mutation run visibility and run metrics. + </template> + </p> </div> - <Separator /> + <Button variant="outline" size="sm" @click="returnToWorkspaceOverview()"> + <ArrowLeft class="mr-1.5 size-3.5" /> + Back to workspace overview + </Button> </div> + <Separator /> </template> <div v-if="!hasTenant" class="rounded-lg border border-dashed p-6 text-sm text-muted-foreground"> @@ -1776,10 +1767,7 @@ watch(selectedOpsDataSourceId, () => { </Card> </section> - <section - v-else-if="activeStep === 'graph-management'" - class="flex flex-col gap-4 lg:min-h-0 lg:flex-1 lg:overflow-hidden" - > + <section v-else-if="activeStep === 'graph-management'" class="space-y-4"> <div v-if="graphManagementSectionState.phase === 'error'" class="rounded-lg border border-dashed p-4 text-sm" @@ -1792,7 +1780,7 @@ watch(selectedOpsDataSourceId, () => { </Button> </div> - <Card class="graph-management-controls shrink-0 overflow-hidden"> + <Card class="graph-management-controls overflow-hidden"> <CardHeader class="space-y-4 pb-4"> <div class="flex flex-wrap items-start gap-3"> <div @@ -1858,8 +1846,7 @@ watch(selectedOpsDataSourceId, () => { </CardHeader> </Card> - <div class="shrink-0"> - <SharedConversationPanel + <SharedConversationPanel v-model:draft-message="draftMessage" :mode-label="graphManagementModeLabel" 
:description="graphManagementChatDescription" @@ -1878,13 +1865,12 @@ watch(selectedOpsDataSourceId, () => { @refresh="refreshGraphManagementSession" @clear-chat="clearChat" @send-message="sendChatMessage" - /> - </div> + /> - <div class="graph-management-artifacts flex min-h-0 flex-1 flex-col gap-4 overflow-hidden lg:relative"> + <div class="graph-management-artifacts grid gap-6 lg:grid-cols-[minmax(0,15.5rem)_minmax(0,1fr)] lg:items-start"> <Card id="graph-management-schema-artifacts" - class="graph-management-schema-panel w-full shrink-0 lg:absolute lg:left-0 lg:top-0 lg:z-10 lg:w-[15.5rem]" + class="graph-management-schema-panel lg:sticky lg:top-4 lg:self-start" > <CardHeader class="pb-2"> <CardTitle class="text-sm font-semibold">Design Artifacts</CardTitle> @@ -1922,7 +1908,7 @@ watch(selectedOpsDataSourceId, () => { <div id="graph-management-artifact-detail" - class="graph-management-detail min-h-0 min-w-0 flex-1 space-y-6 overflow-y-auto overscroll-contain lg:pl-[calc(15.5rem+1.5rem)]" + class="graph-management-detail min-w-0 space-y-6" > <div v-if="selectedRailItemId === 'schema-entities'" class="min-w-0 space-y-2"> <GraphDesignEntitiesPanel diff --git a/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts b/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts index 15d927f12..8d1e12f6e 100644 --- a/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts +++ b/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts @@ -435,10 +435,9 @@ describe('KG-MANAGE-008 - hybrid lower panel shared rail', () => { expect(manageWorkspaceVue).toContain('graph-management-session-pointers') expect(manageWorkspaceVue).toContain('graphManagementArtifactRowClass') expect(manageWorkspaceVue).toContain('schemaRailItems') - expect(manageWorkspaceVue).toContain('lg:absolute lg:left-0 lg:top-0') - expect(manageWorkspaceVue).toContain('graph-management-detail min-h-0 min-w-0 flex-1') - expect(manageWorkspaceVue).toContain('overflow-y-auto 
overscroll-contain') - expect(manageWorkspaceVue).toContain('lg:h-[calc(100dvh-11.5rem)]') + expect(manageWorkspaceVue).toContain('lg:grid-cols-[minmax(0,15.5rem)_minmax(0,1fr)]') + expect(manageWorkspaceVue).toContain('lg:sticky lg:top-4') + expect(manageWorkspaceVue).toContain('scrollIntoView') }) it('builds rail items with status and last-updated metadata', () => { From 022bfc14da6eeca296d51dc4dfa59ed55d191aa4 Mon Sep 17 00:00:00 2001 From: aredenba-rh <aredenba@redhat.com> Date: Wed, 3 Jun 2026 14:46:28 -0400 Subject: [PATCH 092/153] feat(manage-kg): improve GMA chat UX and repository workspace paths Use slugified data source names for repository-files folders, mint a fresh workload JWT on each chat turn for schema tools, and render assistant replies as Markdown while keeping the transcript scrolled to the bottom. Co-authored-by: Cursor <cursoragent@cursor.com> --- .../kartograph_agent_runtime/executor.py | 47 +- .../kartograph_agent_runtime/server.py | 5 + src/agent-runtime/tests/test_executor.py | 14 +- .../application/chat_turn_service.py | 11 + .../application/repository_workspace_paths.py | 18 + .../application/schema_authoring_guide.py | 6 +- .../application/skill_resolution_service.py | 2 + .../sticky_session_runtime_service.py | 8 +- src/api/extraction/dependencies.py | 1 + .../domain/prepared_job_package_source.py | 15 + .../deterministic_chat_agent.py | 1 + .../prepared_job_package_reader.py | 41 +- .../remote_sticky_container_chat_agent.py | 5 +- .../sticky_session_bootstrap_builder.py | 13 +- .../sticky_session_workdir_materializer.py | 35 +- src/api/extraction/ports/chat_agent.py | 1 + .../extraction/ports/prepared_job_packages.py | 8 +- .../application/test_chat_turn_service.py | 50 + .../test_repository_workspace_paths.py | 16 + .../test_sticky_session_runtime_service.py | 15 +- .../test_prepared_job_package_reader.py | 14 +- ...est_sticky_session_workdir_materializer.py | 101 +- src/dev-ui/app/assets/css/main.css | 95 + 
.../extraction/SharedConversationPanel.vue | 109 +- .../knowledge-graph-manage-workspace.test.ts | 4 +- src/dev-ui/package-lock.json | 9739 +++++++++++++++++ src/dev-ui/package.json | 4 +- 27 files changed, 10222 insertions(+), 156 deletions(-) create mode 100644 src/api/extraction/application/repository_workspace_paths.py create mode 100644 src/api/extraction/domain/prepared_job_package_source.py create mode 100644 src/api/tests/unit/extraction/application/test_repository_workspace_paths.py create mode 100644 src/dev-ui/package-lock.json diff --git a/src/agent-runtime/kartograph_agent_runtime/executor.py b/src/agent-runtime/kartograph_agent_runtime/executor.py index 0d4593e96..fed15aa19 100644 --- a/src/agent-runtime/kartograph_agent_runtime/executor.py +++ b/src/agent-runtime/kartograph_agent_runtime/executor.py @@ -55,22 +55,32 @@ def _build_workspace_prompt_appendix(settings: AgentRuntimeSettings) -> str: f"Workspace mount: `{settings.workspace_dir}`", ( "Prepared repository files live under " - "`repository-files/<job_package_id>/` relative to the workspace mount. " + "`repository-files/<data_source_name>/` relative to the workspace mount " + "(one folder per data source for this session's knowledge graph; folder " + "names are slugified data source names such as `hyperfleet-api`). " "Use Read, Grep, and Glob tools against those paths." 
), ] for source in sources[:12]: if not isinstance(source, dict): continue - package_id = str(source.get("job_package_id") or "?") + data_source_name = str(source.get("data_source_name") or "?") + data_source_id = str(source.get("data_source_id") or "?") entry_count = source.get("entry_count", 0) + repository_folder = str(source.get("repository_folder") or "").strip() repository_root = str( - source.get("repository_root") or f"repository-files/{package_id}" + source.get("repository_root") + or ( + f"repository-files/{repository_folder}" + if repository_folder + else f"repository-files/{data_source_name}" + ) ) - data_source_id = str(source.get("data_source_id") or "?") + package_id = str(source.get("job_package_id") or "?") lines.append( f"- `{repository_root}`: {entry_count} file(s) " - f"(data source `{data_source_id}`)" + f"(data source `{data_source_name}`, id `{data_source_id}`, " + f"JobPackage `{package_id}`)" ) sample_paths = source.get("sample_paths") if isinstance(sample_paths, list): @@ -102,13 +112,14 @@ def _build_workspace_prompt_appendix(settings: AgentRuntimeSettings) -> str: f"Workspace mount: `{settings.workspace_dir}`", ( "Prepared repository files live under " - "`repository-files/<job_package_id>/` relative to the workspace mount. " + "`repository-files/<data_source_name>/` relative to the workspace mount " + "(one folder per data source; names are slugified data source names). " "Use Read, Grep, and Glob tools against those paths." 
), ] for package_dir in package_dirs[:8]: files = sorted(path for path in package_dir.rglob("*") if path.is_file()) - lines.append(f"- `{package_dir.name}`: {len(files)} file(s)") + lines.append(f"- `repository-files/{package_dir.name}`: {len(files)} file(s)") for file_path in files[:4]: rel = file_path.relative_to(package_dir).as_posix() lines.append(f" - `{rel}`") @@ -274,6 +285,16 @@ async def _iter_sdk_messages_with_heartbeat( await pending +def _tooling_settings( + settings: AgentRuntimeSettings, + workload_token: str | None = None, +) -> AgentRuntimeSettings: + token = (workload_token or "").strip() + if not token: + return settings + return settings.model_copy(update={"workload_token": token}) + + async def stream_turn_events( *, settings: AgentRuntimeSettings, @@ -282,8 +303,10 @@ async def stream_turn_events( agent_configuration: dict[str, Any], message_history: list[dict[str, Any]], turn_timeout_seconds: float = _DEFAULT_TURN_TIMEOUT_SECONDS, + workload_token: str | None = None, ) -> AsyncIterator[dict[str, Any]]: - auth_mode = _apply_model_env(settings) + effective_settings = _tooling_settings(settings, workload_token) + auth_mode = _apply_model_env(effective_settings) yield { "type": "thinking", "recent": [ @@ -293,9 +316,9 @@ async def stream_turn_events( ], } - if settings.model_configured(): + if effective_settings.model_configured(): async for event in _stream_with_claude_sdk( - settings=settings, + settings=effective_settings, message=message, ui_mode=ui_mode, agent_configuration=agent_configuration, @@ -306,7 +329,7 @@ async def stream_turn_events( yield event return - tooling = RuntimeTooling(settings=settings) + tooling = RuntimeTooling(settings=effective_settings) skill_keys = ", ".join(sorted(agent_configuration.get("skills", {}).keys())[:4]) or "default" reply = ( f"**Graph Management Assistant ({ui_mode})**\n\n" @@ -315,7 +338,7 @@ async def stream_turn_events( "Configure Vertex AI (`CLAUDE_CODE_USE_VERTEX=1`, `ANTHROPIC_VERTEX_PROJECT_ID`, 
" "`CLOUD_ML_REGION`) or `ANTHROPIC_API_KEY` to enable live model execution. " "Graph and mutation tools are wired via " - f"`{settings.api_base_url}` using the injected workload token." + f"`{effective_settings.api_base_url}` using the injected workload token." ) if message.lower().startswith("search graph:"): slug = message.split(":", 1)[1].strip() diff --git a/src/agent-runtime/kartograph_agent_runtime/server.py b/src/agent-runtime/kartograph_agent_runtime/server.py index ccbeb96ce..82e414465 100644 --- a/src/agent-runtime/kartograph_agent_runtime/server.py +++ b/src/agent-runtime/kartograph_agent_runtime/server.py @@ -28,6 +28,10 @@ class TurnRequest(BaseModel): ui_mode: str = Field(default="initial-schema-design") agent_configuration: dict[str, Any] = Field(default_factory=dict) message_history: list[dict[str, Any]] = Field(default_factory=list) + workload_token: str | None = Field( + default=None, + description="Fresh scoped JWT for Kartograph schema/mutation tools (preferred over container env).", + ) def _workspace_ready() -> bool: @@ -66,6 +70,7 @@ async def event_stream() -> AsyncIterator[str]: agent_configuration=request.agent_configuration, message_history=request.message_history, turn_timeout_seconds=settings.turn_timeout_seconds, + workload_token=request.workload_token, ): if event.get("type") == "done": logger.info( diff --git a/src/agent-runtime/tests/test_executor.py b/src/agent-runtime/tests/test_executor.py index f95fb4d2f..93bade7e8 100644 --- a/src/agent-runtime/tests/test_executor.py +++ b/src/agent-runtime/tests/test_executor.py @@ -21,7 +21,7 @@ def test_build_workspace_prompt_appendix_prefers_sources_index(tmp_path: Path) -> None: package_id = "pkg-1" - package_root = tmp_path / "repository-files" / package_id / "pkg" / "api" + package_root = tmp_path / "repository-files" / "hyperfleet-api" / "pkg" / "api" package_root.mkdir(parents=True) (package_root / "adapter_status_types_test.go").write_text("package api\n", encoding="utf-8") (tmp_path 
/ "sources-index.json").write_text( @@ -33,8 +33,10 @@ def test_build_workspace_prompt_appendix_prefers_sources_index(tmp_path: Path) - { "job_package_id": package_id, "data_source_id": "ds-hyperfleet-api", + "data_source_name": "Hyperfleet API", + "repository_folder": "hyperfleet-api", "entry_count": 142, - "repository_root": f"repository-files/{package_id}", + "repository_root": "repository-files/hyperfleet-api", "sample_paths": ["pkg/api/adapter_status_types_test.go"], } ], @@ -47,7 +49,8 @@ def test_build_workspace_prompt_appendix_prefers_sources_index(tmp_path: Path) - AgentRuntimeSettings(KARTOGRAPH_WORKSPACE_DIR=str(tmp_path)) ) - assert "ds-hyperfleet-api" in appendix + assert "hyperfleet-api" in appendix + assert "Hyperfleet API" in appendix assert "142 file(s)" in appendix assert "pkg/api/adapter_status_types_test.go" in appendix @@ -55,7 +58,7 @@ def test_build_workspace_prompt_appendix_prefers_sources_index(tmp_path: Path) - def test_build_workspace_prompt_appendix_lists_materialized_repository_files( tmp_path: Path, ) -> None: - package_root = tmp_path / "repository-files" / "pkg-1" / "pkg" / "api" + package_root = tmp_path / "repository-files" / "hyperfleet-api" / "pkg" / "api" package_root.mkdir(parents=True) (package_root / "adapter_status_types_test.go").write_text("package api\n", encoding="utf-8") @@ -63,7 +66,8 @@ def test_build_workspace_prompt_appendix_lists_materialized_repository_files( AgentRuntimeSettings(KARTOGRAPH_WORKSPACE_DIR=str(tmp_path)) ) - assert "repository-files/<job_package_id>/" in appendix + assert "repository-files/<data_source_name>/" in appendix + assert "hyperfleet-api" in appendix assert "pkg/api/adapter_status_types_test.go" in appendix diff --git a/src/api/extraction/application/chat_turn_service.py b/src/api/extraction/application/chat_turn_service.py index 84220026b..3fab81dbb 100644 --- a/src/api/extraction/application/chat_turn_service.py +++ b/src/api/extraction/application/chat_turn_service.py @@ -13,6 +13,7 @@ 
GraphManagementUiMode, SessionJobPackagePhase, ) +from extraction.infrastructure.workload_runtime import ScopedWorkloadCredentialIssuer from extraction.ports.chat_agent import IExtractionChatAgent @@ -25,10 +26,12 @@ def __init__( session_service: ExtractionAgentSessionService, runtime_service: IStickySessionRuntimeService, chat_agent: IExtractionChatAgent, + credential_issuer: ScopedWorkloadCredentialIssuer | None = None, ) -> None: self._session_service = session_service self._runtime_service = runtime_service self._chat_agent = chat_agent + self._credential_issuer = credential_issuer async def stream_runtime_warmup( self, @@ -125,12 +128,20 @@ async def stream_chat_turn( ], } + workload_token: str | None = None + if self._credential_issuer is not None: + workload_token = self._credential_issuer.issue_for_sticky_session( + tenant_id=tenant_id, + knowledge_graph_id=knowledge_graph_id, + ).token + assistant_reply: str | None = None stream_failed = False async for event in self._chat_agent.stream_turn( session=session, user_message=trimmed, ui_mode=ui_mode, + workload_token=workload_token, ): if event.get("type") == "thinking": recent = event.get("recent") diff --git a/src/api/extraction/application/repository_workspace_paths.py b/src/api/extraction/application/repository_workspace_paths.py new file mode 100644 index 000000000..e049386c5 --- /dev/null +++ b/src/api/extraction/application/repository_workspace_paths.py @@ -0,0 +1,18 @@ +"""Filesystem-safe folder names for sticky session repository materialization.""" + +from __future__ import annotations + +import re + +_UNSAFE_CHARS = re.compile(r"[^a-z0-9]+") +_MULTI_DASH = re.compile(r"-{2,}") + + +def repository_folder_for_data_source(*, name: str, data_source_id: str) -> str: + """Derive a stable, human-readable directory name for one data source.""" + slug = _UNSAFE_CHARS.sub("-", name.strip().lower()).strip("-") + slug = _MULTI_DASH.sub("-", slug) + if slug: + return slug + fallback = _UNSAFE_CHARS.sub("-", 
data_source_id.strip().lower()).strip("-") + return fallback or "data-source" diff --git a/src/api/extraction/application/schema_authoring_guide.py b/src/api/extraction/application/schema_authoring_guide.py index 7c9c5f314..b063acb52 100644 --- a/src/api/extraction/application/schema_authoring_guide.py +++ b/src/api/extraction/application/schema_authoring_guide.py @@ -83,6 +83,8 @@ ## Repository context -Use Read/Grep/Glob on prepared JobPackage files under `repository-files/<job_package_id>/` -to infer domain concepts — then model them as ontology types, not as ad-hoc API discoveries. +Use Read/Grep/Glob on prepared JobPackage files under `repository-files/<data_source_name>/` +(one folder per connected data source for this knowledge graph; folder names are slugified +data source names such as `hyperfleet-api`, not other knowledge graphs) to infer domain +concepts — then model them as ontology types, not as ad-hoc API discoveries. """.strip() diff --git a/src/api/extraction/application/skill_resolution_service.py b/src/api/extraction/application/skill_resolution_service.py index d461a0055..182b9b6b3 100644 --- a/src/api/extraction/application/skill_resolution_service.py +++ b/src/api/extraction/application/skill_resolution_service.py @@ -38,6 +38,7 @@ class ResolvedExtractionSkillPack: "Never fabricate repository content or credentials.", "Keep recommendations scoped to the active knowledge graph.", "Use kartograph_* schema tools for ontology and JSONL mutations; never probe /management or /graph HTTP routes manually.", + "Format user-facing replies in GitHub-flavored Markdown (headings, lists, fenced code blocks, tables) for readability in the chat UI.", ), }, ExtractionSessionMode.EXTRACTION_OPERATIONS: { @@ -55,6 +56,7 @@ class ResolvedExtractionSkillPack: "All write paths must remain mutation-log auditable.", "Treat schema edits as secondary unless explicitly requested.", "Avoid broad destructive changes without explicit confirmation.", + "Format user-facing 
replies in GitHub-flavored Markdown (headings, lists, fenced code blocks, tables) for readability in the chat UI.", ), }, } diff --git a/src/api/extraction/application/sticky_session_runtime_service.py b/src/api/extraction/application/sticky_session_runtime_service.py index 453c43e2d..52d49f24f 100644 --- a/src/api/extraction/application/sticky_session_runtime_service.py +++ b/src/api/extraction/application/sticky_session_runtime_service.py @@ -114,12 +114,13 @@ async def ensure_runtime_for_chat( readiness=readiness, gate=gate, ) - expected_package_ids = await self._bootstrap_builder.resolve_job_package_ids( + expected_packages = await self._bootstrap_builder.resolve_job_packages( knowledge_graph_id=knowledge_graph_id, include_job_packages=include_job_packages, ) stored_materialization = session.runtime_context.get("workspace_materialization", {}) stored_package_ids = tuple(stored_materialization.get("job_package_ids") or ()) + expected_package_ids = tuple(source.package_id for source in expected_packages) if ( await self._health_checker.is_healthy(runtime_base_url=runtime_base_url) and stored_package_ids == expected_package_ids @@ -232,7 +233,7 @@ async def _stream_prepare_runtime( readiness=readiness, gate=gate, ) - package_ids = await self._bootstrap_builder.resolve_job_package_ids( + job_packages = await self._bootstrap_builder.resolve_job_packages( knowledge_graph_id=knowledge_graph_id, include_job_packages=include_job_packages, ) @@ -243,7 +244,8 @@ async def _stream_prepare_runtime( include_job_packages=include_job_packages, ) session.runtime_context["workspace_materialization"] = { - "job_package_ids": list(package_ids), + "job_package_ids": [source.package_id for source in job_packages], + "repository_folders": [source.repository_folder for source in job_packages], } recent, event = thinking_event(recent, "Starting isolated Claude Agent SDK container") yield event diff --git a/src/api/extraction/dependencies.py b/src/api/extraction/dependencies.py index 
fbd2387fe..f36212610 100644 --- a/src/api/extraction/dependencies.py +++ b/src/api/extraction/dependencies.py @@ -127,4 +127,5 @@ def get_extraction_chat_turn_service( session_service=session_service, runtime_service=runtime_service, chat_agent=create_extraction_chat_agent(runtime_settings), + credential_issuer=get_workload_credential_issuer(), ) diff --git a/src/api/extraction/domain/prepared_job_package_source.py b/src/api/extraction/domain/prepared_job_package_source.py new file mode 100644 index 000000000..0aeaf0dc2 --- /dev/null +++ b/src/api/extraction/domain/prepared_job_package_source.py @@ -0,0 +1,15 @@ +"""Prepared JobPackage metadata for sticky session workspace materialization.""" + +from __future__ import annotations + +from dataclasses import dataclass + + +@dataclass(frozen=True) +class PreparedJobPackageSource: + """One materializable JobPackage snapshot for a data source.""" + + package_id: str + data_source_id: str + data_source_name: str + repository_folder: str diff --git a/src/api/extraction/infrastructure/deterministic_chat_agent.py b/src/api/extraction/infrastructure/deterministic_chat_agent.py index d1ebbd7eb..c2a1651d9 100644 --- a/src/api/extraction/infrastructure/deterministic_chat_agent.py +++ b/src/api/extraction/infrastructure/deterministic_chat_agent.py @@ -18,6 +18,7 @@ async def stream_turn( session: ExtractionAgentSession, user_message: str, ui_mode: GraphManagementUiMode, + workload_token: str | None = None, ) -> AsyncIterator[dict[str, Any]]: yield { "type": "thinking", diff --git a/src/api/extraction/infrastructure/prepared_job_package_reader.py b/src/api/extraction/infrastructure/prepared_job_package_reader.py index 1265dcf94..489c431ab 100644 --- a/src/api/extraction/infrastructure/prepared_job_package_reader.py +++ b/src/api/extraction/infrastructure/prepared_job_package_reader.py @@ -7,12 +7,14 @@ from sqlalchemy import text from sqlalchemy.ext.asyncio import AsyncSession +from 
extraction.application.repository_workspace_paths import repository_folder_for_data_source +from extraction.domain.prepared_job_package_source import PreparedJobPackageSource from shared_kernel.job_package.reader import JobPackageReader from shared_kernel.job_package.value_objects import JobPackageId class SqlPreparedJobPackageReader: - """Reads latest materializable JobPackage ids from outbox events for one KG.""" + """Reads latest materializable JobPackage snapshots from outbox events for one KG.""" def __init__( self, @@ -25,19 +27,21 @@ def __init__( async def list_latest_for_knowledge_graph( self, *, knowledge_graph_id: str - ) -> tuple[str, ...]: + ) -> tuple[PreparedJobPackageSource, ...]: result = await self._session.execute( text( """ SELECT payload->>'data_source_id' AS data_source_id, payload->>'job_package_id' AS job_package_id, + ds.name AS data_source_name, occurred_at - FROM outbox - WHERE event_type IN ('IngestionPrepared', 'JobPackageProduced') + FROM outbox o + LEFT JOIN data_sources ds ON ds.id = payload->>'data_source_id' + WHERE o.event_type IN ('IngestionPrepared', 'JobPackageProduced') AND payload->>'knowledge_graph_id' = :knowledge_graph_id AND payload->>'job_package_id' IS NOT NULL - ORDER BY payload->>'data_source_id', occurred_at DESC + ORDER BY payload->>'data_source_id', o.occurred_at DESC """ ), {"knowledge_graph_id": knowledge_graph_id}, @@ -51,23 +55,36 @@ async def list_latest_for_knowledge_graph( continue by_source.setdefault(data_source_id, []).append(row) - selected: list[str] = [] + selected: list[PreparedJobPackageSource] = [] for data_source_id in sorted(by_source): - package_id = self._first_materializable_package_id( + source = self._first_materializable_source( + data_source_id=data_source_id, rows=by_source[data_source_id], ) - if package_id is not None: - selected.append(package_id) + if source is not None: + selected.append(source) return tuple(selected) - def _first_materializable_package_id(self, *, rows) -> str | 
None: + def _first_materializable_source( + self, *, data_source_id: str, rows + ) -> PreparedJobPackageSource | None: for row in rows: package_id = str(row.job_package_id or "").strip() if not package_id: continue - if self._package_has_repository_content(package_id): - return package_id + if not self._package_has_repository_content(package_id): + continue + data_source_name = str(row.data_source_name or "").strip() or data_source_id + return PreparedJobPackageSource( + package_id=package_id, + data_source_id=data_source_id, + data_source_name=data_source_name, + repository_folder=repository_folder_for_data_source( + name=data_source_name, + data_source_id=data_source_id, + ), + ) return None def _package_has_repository_content(self, package_id: str) -> bool: diff --git a/src/api/extraction/infrastructure/remote_sticky_container_chat_agent.py b/src/api/extraction/infrastructure/remote_sticky_container_chat_agent.py index b5f9d9eee..f6d78ed79 100644 --- a/src/api/extraction/infrastructure/remote_sticky_container_chat_agent.py +++ b/src/api/extraction/infrastructure/remote_sticky_container_chat_agent.py @@ -33,6 +33,7 @@ async def stream_turn( session: ExtractionAgentSession, user_message: str, ui_mode: GraphManagementUiMode, + workload_token: str | None = None, ) -> AsyncIterator[dict[str, Any]]: sticky_runtime = session.runtime_context.get("sticky_runtime", {}) runtime_base_url = sticky_runtime.get("runtime_base_url") @@ -47,12 +48,14 @@ async def stream_turn( } return - payload = { + payload: dict[str, Any] = { "message": user_message, "ui_mode": ui_mode.value, "agent_configuration": session.runtime_context.get("agent_configuration", {}), "message_history": session.message_history[-20:], } + if workload_token and workload_token.strip(): + payload["workload_token"] = workload_token.strip() url = f"{runtime_base_url.rstrip('/')}/v1/turn" try: diff --git a/src/api/extraction/infrastructure/sticky_session_bootstrap_builder.py 
b/src/api/extraction/infrastructure/sticky_session_bootstrap_builder.py index c646970c0..4193d1abf 100644 --- a/src/api/extraction/infrastructure/sticky_session_bootstrap_builder.py +++ b/src/api/extraction/infrastructure/sticky_session_bootstrap_builder.py @@ -10,6 +10,7 @@ ExtractionWorkloadRuntimeSettings, get_extraction_workload_runtime_settings, ) +from extraction.domain.prepared_job_package_source import PreparedJobPackageSource from extraction.ports.prepared_job_packages import IPreparedJobPackageReader from extraction.ports.runtime import StickySessionRuntimeBootstrap @@ -30,13 +31,13 @@ def __init__( self._workdir_materializer = workdir_materializer self._runtime_settings = runtime_settings or get_extraction_workload_runtime_settings() - async def resolve_job_package_ids( + async def resolve_job_packages( self, *, knowledge_graph_id: str, include_job_packages: bool, - ) -> tuple[str, ...]: - """Return JobPackage IDs that would be materialized for one session.""" + ) -> tuple[PreparedJobPackageSource, ...]: + """Return JobPackage snapshots that would be materialized for one session.""" if not include_job_packages: return () return await self._prepared_job_package_reader.list_latest_for_knowledge_graph( @@ -54,15 +55,15 @@ async def build( if self._runtime_settings.backend != "container": return None - package_ids: tuple[str, ...] = () + job_packages: tuple[PreparedJobPackageSource, ...] 
= () if include_job_packages: - package_ids = await self._prepared_job_package_reader.list_latest_for_knowledge_graph( + job_packages = await self._prepared_job_package_reader.list_latest_for_knowledge_graph( knowledge_graph_id=knowledge_graph_id, ) host_session_work_dir = self._workdir_materializer.prepare( session_id=session_id, knowledge_graph_id=knowledge_graph_id, - job_package_ids=package_ids, + job_packages=job_packages, ) credentials = self._credential_issuer.issue_for_sticky_session( tenant_id=tenant_id, diff --git a/src/api/extraction/infrastructure/sticky_session_workdir_materializer.py b/src/api/extraction/infrastructure/sticky_session_workdir_materializer.py index bac5f08f3..8377098e4 100644 --- a/src/api/extraction/infrastructure/sticky_session_workdir_materializer.py +++ b/src/api/extraction/infrastructure/sticky_session_workdir_materializer.py @@ -7,6 +7,7 @@ import shutil import zipfile +from extraction.domain.prepared_job_package_source import PreparedJobPackageSource from shared_kernel.job_package.path_safety import validate_zip_entry_name from shared_kernel.job_package.reader import JobPackageReader from shared_kernel.job_package.value_objects import JobPackageId @@ -32,7 +33,7 @@ def prepare( *, session_id: str, knowledge_graph_id: str, - job_package_ids: tuple[str, ...] = (), + job_packages: tuple[PreparedJobPackageSource, ...] 
= (), ) -> Path: """Create or refresh the host work directory for one sticky session.""" session_root = self._job_package_work_dir / "sticky-sessions" / session_id @@ -42,14 +43,11 @@ def prepare( _replace_directory(ingestion_context_dir) _replace_directory(repository_files_dir) - discovered = ( - self._discover_job_package_ids() - if job_package_ids is None - else job_package_ids - ) index_sources: list[dict[str, object]] = [] - for package_id in discovered: - archive_path = self._job_package_work_dir / JobPackageId(value=package_id).archive_name() + for source in job_packages: + archive_path = self._job_package_work_dir / JobPackageId( + value=source.package_id + ).archive_name() if not archive_path.exists(): continue reader = JobPackageReader(archive_path) @@ -57,19 +55,20 @@ def prepare( if manifest.entry_count <= 0: continue - package_dir = ingestion_context_dir / package_id + package_dir = ingestion_context_dir / source.package_id package_dir.mkdir(parents=True, exist_ok=True) with zipfile.ZipFile(archive_path) as archive: for entry_name in archive.namelist(): validate_zip_entry_name(entry_name) archive.extract(entry_name, path=package_dir) + repository_folder = source.repository_folder sample_paths: list[str] = [] for change in reader.iter_changeset(): if change.content_ref is None or not change.path: continue validate_zip_entry_name(change.path) - output_path = repository_files_dir / package_id / change.path + output_path = repository_files_dir / repository_folder / change.path output_path.parent.mkdir(parents=True, exist_ok=True) output_path.write_bytes(reader.read_content(change.content_ref)) if len(sample_paths) < 8: @@ -77,11 +76,13 @@ def prepare( index_sources.append( { - "job_package_id": package_id, - "data_source_id": manifest.data_source_id, + "job_package_id": source.package_id, + "data_source_id": source.data_source_id, + "data_source_name": source.data_source_name, + "repository_folder": repository_folder, "entry_count": manifest.entry_count, 
"sync_mode": str(manifest.sync_mode), - "repository_root": f"repository-files/{package_id}", + "repository_root": f"repository-files/{repository_folder}", "sample_paths": sample_paths, } ) @@ -95,14 +96,6 @@ def prepare( ) return session_root - def _discover_job_package_ids(self) -> tuple[str, ...]: - package_ids: list[str] = [] - for archive in sorted(self._job_package_work_dir.glob("job-package-*.zip")): - stem = archive.stem.removeprefix("job-package-") - if stem: - package_ids.append(stem) - return tuple(package_ids) - def _write_workspace_index( self, *, diff --git a/src/api/extraction/ports/chat_agent.py b/src/api/extraction/ports/chat_agent.py index 5729f4b4e..52f38893c 100644 --- a/src/api/extraction/ports/chat_agent.py +++ b/src/api/extraction/ports/chat_agent.py @@ -18,6 +18,7 @@ def stream_turn( session: ExtractionAgentSession, user_message: str, ui_mode: GraphManagementUiMode, + workload_token: str | None = None, ) -> AsyncIterator[dict[str, Any]]: """Yield NDJSON-style event dictionaries ending with a terminal done event.""" ... diff --git a/src/api/extraction/ports/prepared_job_packages.py b/src/api/extraction/ports/prepared_job_packages.py index 124b6768f..53c809570 100644 --- a/src/api/extraction/ports/prepared_job_packages.py +++ b/src/api/extraction/ports/prepared_job_packages.py @@ -4,12 +4,14 @@ from typing import Protocol +from extraction.domain.prepared_job_package_source import PreparedJobPackageSource + class IPreparedJobPackageReader(Protocol): - """Read latest prepared JobPackage ids for one knowledge graph.""" + """Read latest prepared JobPackage snapshots for one knowledge graph.""" async def list_latest_for_knowledge_graph( self, *, knowledge_graph_id: str - ) -> tuple[str, ...]: - """Return latest JobPackage ids per data source for the knowledge graph.""" + ) -> tuple[PreparedJobPackageSource, ...]: + """Return latest materializable JobPackages per data source for the knowledge graph.""" ... 
diff --git a/src/api/tests/unit/extraction/application/test_chat_turn_service.py b/src/api/tests/unit/extraction/application/test_chat_turn_service.py index b579281c1..575db9e48 100644 --- a/src/api/tests/unit/extraction/application/test_chat_turn_service.py +++ b/src/api/tests/unit/extraction/application/test_chat_turn_service.py @@ -16,6 +16,7 @@ IngestionReadinessSnapshot, ) from extraction.infrastructure.deterministic_chat_agent import DeterministicExtractionChatAgent +from extraction.infrastructure.workload_credential_issuer import ScopedWorkloadCredentialIssuer from extraction.infrastructure.workload_runtime import InMemoryStickySessionRuntimeManager @@ -143,6 +144,55 @@ async def test_stream_chat_turn_persists_assistant_reply() -> None: assert active.runtime_context["sticky_runtime"]["container_id"] +class _TokenCapturingChatAgent(DeterministicExtractionChatAgent): + def __init__(self) -> None: + self.last_workload_token: str | None = None + + async def stream_turn(self, **kwargs): + self.last_workload_token = kwargs.get("workload_token") + async for event in super().stream_turn(**kwargs): + yield event + + +@pytest.mark.asyncio +async def test_stream_chat_turn_passes_fresh_workload_token_to_agent() -> None: + repo = _InMemoryAgentSessionRepository() + sticky = InMemoryStickySessionRuntimeManager() + session_service = ExtractionAgentSessionService(repository=repo) + runtime_service = StickySessionRuntimeService( + session_service=session_service, + skill_resolution_service=_StaticSkillResolutionService(), + ingestion_readiness_reader=_StaticIngestionReadinessReader(IngestionReadinessSnapshot(1, 1)), + sticky_runtime_manager=sticky, + bootstrap_builder=_StaticBootstrapBuilder(), + health_checker=_InstantHealthChecker(), + runtime_backend="memory", + sticky_health_timeout_seconds=5.0, + ) + chat_agent = _TokenCapturingChatAgent() + service = ExtractionChatTurnService( + session_service=session_service, + runtime_service=runtime_service, + 
chat_agent=chat_agent, + credential_issuer=ScopedWorkloadCredentialIssuer(default_ttl=__import__("datetime").timedelta(minutes=5)), + ) + + events = [ + event + async for event in service.stream_chat_turn( + tenant_id="tenant-1", + user_id="user-1", + knowledge_graph_id="kg-1", + mode=ExtractionSessionMode.SCHEMA_BOOTSTRAP, + ui_mode=GraphManagementUiMode.INITIAL_SCHEMA_DESIGN, + message="Design entity types", + ) + ] + + assert events[-1]["ok"] is True + assert chat_agent.last_workload_token + + @pytest.mark.asyncio async def test_stream_chat_turn_wait_when_job_package_unprepared() -> None: service, repo = _build_chat_turn_service(readiness=IngestionReadinessSnapshot(2, 0)) diff --git a/src/api/tests/unit/extraction/application/test_repository_workspace_paths.py b/src/api/tests/unit/extraction/application/test_repository_workspace_paths.py new file mode 100644 index 000000000..a7d3b3496 --- /dev/null +++ b/src/api/tests/unit/extraction/application/test_repository_workspace_paths.py @@ -0,0 +1,16 @@ +"""Unit tests for repository workspace folder naming.""" + +from extraction.application.repository_workspace_paths import repository_folder_for_data_source + + +def test_repository_folder_slugifies_data_source_name() -> None: + folder = repository_folder_for_data_source( + name="Hyperfleet API", + data_source_id="01JTESTDATASOURCE00000001", + ) + assert folder == "hyperfleet-api" + + +def test_repository_folder_falls_back_to_id_when_name_empty() -> None: + folder = repository_folder_for_data_source(name=" ", data_source_id="ds-abc") + assert folder == "ds-abc" diff --git a/src/api/tests/unit/extraction/application/test_sticky_session_runtime_service.py b/src/api/tests/unit/extraction/application/test_sticky_session_runtime_service.py index f16bc3e61..b52f47196 100644 --- a/src/api/tests/unit/extraction/application/test_sticky_session_runtime_service.py +++ b/src/api/tests/unit/extraction/application/test_sticky_session_runtime_service.py @@ -69,7 +69,7 @@ async def 
read_for_knowledge_graph(self, *, knowledge_graph_id: str): class _StaticBootstrapBuilder: - async def resolve_job_package_ids(self, **kwargs): + async def resolve_job_packages(self, **kwargs): return () async def build(self, **kwargs): @@ -80,8 +80,17 @@ class _RecordingBootstrapBuilder: def __init__(self) -> None: self.calls: list[dict[str, object]] = [] - async def resolve_job_package_ids(self, **kwargs): - return ("pkg-1",) + async def resolve_job_packages(self, **kwargs): + from extraction.domain.prepared_job_package_source import PreparedJobPackageSource + + return ( + PreparedJobPackageSource( + package_id="pkg-1", + data_source_id="ds-1", + data_source_name="hyperfleet-api", + repository_folder="hyperfleet-api", + ), + ) async def build(self, **kwargs): self.calls.append(kwargs) diff --git a/src/api/tests/unit/extraction/infrastructure/test_prepared_job_package_reader.py b/src/api/tests/unit/extraction/infrastructure/test_prepared_job_package_reader.py index 320f91146..864d90327 100644 --- a/src/api/tests/unit/extraction/infrastructure/test_prepared_job_package_reader.py +++ b/src/api/tests/unit/extraction/infrastructure/test_prepared_job_package_reader.py @@ -65,11 +65,13 @@ async def test_prefers_latest_non_empty_job_package_per_data_source( rows = [ MagicMock( data_source_id="ds-1", + data_source_name="Hyperfleet API", job_package_id=empty_id, occurred_at="2026-05-31T12:00:00Z", ), MagicMock( data_source_id="ds-1", + data_source_name="Hyperfleet API", job_package_id=full_id, occurred_at="2026-05-31T11:00:00Z", ), @@ -79,11 +81,14 @@ async def test_prefers_latest_non_empty_job_package_per_data_source( job_package_work_dir=tmp_path, ) - package_ids = await reader.list_latest_for_knowledge_graph( + sources = await reader.list_latest_for_knowledge_graph( knowledge_graph_id="kg-1", ) - assert package_ids == (full_id,) + assert len(sources) == 1 + assert sources[0].package_id == full_id + assert sources[0].data_source_name == "Hyperfleet API" + assert 
sources[0].repository_folder == "hyperfleet-api" async def test_skips_data_source_when_all_packages_are_empty(self, tmp_path: Path) -> None: empty_id = "01JEMPTY000000000000000000" @@ -91,6 +96,7 @@ async def test_skips_data_source_when_all_packages_are_empty(self, tmp_path: Pat rows = [ MagicMock( data_source_id="ds-1", + data_source_name="Hyperfleet API", job_package_id=empty_id, occurred_at="2026-05-31T12:00:00Z", ), @@ -100,8 +106,8 @@ async def test_skips_data_source_when_all_packages_are_empty(self, tmp_path: Pat job_package_work_dir=tmp_path, ) - package_ids = await reader.list_latest_for_knowledge_graph( + sources = await reader.list_latest_for_knowledge_graph( knowledge_graph_id="kg-1", ) - assert package_ids == () + assert sources == () diff --git a/src/api/tests/unit/extraction/infrastructure/test_sticky_session_workdir_materializer.py b/src/api/tests/unit/extraction/infrastructure/test_sticky_session_workdir_materializer.py index f9332d126..a4fc852f0 100644 --- a/src/api/tests/unit/extraction/infrastructure/test_sticky_session_workdir_materializer.py +++ b/src/api/tests/unit/extraction/infrastructure/test_sticky_session_workdir_materializer.py @@ -5,6 +5,7 @@ import json from pathlib import Path +from extraction.domain.prepared_job_package_source import PreparedJobPackageSource from shared_kernel.job_package.builder import JobPackageBuilder from shared_kernel.job_package.value_objects import ( AdapterCheckpoint, @@ -20,97 +21,103 @@ ) -def _build_package(work_dir: Path, package_id: str) -> None: - content_bytes = b"# hello\n" - content_ref = ContentRef.from_bytes(content_bytes) +def _source( + *, + package_id: str, + data_source_id: str = "ds-1", + data_source_name: str = "hyperfleet-api", +) -> PreparedJobPackageSource: + return PreparedJobPackageSource( + package_id=package_id, + data_source_id=data_source_id, + data_source_name=data_source_name, + repository_folder=data_source_name.lower().replace(" ", "-"), + ) + + +def _build_package(work_dir: 
Path, package_id: str, *, with_file: bool) -> None: builder = JobPackageBuilder( data_source_id="ds-1", knowledge_graph_id="kg-1", sync_mode=SyncMode.FULL_REFRESH, package_id=JobPackageId(value=package_id), ) - ref = builder.add_content(content_bytes) - builder.add_changeset_entry( - ChangesetEntry( - operation=ChangeOperation.ADD, - id="file-1", - type="io.kartograph.change.file", - path="README.md", - content_ref=ref, - content_type="text/markdown", - metadata={}, + if with_file: + content = b"print('hello')\n" + ref = builder.add_content(content) + builder.add_changeset_entry( + ChangesetEntry( + operation=ChangeOperation.ADD, + id="file-1", + type="io.kartograph.change.file", + path="pkg/api/example.go", + content_ref=ref, + content_type="text/plain", + metadata={}, + ) ) - ) - builder.set_checkpoint(AdapterCheckpoint(schema_version="1.0.0", data={})) + builder.set_checkpoint(AdapterCheckpoint(schema_version="1.0.0", data={"commit_sha": "abc"})) builder.build(work_dir) def test_materializer_extracts_job_package_into_session_workspace(tmp_path: Path) -> None: package_id = "01JTESTPACK0000000000000000" - _build_package(tmp_path, package_id) + _build_package(tmp_path, package_id, with_file=True) materializer = StickySessionWorkdirMaterializer(job_package_work_dir=tmp_path) session_root = materializer.prepare( session_id="session-1", knowledge_graph_id="kg-1", - job_package_ids=(package_id,), + job_packages=(_source(package_id=package_id),), ) - repo_file = session_root / "repository-files" / package_id / "README.md" - assert repo_file.read_text(encoding="utf-8") == "# hello\n" + repo_file = session_root / "repository-files" / "hyperfleet-api" / "pkg/api/example.go" + assert repo_file.read_text(encoding="utf-8") == "print('hello')\n" -def test_materializer_does_not_discover_archives_when_package_ids_empty(tmp_path: Path) -> None: +def test_materializer_does_not_materialize_when_job_packages_empty(tmp_path: Path) -> None: package_id = "01JTESTPACK0000000000000001" - 
_build_package(tmp_path, package_id) + _build_package(tmp_path, package_id, with_file=True) materializer = StickySessionWorkdirMaterializer(job_package_work_dir=tmp_path) session_root = materializer.prepare( session_id="session-2", knowledge_graph_id="kg-1", - job_package_ids=(), + job_packages=(), ) assert not any((session_root / "repository-files").iterdir()) -def _build_empty_package(work_dir: Path, package_id: str) -> None: - builder = JobPackageBuilder( - data_source_id="ds-empty", - knowledge_graph_id="kg-1", - sync_mode=SyncMode.INCREMENTAL, - package_id=JobPackageId(value=package_id), - ) - builder.set_checkpoint(AdapterCheckpoint(schema_version="1.0.0", data={"commit_sha": "abc"})) - builder.build(work_dir) - - def test_materializer_skips_empty_job_packages(tmp_path: Path) -> None: empty_id = "01JEMPTY000000000000000000" full_id = "01JTESTPACK0000000000000003" - _build_empty_package(tmp_path, empty_id) - _build_package(tmp_path, full_id) + _build_package(tmp_path, empty_id, with_file=False) + _build_package(tmp_path, full_id, with_file=True) materializer = StickySessionWorkdirMaterializer(job_package_work_dir=tmp_path) session_root = materializer.prepare( session_id="session-empty", knowledge_graph_id="kg-1", - job_package_ids=(empty_id, full_id), + job_packages=( + _source(package_id=empty_id, data_source_name="empty-source"), + _source(package_id=full_id, data_source_name="hyperfleet-api"), + ), ) - assert not (session_root / "repository-files" / empty_id).exists() - assert (session_root / "repository-files" / full_id / "README.md").exists() + assert not (session_root / "repository-files" / "empty-source").exists() + assert (session_root / "repository-files" / "hyperfleet-api" / "pkg/api/example.go").exists() def test_materializer_writes_sources_index(tmp_path: Path) -> None: package_id = "01JTESTPACK0000000000000004" - _build_package(tmp_path, package_id) + _build_package(tmp_path, package_id, with_file=True) materializer = 
StickySessionWorkdirMaterializer(job_package_work_dir=tmp_path) session_root = materializer.prepare( session_id="session-index", knowledge_graph_id="kg-1", - job_package_ids=(package_id,), + job_packages=(_source(package_id=package_id, data_source_name="Hyperfleet E2E"),), ) index_path = session_root / "sources-index.json" @@ -120,25 +127,29 @@ def test_materializer_writes_sources_index(tmp_path: Path) -> None: assert len(payload["sources"]) == 1 source = payload["sources"][0] assert source["job_package_id"] == package_id + assert source["data_source_name"] == "Hyperfleet E2E" + assert source["repository_folder"] == "hyperfleet-e2e" assert source["entry_count"] == 1 - assert source["sample_paths"] == ["README.md"] + assert source["sample_paths"] == ["pkg/api/example.go"] + assert source["repository_root"] == "repository-files/hyperfleet-e2e" def test_materializer_refresh_preserves_session_root_directory(tmp_path: Path) -> None: package_id = "01JTESTPACK0000000000000002" - _build_package(tmp_path, package_id) + _build_package(tmp_path, package_id, with_file=True) materializer = StickySessionWorkdirMaterializer(job_package_work_dir=tmp_path) + packages = (_source(package_id=package_id),) first_root = materializer.prepare( session_id="session-3", knowledge_graph_id="kg-1", - job_package_ids=(package_id,), + job_packages=packages, ) second_root = materializer.prepare( session_id="session-3", knowledge_graph_id="kg-1", - job_package_ids=(package_id,), + job_packages=packages, ) assert first_root == second_root - assert (second_root / "repository-files" / package_id / "README.md").exists() + assert (second_root / "repository-files" / "hyperfleet-api" / "pkg/api/example.go").exists() diff --git a/src/dev-ui/app/assets/css/main.css b/src/dev-ui/app/assets/css/main.css index b07e56d90..4540ddb2c 100644 --- a/src/dev-ui/app/assets/css/main.css +++ b/src/dev-ui/app/assets/css/main.css @@ -127,3 +127,98 @@ @apply bg-background text-foreground; } } + +@layer components { + 
.chat-md { + @apply text-sm leading-relaxed text-foreground; + } + + .chat-md > :first-child { + margin-top: 0; + } + + .chat-md > :last-child { + margin-bottom: 0; + } + + .chat-md h1, + .chat-md h2, + .chat-md h3, + .chat-md h4 { + @apply font-semibold tracking-tight text-foreground; + } + + .chat-md h1 { + @apply mt-4 mb-2 text-lg; + } + + .chat-md h2 { + @apply mt-4 mb-2 text-base; + } + + .chat-md h3 { + @apply mt-3 mb-1.5 text-sm; + } + + .chat-md h4 { + @apply mt-2 mb-1 text-sm; + } + + .chat-md p { + @apply my-2; + } + + .chat-md ul, + .chat-md ol { + @apply my-2 ml-5 space-y-1; + } + + .chat-md ul { + @apply list-disc; + } + + .chat-md ol { + @apply list-decimal; + } + + .chat-md li > p { + @apply my-1; + } + + .chat-md blockquote { + @apply my-3 border-l-2 border-amber-500/60 pl-3 text-muted-foreground italic; + } + + .chat-md hr { + @apply my-4 border-border; + } + + .chat-md a { + @apply font-medium text-primary underline underline-offset-2 hover:text-primary/90; + } + + .chat-md code { + @apply rounded bg-muted px-1 py-0.5 font-mono text-[0.85em] text-foreground; + } + + .chat-md pre { + @apply my-3 overflow-x-auto rounded-lg border border-border/80 bg-muted/60 p-3; + } + + .chat-md pre code { + @apply bg-transparent p-0 text-xs leading-relaxed; + } + + .chat-md table { + @apply my-3 w-full border-collapse text-left text-xs; + } + + .chat-md th, + .chat-md td { + @apply border border-border px-2 py-1 align-top; + } + + .chat-md th { + @apply bg-muted/50 font-semibold; + } +} diff --git a/src/dev-ui/app/components/extraction/SharedConversationPanel.vue b/src/dev-ui/app/components/extraction/SharedConversationPanel.vue index 6e143ae21..dcf835af8 100644 --- a/src/dev-ui/app/components/extraction/SharedConversationPanel.vue +++ b/src/dev-ui/app/components/extraction/SharedConversationPanel.vue @@ -1,5 +1,7 @@ <script setup lang="ts"> import { computed, nextTick, onMounted, ref, watch } from 'vue' +import DOMPurify from 'isomorphic-dompurify' +import { 
marked } from 'marked' import { Bot, Loader2, RefreshCw, RotateCcw, Send, Sparkles, User } from 'lucide-vue-next' import { normalizeThinkingActivityLines, @@ -79,12 +81,33 @@ const chatScrollRef = ref<HTMLElement | null>(null) const textareaRef = ref<HTMLTextAreaElement | null>(null) const composerInputId = 'graph-management-chat-input' +marked.setOptions({ gfm: true, breaks: true }) + const messageHistory = computed(() => props.session?.message_history ?? []) -const chatInputDisabled = computed( - () => props.loading || props.clearing || props.sending || props.inputDisabled || props.forbidden, +const showInitialConversationLoading = computed( + () => props.loading && messageHistory.value.length === 0, +) + +const showConversationRefreshIndicator = computed( + () => props.loading && messageHistory.value.length > 0, +) + +const composerBlocked = computed( + () => props.loading || props.clearing || props.inputDisabled || props.forbidden, +) + +const chatSendDisabled = computed( + () => composerBlocked.value || props.sending || !props.draftMessage.trim(), ) +const sendDisabledReason = computed(() => { + if (props.sending) { + return 'Wait for the assistant to finish this turn before sending.' + } + return props.inputDisabledReason ?? 
undefined +}) + const showRuntimeActivity = computed( () => props.preparingRuntime || props.sending, ) @@ -108,10 +131,14 @@ function messageText(entry: ConversationEntry): string { } function scrollToBottom() { - const el = chatScrollRef.value - if (el) { - el.scrollTop = el.scrollHeight - } + requestAnimationFrame(() => { + requestAnimationFrame(() => { + const el = chatScrollRef.value + if (el) { + el.scrollTop = el.scrollHeight + } + }) + }) } function adjustTextareaHeight() { @@ -130,32 +157,19 @@ function adjustTextareaHeight() { function handleComposerEnter(event: KeyboardEvent) { if (event.shiftKey) return - if (chatInputDisabled.value || !props.draftMessage.trim()) return + if (chatSendDisabled.value) return event.preventDefault() sendDraftMessage() } -function renderAssistantHtml(text: string): string { - let s = text.replace(/&/g, '&').replace(/</g, '<').replace(/>/g, '>') - s = s.replace(/\*\*([^*]+)\*\*/g, '<strong class="font-semibold text-foreground">$1</strong>') - s = s.replace( - /`([^`]+)`/g, - '<code class="rounded bg-muted px-1 py-0.5 text-xs font-mono text-foreground">$1</code>', - ) - s = s.replace( - /^> (.+)$/gm, - '<p class="my-2 border-l-2 border-amber-500/60 pl-3 text-sm text-muted-foreground italic">$1</p>', - ) - s = s.replace( - /\[([^\]]+)\]\(([^)]+)\)/g, - '<a class="text-primary font-medium underline underline-offset-2 hover:text-primary/90" href="$2">$1</a>', - ) - s = s.replace(/## (.+)$/gm, '<h3 class="text-base font-semibold mt-3 mb-1 text-foreground">$1</h3>') - s = s.replace(/### (.+)$/gm, '<h4 class="text-sm font-semibold mt-2 text-foreground">$1</h4>') - s = s.replace(/^---$/gm, '<hr class="my-3 border-border" />') - s = s.replace(/\n\n+/g, '<br /><br />') - s = s.replace(/\n/g, '<br />') - return s +function renderAssistantMarkdown(text: string): string { + const trimmed = text.trim() + if (!trimmed) return '' + const raw = marked.parse(trimmed, { async: false }) as string + return DOMPurify.sanitize(raw, { + USE_PROFILES: 
{ html: true }, + ADD_ATTR: ['target', 'rel', 'class'], + }) } function confirmClearChat() { @@ -165,19 +179,28 @@ function confirmClearChat() { function sendDraftMessage() { const trimmed = props.draftMessage.trim() - if (!trimmed || chatInputDisabled.value) return + if (!trimmed || chatSendDisabled.value) return emit('sendMessage', trimmed) emit('update:draftMessage', '') void nextTick(() => adjustTextareaHeight()) } watch( - () => [messageHistory.value.length, props.activityLines, props.sending], + () => props.session, async () => { await nextTick() scrollToBottom() }, - { deep: true }, + { deep: true, flush: 'post' }, +) + +watch( + () => [props.activityLines, props.sending, props.loading, showRuntimeActivity.value], + async () => { + await nextTick() + scrollToBottom() + }, + { deep: true, flush: 'post' }, ) watch( @@ -195,7 +218,10 @@ watch( ) onMounted(() => { - void nextTick(() => adjustTextareaHeight()) + void nextTick(() => { + adjustTextareaHeight() + scrollToBottom() + }) }) </script> @@ -265,7 +291,7 @@ onMounted(() => { class="min-h-[14rem] max-h-[min(32rem,60vh)] space-y-4 overflow-y-auto bg-muted/10 px-4 py-4 sm:px-6" > <div - v-if="loading" + v-if="showInitialConversationLoading" class="flex flex-col items-center justify-center gap-3 py-12 text-muted-foreground" aria-busy="true" aria-live="polite" @@ -274,6 +300,15 @@ onMounted(() => { <p class="text-center text-sm text-foreground/80">Loading conversation session…</p> </div> <template v-else> + <div + v-if="showConversationRefreshIndicator" + class="flex items-center justify-center gap-2 py-1 text-xs text-muted-foreground" + aria-busy="true" + aria-live="polite" + > + <Loader2 class="size-3.5 shrink-0 animate-spin" /> + <span>Refreshing conversation…</span> + </div> <div v-for="(entry, idx) in messageHistory" :key="`msg-${idx}-${entry.role ?? 
'unknown'}`" @@ -305,7 +340,7 @@ onMounted(() => { <div v-else class="chat-md space-y-1 break-words [&_a]:break-all [&_code]:break-all" - v-html="renderAssistantHtml(messageText(entry))" + v-html="renderAssistantMarkdown(messageText(entry))" /> </div> </div> @@ -366,7 +401,7 @@ onMounted(() => { ref="textareaRef" :value="draftMessage" rows="1" - :disabled="chatInputDisabled" + :disabled="composerBlocked" :placeholder="inputPlaceholder" class="w-full flex-1 resize-none rounded-md border border-input bg-background px-3 py-2 text-sm leading-relaxed shadow-sm placeholder:text-muted-foreground focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring disabled:opacity-50" @input="emit('update:draftMessage', ($event.target as HTMLTextAreaElement).value)" @@ -375,8 +410,8 @@ onMounted(() => { <Button type="button" class="h-10 min-h-10 w-full shrink-0 sm:w-auto sm:px-6" - :disabled="chatInputDisabled || !draftMessage.trim()" - :title="inputDisabledReason ?? undefined" + :disabled="chatSendDisabled" + :title="sendDisabledReason" @click="sendDraftMessage" > <Loader2 v-if="sending" class="size-4 animate-spin" /> diff --git a/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts b/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts index 8d1e12f6e..6cacbc39d 100644 --- a/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts +++ b/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts @@ -371,7 +371,9 @@ describe('Shared conversation panel - extraction UX contract', () => { expect(sharedConversationPanelVue).toContain('normalizeThinkingActivityLines') expect(sharedConversationPanelVue).toContain('THINKING_DISPLAY_LINE_COUNT') expect(sharedConversationPanelVue).toContain('chatScrollRef') - expect(sharedConversationPanelVue).toContain('renderAssistantHtml') + expect(sharedConversationPanelVue).toContain('renderAssistantMarkdown') + expect(sharedConversationPanelVue).toContain('showInitialConversationLoading') + 
expect(sharedConversationPanelVue).toContain('showConversationRefreshIndicator') expect(sharedConversationPanelVue).toContain('scrollToBottom') expect(sharedConversationPanelVue).toContain('el.scrollTop = el.scrollHeight') }) diff --git a/src/dev-ui/package-lock.json b/src/dev-ui/package-lock.json new file mode 100644 index 000000000..0f4dffae0 --- /dev/null +++ b/src/dev-ui/package-lock.json @@ -0,0 +1,9739 @@ +{ + "name": "ui", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "name": "ui", + "hasInstallScript": true, + "dependencies": { + "@codemirror/autocomplete": "^6.20.0", + "@codemirror/commands": "^6.10.2", + "@codemirror/lang-json": "^6.0.2", + "@codemirror/language": "^6.12.1", + "@codemirror/lint": "^6.9.4", + "@codemirror/search": "^6.6.0", + "@codemirror/state": "^6.5.4", + "@codemirror/view": "^6.39.14", + "@cosmograph/cosmograph": "2.0.1", + "@lezer/common": "^1.5.1", + "@lezer/highlight": "^1.2.3", + "@tailwindcss/vite": "^4.1.18", + "@tanstack/vue-table": "^8.21.3", + "@tanstack/vue-virtual": "^3.13.18", + "@vueuse/core": "^14.2.1", + "class-variance-authority": "^0.7.1", + "clsx": "^2.1.1", + "codemirror": "^6.0.2", + "cytoscape": "^3.33.1", + "cytoscape-cise": "^2.0.1", + "cytoscape-fcose": "^2.2.0", + "isomorphic-dompurify": "^2.36.0", + "lucide-vue-next": "^0.563.0", + "marked": "^15.0.12", + "nuxt": "^4.3.1", + "oidc-client-ts": "^3.4.1", + "reka-ui": "^2.8.0", + "tailwind-merge": "^3.4.0", + "tailwindcss": "^4.1.18", + "vue": "^3.5.28", + "vue-router": "^4.6.4", + "vue-sonner": "^2.0.9" + }, + "devDependencies": { + "@types/cytoscape": "^3.31.0", + "@vitejs/plugin-vue": "^5.2.1", + "@vue/test-utils": "^2.4.6", + "happy-dom": "^15.11.7", + "tw-animate-css": "^1.4.0", + "typescript": "^5.8.3", + "vitest": "^2.1.9", + "vue-tsc": "^2.2.10" + } + }, + "node_modules/@acemir/cssom": { + "version": "0.9.31", + "resolved": "https://registry.npmjs.org/@acemir/cssom/-/cssom-0.9.31.tgz", + "integrity": 
"sha512-ZnR3GSaH+/vJ0YlHau21FjfLYjMpYVIzTD8M8vIEQvIGxeOXyXdzCI140rrCY862p/C/BbzWsjc1dgnM9mkoTA==", + "license": "MIT" + }, + "node_modules/@asamuzakjp/css-color": { + "version": "5.1.11", + "resolved": "https://registry.npmjs.org/@asamuzakjp/css-color/-/css-color-5.1.11.tgz", + "integrity": "sha512-KVw6qIiCTUQhByfTd78h2yD1/00waTmm9uy/R7Ck/ctUyAPj+AEDLkQIdJW0T8+qGgj3j5bpNKK7Q3G+LedJWg==", + "license": "MIT", + "dependencies": { + "@asamuzakjp/generational-cache": "^1.0.1", + "@csstools/css-calc": "^3.2.0", + "@csstools/css-color-parser": "^4.1.0", + "@csstools/css-parser-algorithms": "^4.0.0", + "@csstools/css-tokenizer": "^4.0.0" + }, + "engines": { + "node": "^20.19.0 || ^22.12.0 || >=24.0.0" + } + }, + "node_modules/@asamuzakjp/dom-selector": { + "version": "6.8.1", + "resolved": "https://registry.npmjs.org/@asamuzakjp/dom-selector/-/dom-selector-6.8.1.tgz", + "integrity": "sha512-MvRz1nCqW0fsy8Qz4dnLIvhOlMzqDVBabZx6lH+YywFDdjXhMY37SmpV1XFX3JzG5GWHn63j6HX6QPr3lZXHvQ==", + "license": "MIT", + "dependencies": { + "@asamuzakjp/nwsapi": "^2.3.9", + "bidi-js": "^1.0.3", + "css-tree": "^3.1.0", + "is-potential-custom-element-name": "^1.0.1", + "lru-cache": "^11.2.6" + } + }, + "node_modules/@asamuzakjp/dom-selector/node_modules/lru-cache": { + "version": "11.5.1", + "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-11.5.1.tgz", + "integrity": "sha512-RPimw/7aMdv2oqRrxKwvZXcPfwBrn/JZ2xYcY9Hus/6LaS3VOAKVWKWgNLCFSiOm1ESXinjsDlidVU7JlnCN2A==", + "license": "BlueOak-1.0.0", + "engines": { + "node": "20 || >=22" + } + }, + "node_modules/@asamuzakjp/generational-cache": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/@asamuzakjp/generational-cache/-/generational-cache-1.0.1.tgz", + "integrity": "sha512-wajfB8KqzMCN2KGNFdLkReeHncd0AslUSrvHVvvYWuU8ghncRJoA50kT3zP9MVL0+9g4/67H+cdvBskj9THPzg==", + "license": "MIT", + "engines": { + "node": "^20.19.0 || ^22.12.0 || >=24.0.0" + } + }, + "node_modules/@asamuzakjp/nwsapi": { + "version": "2.3.9", + 
"resolved": "https://registry.npmjs.org/@asamuzakjp/nwsapi/-/nwsapi-2.3.9.tgz", + "integrity": "sha512-n8GuYSrI9bF7FFZ/SjhwevlHc8xaVlb/7HmHelnc/PZXBD2ZR49NnN9sMMuDdEGPeeRQ5d0hqlSlEpgCX3Wl0Q==", + "license": "MIT" + }, + "node_modules/@babel/code-frame": { + "version": "7.29.0", + "license": "MIT", + "dependencies": { + "@babel/helper-validator-identifier": "^7.28.5", + "js-tokens": "^4.0.0", + "picocolors": "^1.1.1" + }, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/compat-data": { + "version": "7.29.3", + "license": "MIT", + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/core": { + "version": "7.29.0", + "license": "MIT", + "dependencies": { + "@babel/code-frame": "^7.29.0", + "@babel/generator": "^7.29.0", + "@babel/helper-compilation-targets": "^7.28.6", + "@babel/helper-module-transforms": "^7.28.6", + "@babel/helpers": "^7.28.6", + "@babel/parser": "^7.29.0", + "@babel/template": "^7.28.6", + "@babel/traverse": "^7.29.0", + "@babel/types": "^7.29.0", + "@jridgewell/remapping": "^2.3.5", + "convert-source-map": "^2.0.0", + "debug": "^4.1.0", + "gensync": "^1.0.0-beta.2", + "json5": "^2.2.3", + "semver": "^6.3.1" + }, + "engines": { + "node": ">=6.9.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/babel" + } + }, + "node_modules/@babel/core/node_modules/semver": { + "version": "6.3.1", + "license": "ISC", + "bin": { + "semver": "bin/semver.js" + } + }, + "node_modules/@babel/generator": { + "version": "7.29.1", + "license": "MIT", + "dependencies": { + "@babel/parser": "^7.29.0", + "@babel/types": "^7.29.0", + "@jridgewell/gen-mapping": "^0.3.12", + "@jridgewell/trace-mapping": "^0.3.28", + "jsesc": "^3.0.2" + }, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/helper-annotate-as-pure": { + "version": "7.27.3", + "license": "MIT", + "dependencies": { + "@babel/types": "^7.27.3" + }, + "engines": { + "node": ">=6.9.0" + } + }, + 
"node_modules/@babel/helper-compilation-targets": { + "version": "7.28.6", + "license": "MIT", + "dependencies": { + "@babel/compat-data": "^7.28.6", + "@babel/helper-validator-option": "^7.27.1", + "browserslist": "^4.24.0", + "lru-cache": "^5.1.1", + "semver": "^6.3.1" + }, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/helper-compilation-targets/node_modules/semver": { + "version": "6.3.1", + "license": "ISC", + "bin": { + "semver": "bin/semver.js" + } + }, + "node_modules/@babel/helper-create-class-features-plugin": { + "version": "7.29.3", + "license": "MIT", + "dependencies": { + "@babel/helper-annotate-as-pure": "^7.27.3", + "@babel/helper-member-expression-to-functions": "^7.28.5", + "@babel/helper-optimise-call-expression": "^7.27.1", + "@babel/helper-replace-supers": "^7.28.6", + "@babel/helper-skip-transparent-expression-wrappers": "^7.27.1", + "@babel/traverse": "^7.29.0", + "semver": "^6.3.1" + }, + "engines": { + "node": ">=6.9.0" + }, + "peerDependencies": { + "@babel/core": "^7.0.0" + } + }, + "node_modules/@babel/helper-create-class-features-plugin/node_modules/semver": { + "version": "6.3.1", + "license": "ISC", + "bin": { + "semver": "bin/semver.js" + } + }, + "node_modules/@babel/helper-globals": { + "version": "7.28.0", + "license": "MIT", + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/helper-member-expression-to-functions": { + "version": "7.28.5", + "license": "MIT", + "dependencies": { + "@babel/traverse": "^7.28.5", + "@babel/types": "^7.28.5" + }, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/helper-module-imports": { + "version": "7.28.6", + "license": "MIT", + "dependencies": { + "@babel/traverse": "^7.28.6", + "@babel/types": "^7.28.6" + }, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/helper-module-transforms": { + "version": "7.28.6", + "license": "MIT", + "dependencies": { + "@babel/helper-module-imports": "^7.28.6", + 
"@babel/helper-validator-identifier": "^7.28.5", + "@babel/traverse": "^7.28.6" + }, + "engines": { + "node": ">=6.9.0" + }, + "peerDependencies": { + "@babel/core": "^7.0.0" + } + }, + "node_modules/@babel/helper-optimise-call-expression": { + "version": "7.27.1", + "license": "MIT", + "dependencies": { + "@babel/types": "^7.27.1" + }, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/helper-plugin-utils": { + "version": "7.28.6", + "license": "MIT", + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/helper-replace-supers": { + "version": "7.28.6", + "license": "MIT", + "dependencies": { + "@babel/helper-member-expression-to-functions": "^7.28.5", + "@babel/helper-optimise-call-expression": "^7.27.1", + "@babel/traverse": "^7.28.6" + }, + "engines": { + "node": ">=6.9.0" + }, + "peerDependencies": { + "@babel/core": "^7.0.0" + } + }, + "node_modules/@babel/helper-skip-transparent-expression-wrappers": { + "version": "7.27.1", + "license": "MIT", + "dependencies": { + "@babel/traverse": "^7.27.1", + "@babel/types": "^7.27.1" + }, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/helper-string-parser": { + "version": "7.27.1", + "license": "MIT", + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/helper-validator-identifier": { + "version": "7.28.5", + "license": "MIT", + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/helper-validator-option": { + "version": "7.27.1", + "license": "MIT", + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/helpers": { + "version": "7.29.2", + "license": "MIT", + "dependencies": { + "@babel/template": "^7.28.6", + "@babel/types": "^7.29.0" + }, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/parser": { + "version": "7.29.3", + "license": "MIT", + "dependencies": { + "@babel/types": "^7.29.0" + }, + "bin": { + "parser": "bin/babel-parser.js" + }, + "engines": { + "node": ">=6.0.0" + } + }, + 
"node_modules/@babel/plugin-syntax-jsx": { + "version": "7.28.6", + "license": "MIT", + "dependencies": { + "@babel/helper-plugin-utils": "^7.28.6" + }, + "engines": { + "node": ">=6.9.0" + }, + "peerDependencies": { + "@babel/core": "^7.0.0-0" + } + }, + "node_modules/@babel/plugin-syntax-typescript": { + "version": "7.28.6", + "license": "MIT", + "dependencies": { + "@babel/helper-plugin-utils": "^7.28.6" + }, + "engines": { + "node": ">=6.9.0" + }, + "peerDependencies": { + "@babel/core": "^7.0.0-0" + } + }, + "node_modules/@babel/plugin-transform-typescript": { + "version": "7.28.6", + "license": "MIT", + "dependencies": { + "@babel/helper-annotate-as-pure": "^7.27.3", + "@babel/helper-create-class-features-plugin": "^7.28.6", + "@babel/helper-plugin-utils": "^7.28.6", + "@babel/helper-skip-transparent-expression-wrappers": "^7.27.1", + "@babel/plugin-syntax-typescript": "^7.28.6" + }, + "engines": { + "node": ">=6.9.0" + }, + "peerDependencies": { + "@babel/core": "^7.0.0-0" + } + }, + "node_modules/@babel/template": { + "version": "7.28.6", + "license": "MIT", + "dependencies": { + "@babel/code-frame": "^7.28.6", + "@babel/parser": "^7.28.6", + "@babel/types": "^7.28.6" + }, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/traverse": { + "version": "7.29.0", + "license": "MIT", + "dependencies": { + "@babel/code-frame": "^7.29.0", + "@babel/generator": "^7.29.0", + "@babel/helper-globals": "^7.28.0", + "@babel/parser": "^7.29.0", + "@babel/template": "^7.28.6", + "@babel/types": "^7.29.0", + "debug": "^4.3.1" + }, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/types": { + "version": "7.29.0", + "license": "MIT", + "dependencies": { + "@babel/helper-string-parser": "^7.27.1", + "@babel/helper-validator-identifier": "^7.28.5" + }, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@bramus/specificity": { + "version": "2.4.2", + "resolved": "https://registry.npmjs.org/@bramus/specificity/-/specificity-2.4.2.tgz", + 
"integrity": "sha512-ctxtJ/eA+t+6q2++vj5j7FYX3nRu311q1wfYH3xjlLOsczhlhxAg2FWNUXhpGvAw3BWo1xBcvOV6/YLc2r5FJw==", + "license": "MIT", + "dependencies": { + "css-tree": "^3.0.0" + }, + "bin": { + "specificity": "bin/cli.js" + } + }, + "node_modules/@clack/core": { + "version": "1.3.1", + "license": "MIT", + "dependencies": { + "fast-wrap-ansi": "^0.2.0", + "sisteransi": "^1.0.5" + }, + "engines": { + "node": ">= 20.12.0" + } + }, + "node_modules/@clack/prompts": { + "version": "1.4.0", + "license": "MIT", + "dependencies": { + "@clack/core": "1.3.1", + "fast-string-width": "^3.0.2", + "fast-wrap-ansi": "^0.2.0", + "sisteransi": "^1.0.5" + }, + "engines": { + "node": ">= 20.12.0" + } + }, + "node_modules/@cloudflare/kv-asset-handler": { + "version": "0.4.2", + "license": "MIT OR Apache-2.0", + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@codemirror/autocomplete": { + "version": "6.20.2", + "license": "MIT", + "dependencies": { + "@codemirror/language": "^6.0.0", + "@codemirror/state": "^6.0.0", + "@codemirror/view": "^6.17.0", + "@lezer/common": "^1.0.0" + } + }, + "node_modules/@codemirror/commands": { + "version": "6.10.3", + "license": "MIT", + "dependencies": { + "@codemirror/language": "^6.0.0", + "@codemirror/state": "^6.6.0", + "@codemirror/view": "^6.27.0", + "@lezer/common": "^1.1.0" + } + }, + "node_modules/@codemirror/lang-json": { + "version": "6.0.2", + "license": "MIT", + "dependencies": { + "@codemirror/language": "^6.0.0", + "@lezer/json": "^1.0.0" + } + }, + "node_modules/@codemirror/language": { + "version": "6.12.3", + "license": "MIT", + "dependencies": { + "@codemirror/state": "^6.0.0", + "@codemirror/view": "^6.23.0", + "@lezer/common": "^1.5.0", + "@lezer/highlight": "^1.0.0", + "@lezer/lr": "^1.0.0", + "style-mod": "^4.0.0" + } + }, + "node_modules/@codemirror/lint": { + "version": "6.9.6", + "license": "MIT", + "dependencies": { + "@codemirror/state": "^6.0.0", + "@codemirror/view": "^6.42.0", + "crelt": "^1.0.5" + } + }, + 
"node_modules/@codemirror/search": { + "version": "6.7.0", + "license": "MIT", + "dependencies": { + "@codemirror/state": "^6.0.0", + "@codemirror/view": "^6.37.0", + "crelt": "^1.0.5" + } + }, + "node_modules/@codemirror/state": { + "version": "6.6.0", + "license": "MIT", + "dependencies": { + "@marijn/find-cluster-break": "^1.0.0" + } + }, + "node_modules/@codemirror/view": { + "version": "6.43.0", + "license": "MIT", + "dependencies": { + "@codemirror/state": "^6.6.0", + "crelt": "^1.0.6", + "style-mod": "^4.1.0", + "w3c-keyname": "^2.2.4" + } + }, + "node_modules/@colordx/core": { + "version": "5.4.3", + "license": "MIT" + }, + "node_modules/@cosmograph/cosmograph": { + "version": "2.0.1", + "license": "CC-BY-NC-4.0", + "dependencies": { + "@cosmograph/ui": "2.0.1", + "@cosmos.gl/graph": "^2.6.2-rc.0", + "@duckdb/duckdb-wasm": "1.29.1-dev260.0", + "@interacta/css-labels": "^0.1.3-beta.1", + "@uwdata/mosaic-core": "^0.21.1", + "@uwdata/mosaic-plot": "^0.21.1", + "@uwdata/mosaic-sql": "^0.21.1", + "@uwdata/vgplot": "^0.21.1", + "apache-arrow": "17.0.0", + "d3-array": "^3.2.4", + "d3-brush": "^3.0.0", + "d3-color": "^3.1.0", + "d3-interpolate": "^3.0.1", + "d3-scale": "^4.0.2", + "d3-selection": "^3.0.0", + "dompurify": "^3.2.6" + } + }, + "node_modules/@cosmograph/ui": { + "version": "2.0.1", + "license": "CC-BY-NC-4.0", + "dependencies": { + "@juggle/resize-observer": "^3.4.0", + "d3-array": "^3.2.4", + "d3-axis": "^3.0.0", + "d3-brush": "^3.0.0", + "d3-format": "^3.1.0", + "d3-scale": "^4.0.2", + "d3-selection": "^3.0.0", + "d3-time-format": "^4.1.0", + "d3-transition": "^3.0.1" + } + }, + "node_modules/@cosmos.gl/graph": { + "version": "2.6.4", + "license": "MIT", + "dependencies": { + "d3-array": "^3.2.0", + "d3-color": "^3.1.0", + "d3-drag": "^3.0.0", + "d3-ease": "^3.0.1", + "d3-scale": "^4.0.2", + "d3-selection": "^3.0.0", + "d3-transition": "^3.0.1", + "d3-zoom": "^3.0.0", + "dompurify": "^3.2.6", + "gl-bench": "^1.0.42", + "gl-matrix": "^3.4.3", + 
"random": "^4.1.0", + "regl": "^2.1.0" + }, + "engines": { + "node": ">=12.2.0", + "npm": ">=7.0.0" + } + }, + "node_modules/@csstools/color-helpers": { + "version": "6.0.2", + "resolved": "https://registry.npmjs.org/@csstools/color-helpers/-/color-helpers-6.0.2.tgz", + "integrity": "sha512-LMGQLS9EuADloEFkcTBR3BwV/CGHV7zyDxVRtVDTwdI2Ca4it0CCVTT9wCkxSgokjE5Ho41hEPgb8OEUwoXr6Q==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/csstools" + }, + { + "type": "opencollective", + "url": "https://opencollective.com/csstools" + } + ], + "license": "MIT-0", + "engines": { + "node": ">=20.19.0" + } + }, + "node_modules/@csstools/css-calc": { + "version": "3.2.1", + "resolved": "https://registry.npmjs.org/@csstools/css-calc/-/css-calc-3.2.1.tgz", + "integrity": "sha512-DtdHlgXh5ZkA43cwBcAm+huzgJiwx3ZTWVjBs94kwz2xKqSimDA3lBgCjphYgwgVUMWatSM0pDd8TILB1yrVVg==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/csstools" + }, + { + "type": "opencollective", + "url": "https://opencollective.com/csstools" + } + ], + "license": "MIT", + "engines": { + "node": ">=20.19.0" + }, + "peerDependencies": { + "@csstools/css-parser-algorithms": "^4.0.0", + "@csstools/css-tokenizer": "^4.0.0" + } + }, + "node_modules/@csstools/css-color-parser": { + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/@csstools/css-color-parser/-/css-color-parser-4.1.1.tgz", + "integrity": "sha512-eZ5XOtyhK+mggRafYUWzA0tvaYOFgdY8AkgQiCJF9qNAePnUo/zmsqqYubBBb3sQ8uNUaSKTY9s9klfRaAXL0g==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/csstools" + }, + { + "type": "opencollective", + "url": "https://opencollective.com/csstools" + } + ], + "license": "MIT", + "dependencies": { + "@csstools/color-helpers": "^6.0.2", + "@csstools/css-calc": "^3.2.1" + }, + "engines": { + "node": ">=20.19.0" + }, + "peerDependencies": { + "@csstools/css-parser-algorithms": "^4.0.0", + "@csstools/css-tokenizer": "^4.0.0" + } + }, + 
"node_modules/@csstools/css-parser-algorithms": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/@csstools/css-parser-algorithms/-/css-parser-algorithms-4.0.0.tgz", + "integrity": "sha512-+B87qS7fIG3L5h3qwJ/IFbjoVoOe/bpOdh9hAjXbvx0o8ImEmUsGXN0inFOnk2ChCFgqkkGFQ+TpM5rbhkKe4w==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/csstools" + }, + { + "type": "opencollective", + "url": "https://opencollective.com/csstools" + } + ], + "license": "MIT", + "engines": { + "node": ">=20.19.0" + }, + "peerDependencies": { + "@csstools/css-tokenizer": "^4.0.0" + } + }, + "node_modules/@csstools/css-syntax-patches-for-csstree": { + "version": "1.1.4", + "resolved": "https://registry.npmjs.org/@csstools/css-syntax-patches-for-csstree/-/css-syntax-patches-for-csstree-1.1.4.tgz", + "integrity": "sha512-wgsqt92b7C7tQhIdPNxj0n9zuUbQlvAuI1exyzeNrOKOi62SD7ren8zqszmpVREjAOqg8cD2FqYhQfAuKjk4sw==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/csstools" + }, + { + "type": "opencollective", + "url": "https://opencollective.com/csstools" + } + ], + "license": "MIT-0", + "peerDependencies": { + "css-tree": "^3.2.1" + }, + "peerDependenciesMeta": { + "css-tree": { + "optional": true + } + } + }, + "node_modules/@csstools/css-tokenizer": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/@csstools/css-tokenizer/-/css-tokenizer-4.0.0.tgz", + "integrity": "sha512-QxULHAm7cNu72w97JUNCBFODFaXpbDg+dP8b/oWFAZ2MTRppA3U00Y2L1HqaS4J6yBqxwa/Y3nMBaxVKbB/NsA==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/csstools" + }, + { + "type": "opencollective", + "url": "https://opencollective.com/csstools" + } + ], + "license": "MIT", + "engines": { + "node": ">=20.19.0" + } + }, + "node_modules/@duckdb/duckdb-wasm": { + "version": "1.29.1-dev260.0", + "license": "MIT", + "dependencies": { + "apache-arrow": "^17.0.0" + } + }, + "node_modules/@dxup/nuxt": { + "version": "0.4.1", + "license": 
"MIT", + "dependencies": { + "@dxup/unimport": "^0.1.2", + "@nuxt/kit": "^4.4.2", + "chokidar": "^5.0.0", + "pathe": "^2.0.3", + "tinyglobby": "^0.2.16" + }, + "peerDependencies": { + "typescript": "*" + }, + "peerDependenciesMeta": { + "typescript": { + "optional": true + } + } + }, + "node_modules/@dxup/unimport": { + "version": "0.1.2", + "license": "MIT" + }, + "node_modules/@esbuild/linux-x64": { + "version": "0.28.0", + "cpu": [ + "x64" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@exodus/bytes": { + "version": "1.15.1", + "resolved": "https://registry.npmjs.org/@exodus/bytes/-/bytes-1.15.1.tgz", + "integrity": "sha512-S6mL0yNB/Abt9Ei4tq8gDhcczc4S3+vQ4ra7vxnAf+YHC02srtqxKKZghx2Dq6p0e66THKwR6r8N6P95wEty7Q==", + "license": "MIT", + "engines": { + "node": "^20.19.0 || ^22.12.0 || >=24.0.0" + }, + "peerDependencies": { + "@noble/hashes": "^1.8.0 || ^2.0.0" + }, + "peerDependenciesMeta": { + "@noble/hashes": { + "optional": true + } + } + }, + "node_modules/@floating-ui/core": { + "version": "1.7.5", + "license": "MIT", + "dependencies": { + "@floating-ui/utils": "^0.2.11" + } + }, + "node_modules/@floating-ui/dom": { + "version": "1.7.6", + "license": "MIT", + "dependencies": { + "@floating-ui/core": "^1.7.5", + "@floating-ui/utils": "^0.2.11" + } + }, + "node_modules/@floating-ui/utils": { + "version": "0.2.11", + "license": "MIT" + }, + "node_modules/@floating-ui/vue": { + "version": "1.1.11", + "license": "MIT", + "dependencies": { + "@floating-ui/dom": "^1.7.6", + "@floating-ui/utils": "^0.2.11", + "vue-demi": ">=0.13.0" + } + }, + "node_modules/@floating-ui/vue/node_modules/vue-demi": { + "version": "0.14.10", + "hasInstallScript": true, + "license": "MIT", + "bin": { + "vue-demi-fix": "bin/vue-demi-fix.js", + "vue-demi-switch": "bin/vue-demi-switch.js" + }, + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/sponsors/antfu" + }, + 
"peerDependencies": { + "@vue/composition-api": "^1.0.0-rc.1", + "vue": "^3.0.0-0 || ^2.6.0" + }, + "peerDependenciesMeta": { + "@vue/composition-api": { + "optional": true + } + } + }, + "node_modules/@interacta/css-labels": { + "version": "0.1.3-beta.2", + "license": "MIT" + }, + "node_modules/@internationalized/date": { + "version": "3.12.1", + "license": "Apache-2.0", + "dependencies": { + "@swc/helpers": "^0.5.0" + } + }, + "node_modules/@internationalized/number": { + "version": "3.6.6", + "license": "Apache-2.0", + "dependencies": { + "@swc/helpers": "^0.5.0" + } + }, + "node_modules/@ioredis/commands": { + "version": "1.5.1", + "license": "MIT" + }, + "node_modules/@isaacs/cliui": { + "version": "8.0.2", + "license": "ISC", + "dependencies": { + "string-width": "^5.1.2", + "string-width-cjs": "npm:string-width@^4.2.0", + "strip-ansi": "^7.0.1", + "strip-ansi-cjs": "npm:strip-ansi@^6.0.1", + "wrap-ansi": "^8.1.0", + "wrap-ansi-cjs": "npm:wrap-ansi@^7.0.0" + }, + "engines": { + "node": ">=12" + } + }, + "node_modules/@isaacs/fs-minipass": { + "version": "4.0.1", + "license": "ISC", + "dependencies": { + "minipass": "^7.0.4" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@jridgewell/gen-mapping": { + "version": "0.3.13", + "license": "MIT", + "dependencies": { + "@jridgewell/sourcemap-codec": "^1.5.0", + "@jridgewell/trace-mapping": "^0.3.24" + } + }, + "node_modules/@jridgewell/remapping": { + "version": "2.3.5", + "license": "MIT", + "dependencies": { + "@jridgewell/gen-mapping": "^0.3.5", + "@jridgewell/trace-mapping": "^0.3.24" + } + }, + "node_modules/@jridgewell/resolve-uri": { + "version": "3.1.2", + "license": "MIT", + "engines": { + "node": ">=6.0.0" + } + }, + "node_modules/@jridgewell/source-map": { + "version": "0.3.11", + "license": "MIT", + "dependencies": { + "@jridgewell/gen-mapping": "^0.3.5", + "@jridgewell/trace-mapping": "^0.3.25" + } + }, + "node_modules/@jridgewell/sourcemap-codec": { + "version": "1.5.5", + "license": 
"MIT" + }, + "node_modules/@jridgewell/trace-mapping": { + "version": "0.3.31", + "license": "MIT", + "dependencies": { + "@jridgewell/resolve-uri": "^3.1.0", + "@jridgewell/sourcemap-codec": "^1.4.14" + } + }, + "node_modules/@juggle/resize-observer": { + "version": "3.4.0", + "license": "Apache-2.0" + }, + "node_modules/@kwsites/file-exists": { + "version": "1.1.1", + "license": "MIT", + "dependencies": { + "debug": "^4.1.1" + } + }, + "node_modules/@kwsites/promise-deferred": { + "version": "1.1.1", + "license": "MIT" + }, + "node_modules/@lezer/common": { + "version": "1.5.2", + "license": "MIT" + }, + "node_modules/@lezer/highlight": { + "version": "1.2.3", + "license": "MIT", + "dependencies": { + "@lezer/common": "^1.3.0" + } + }, + "node_modules/@lezer/json": { + "version": "1.0.3", + "license": "MIT", + "dependencies": { + "@lezer/common": "^1.2.0", + "@lezer/highlight": "^1.0.0", + "@lezer/lr": "^1.0.0" + } + }, + "node_modules/@lezer/lr": { + "version": "1.4.10", + "license": "MIT", + "dependencies": { + "@lezer/common": "^1.0.0" + } + }, + "node_modules/@mapbox/node-pre-gyp": { + "version": "2.0.3", + "license": "BSD-3-Clause", + "dependencies": { + "consola": "^3.2.3", + "detect-libc": "^2.0.0", + "https-proxy-agent": "^7.0.5", + "node-fetch": "^2.6.7", + "nopt": "^8.0.0", + "semver": "^7.5.3", + "tar": "^7.4.0" + }, + "bin": { + "node-pre-gyp": "bin/node-pre-gyp" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/@mapbox/node-pre-gyp/node_modules/abbrev": { + "version": "3.0.1", + "license": "ISC", + "engines": { + "node": "^18.17.0 || >=20.5.0" + } + }, + "node_modules/@mapbox/node-pre-gyp/node_modules/nopt": { + "version": "8.1.0", + "license": "ISC", + "dependencies": { + "abbrev": "^3.0.0" + }, + "bin": { + "nopt": "bin/nopt.js" + }, + "engines": { + "node": "^18.17.0 || >=20.5.0" + } + }, + "node_modules/@marijn/find-cluster-break": { + "version": "1.0.2", + "license": "MIT" + }, + "node_modules/@nodelib/fs.scandir": { + "version": 
"2.1.5", + "license": "MIT", + "dependencies": { + "@nodelib/fs.stat": "2.0.5", + "run-parallel": "^1.1.9" + }, + "engines": { + "node": ">= 8" + } + }, + "node_modules/@nodelib/fs.stat": { + "version": "2.0.5", + "license": "MIT", + "engines": { + "node": ">= 8" + } + }, + "node_modules/@nodelib/fs.walk": { + "version": "1.2.8", + "license": "MIT", + "dependencies": { + "@nodelib/fs.scandir": "2.1.5", + "fastq": "^1.6.0" + }, + "engines": { + "node": ">= 8" + } + }, + "node_modules/@nuxt/cli": { + "version": "3.35.2", + "license": "MIT", + "dependencies": { + "@bomb.sh/tab": "^0.0.15", + "@clack/prompts": "^1.3.0", + "c12": "^3.3.4", + "citty": "^0.2.2", + "confbox": "^0.2.4", + "consola": "^3.4.2", + "debug": "^4.4.3", + "defu": "^6.1.7", + "exsolve": "^1.0.8", + "fuse.js": "^7.3.0", + "fzf": "^0.5.2", + "giget": "^3.2.0", + "jiti": "^2.7.0", + "listhen": "^1.10.0", + "nypm": "^0.6.6", + "ofetch": "^1.5.1", + "ohash": "^2.0.11", + "pathe": "^2.0.3", + "perfect-debounce": "^2.1.0", + "pkg-types": "^2.3.1", + "scule": "^1.3.0", + "semver": "^7.8.0", + "srvx": "^0.11.15", + "std-env": "^4.1.0", + "tinyclip": "^0.1.12", + "tinyexec": "^1.1.2", + "ufo": "^1.6.4", + "youch": "^4.1.1" + }, + "bin": { + "nuxi": "bin/nuxi.mjs", + "nuxi-ng": "bin/nuxi.mjs", + "nuxt": "bin/nuxi.mjs", + "nuxt-cli": "bin/nuxi.mjs" + }, + "engines": { + "node": "^16.14.0 || >=18.0.0" + }, + "peerDependencies": { + "@nuxt/schema": "^4.4.5" + }, + "peerDependenciesMeta": { + "@nuxt/schema": { + "optional": true + } + } + }, + "node_modules/@nuxt/cli/node_modules/@bomb.sh/tab": { + "version": "0.0.15", + "license": "MIT", + "bin": { + "tab": "dist/bin/cli.mjs" + }, + "peerDependencies": { + "cac": "^6.7.14", + "citty": "^0.1.6 || ^0.2.0", + "commander": "^13.1.0" + }, + "peerDependenciesMeta": { + "cac": { + "optional": true + }, + "citty": { + "optional": true + }, + "commander": { + "optional": true + } + } + }, + "node_modules/@nuxt/cli/node_modules/commander": { + "version": "13.1.0", + 
"license": "MIT", + "optional": true, + "peer": true, + "engines": { + "node": ">=18" + } + }, + "node_modules/@nuxt/devalue": { + "version": "2.0.2", + "license": "MIT" + }, + "node_modules/@nuxt/devtools": { + "version": "3.2.4", + "license": "MIT", + "dependencies": { + "@nuxt/devtools-kit": "3.2.4", + "@nuxt/devtools-wizard": "3.2.4", + "@nuxt/kit": "^4.4.2", + "@vue/devtools-core": "^8.1.0", + "@vue/devtools-kit": "^8.1.0", + "birpc": "^4.0.0", + "consola": "^3.4.2", + "destr": "^2.0.5", + "error-stack-parser-es": "^1.0.5", + "execa": "^8.0.1", + "fast-npm-meta": "^1.4.2", + "get-port-please": "^3.2.0", + "hookable": "^6.1.0", + "image-meta": "^0.2.2", + "is-installed-globally": "^1.0.0", + "launch-editor": "^2.13.1", + "local-pkg": "^1.1.2", + "magicast": "^0.5.2", + "nypm": "^0.6.5", + "ohash": "^2.0.11", + "pathe": "^2.0.3", + "perfect-debounce": "^2.1.0", + "pkg-types": "^2.3.0", + "semver": "^7.7.4", + "simple-git": "^3.33.0", + "sirv": "^3.0.2", + "structured-clone-es": "^2.0.0", + "tinyglobby": "^0.2.15", + "vite-plugin-inspect": "^11.3.3", + "vite-plugin-vue-tracer": "^1.3.0", + "which": "^6.0.1", + "ws": "^8.19.0" + }, + "bin": { + "devtools": "cli.mjs" + }, + "peerDependencies": { + "@vitejs/devtools": "*", + "vite": ">=6.0" + }, + "peerDependenciesMeta": { + "@vitejs/devtools": { + "optional": true + } + } + }, + "node_modules/@nuxt/devtools-kit": { + "version": "3.2.4", + "license": "MIT", + "dependencies": { + "@nuxt/kit": "^4.4.2", + "execa": "^8.0.1" + }, + "peerDependencies": { + "vite": ">=6.0" + } + }, + "node_modules/@nuxt/devtools-wizard": { + "version": "3.2.4", + "license": "MIT", + "dependencies": { + "@clack/prompts": "^1.1.0", + "consola": "^3.4.2", + "diff": "^8.0.3", + "execa": "^8.0.1", + "magicast": "^0.5.2", + "pathe": "^2.0.3", + "pkg-types": "^2.3.0", + "semver": "^7.7.4" + }, + "bin": { + "devtools-wizard": "cli.mjs" + } + }, + "node_modules/@nuxt/devtools/node_modules/isexe": { + "version": "4.0.0", + "license": 
"BlueOak-1.0.0", + "engines": { + "node": ">=20" + } + }, + "node_modules/@nuxt/devtools/node_modules/which": { + "version": "6.0.1", + "license": "ISC", + "dependencies": { + "isexe": "^4.0.0" + }, + "bin": { + "node-which": "bin/which.js" + }, + "engines": { + "node": "^20.17.0 || >=22.9.0" + } + }, + "node_modules/@nuxt/kit": { + "version": "4.4.6", + "license": "MIT", + "dependencies": { + "c12": "^3.3.4", + "consola": "^3.4.2", + "defu": "^6.1.7", + "destr": "^2.0.5", + "errx": "^0.1.0", + "exsolve": "^1.0.8", + "ignore": "^7.0.5", + "jiti": "^2.7.0", + "klona": "^2.0.6", + "mlly": "^1.8.2", + "ohash": "^2.0.11", + "pathe": "^2.0.3", + "pkg-types": "^2.3.1", + "rc9": "^3.0.1", + "scule": "^1.3.0", + "semver": "^7.8.0", + "tinyglobby": "^0.2.16", + "ufo": "^1.6.4", + "unctx": "^2.5.0", + "untyped": "^2.0.0" + }, + "engines": { + "node": ">=18.12.0" + } + }, + "node_modules/@nuxt/nitro-server": { + "version": "4.4.6", + "license": "MIT", + "dependencies": { + "@nuxt/devalue": "^2.0.2", + "@nuxt/kit": "4.4.6", + "@unhead/vue": "^2.1.15", + "@vue/shared": "^3.5.34", + "consola": "^3.4.2", + "defu": "^6.1.7", + "destr": "^2.0.5", + "devalue": "^5.8.1", + "errx": "^0.1.0", + "escape-string-regexp": "^5.0.0", + "exsolve": "^1.0.8", + "h3": "^1.15.11", + "impound": "^1.1.5", + "klona": "^2.0.6", + "mocked-exports": "^0.1.1", + "nitropack": "^2.13.4", + "nypm": "^0.6.6", + "ohash": "^2.0.11", + "pathe": "^2.0.3", + "rou3": "^0.8.1", + "std-env": "^4.1.0", + "ufo": "^1.6.4", + "unctx": "^2.5.0", + "unstorage": "^1.17.5", + "vue": "^3.5.34", + "vue-bundle-renderer": "^2.2.0", + "vue-devtools-stub": "^0.1.0" + }, + "engines": { + "node": "^22.12.0 || ^24.11.0 || >=26.0.0" + }, + "peerDependencies": { + "@babel/plugin-proposal-decorators": "^7.25.0", + "@babel/plugin-syntax-typescript": "^7.25.0", + "@rollup/plugin-babel": "^6.0.0 || ^7.0.0", + "nuxt": "^4.4.6" + }, + "peerDependenciesMeta": { + "@babel/plugin-proposal-decorators": { + "optional": true + }, + 
"@babel/plugin-syntax-typescript": { + "optional": true + }, + "@rollup/plugin-babel": { + "optional": true + } + } + }, + "node_modules/@nuxt/schema": { + "version": "4.4.6", + "license": "MIT", + "dependencies": { + "@vue/shared": "^3.5.34", + "defu": "^6.1.7", + "pathe": "^2.0.3", + "pkg-types": "^2.3.1", + "std-env": "^4.1.0" + }, + "engines": { + "node": "^14.18.0 || >=16.10.0" + } + }, + "node_modules/@nuxt/telemetry": { + "version": "2.8.0", + "license": "MIT", + "dependencies": { + "citty": "^0.2.1", + "consola": "^3.4.2", + "ofetch": "^2.0.0-alpha.3", + "rc9": "^3.0.0", + "std-env": "^4.0.0" + }, + "bin": { + "nuxt-telemetry": "bin/nuxt-telemetry.mjs" + }, + "engines": { + "node": ">=18.12.0" + }, + "peerDependencies": { + "@nuxt/kit": ">=3.0.0" + } + }, + "node_modules/@nuxt/telemetry/node_modules/ofetch": { + "version": "2.0.0-alpha.3", + "license": "MIT" + }, + "node_modules/@nuxt/vite-builder": { + "version": "4.4.6", + "license": "MIT", + "dependencies": { + "@nuxt/kit": "4.4.6", + "@rollup/plugin-replace": "^6.0.3", + "@vitejs/plugin-vue": "^6.0.7", + "@vitejs/plugin-vue-jsx": "^5.1.5", + "autoprefixer": "^10.5.0", + "consola": "^3.4.2", + "cssnano": "^8.0.1", + "defu": "^6.1.7", + "escape-string-regexp": "^5.0.0", + "exsolve": "^1.0.8", + "get-port-please": "^3.2.0", + "jiti": "^2.7.0", + "knitwork": "^1.3.0", + "magic-string": "^0.30.21", + "mlly": "^1.8.2", + "mocked-exports": "^0.1.1", + "nypm": "^0.6.6", + "pathe": "^2.0.3", + "pkg-types": "^2.3.1", + "postcss": "^8.5.14", + "seroval": "^1.5.4", + "std-env": "^4.1.0", + "ufo": "^1.6.4", + "unenv": "^2.0.0-rc.24", + "vite": "^7.3.3", + "vite-node": "^5.3.0", + "vite-plugin-checker": "^0.13.0", + "vue-bundle-renderer": "^2.2.0" + }, + "engines": { + "node": "^22.12.0 || ^24.11.0 || >=26.0.0" + }, + "peerDependencies": { + "@babel/plugin-proposal-decorators": "^7.25.0", + "@babel/plugin-syntax-jsx": "^7.25.0", + "nuxt": "4.4.6", + "rolldown": "^1.0.0-beta.38", + "rollup-plugin-visualizer": "^6.0.0 
|| ^7.0.1", + "vue": "^3.3.4" + }, + "peerDependenciesMeta": { + "@babel/plugin-proposal-decorators": { + "optional": true + }, + "@babel/plugin-syntax-jsx": { + "optional": true + }, + "rolldown": { + "optional": true + }, + "rollup-plugin-visualizer": { + "optional": true + } + } + }, + "node_modules/@nuxt/vite-builder/node_modules/@esbuild/linux-x64": { + "version": "0.27.7", + "cpu": [ + "x64" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@nuxt/vite-builder/node_modules/@vitejs/plugin-vue": { + "version": "6.0.7", + "license": "MIT", + "dependencies": { + "@rolldown/pluginutils": "^1.0.1" + }, + "engines": { + "node": "^20.19.0 || >=22.12.0" + }, + "peerDependencies": { + "vite": "^5.0.0 || ^6.0.0 || ^7.0.0 || ^8.0.0", + "vue": "^3.2.25" + } + }, + "node_modules/@nuxt/vite-builder/node_modules/esbuild": { + "version": "0.27.7", + "hasInstallScript": true, + "license": "MIT", + "bin": { + "esbuild": "bin/esbuild" + }, + "engines": { + "node": ">=18" + }, + "optionalDependencies": { + "@esbuild/aix-ppc64": "0.27.7", + "@esbuild/android-arm": "0.27.7", + "@esbuild/android-arm64": "0.27.7", + "@esbuild/android-x64": "0.27.7", + "@esbuild/darwin-arm64": "0.27.7", + "@esbuild/darwin-x64": "0.27.7", + "@esbuild/freebsd-arm64": "0.27.7", + "@esbuild/freebsd-x64": "0.27.7", + "@esbuild/linux-arm": "0.27.7", + "@esbuild/linux-arm64": "0.27.7", + "@esbuild/linux-ia32": "0.27.7", + "@esbuild/linux-loong64": "0.27.7", + "@esbuild/linux-mips64el": "0.27.7", + "@esbuild/linux-ppc64": "0.27.7", + "@esbuild/linux-riscv64": "0.27.7", + "@esbuild/linux-s390x": "0.27.7", + "@esbuild/linux-x64": "0.27.7", + "@esbuild/netbsd-arm64": "0.27.7", + "@esbuild/netbsd-x64": "0.27.7", + "@esbuild/openbsd-arm64": "0.27.7", + "@esbuild/openbsd-x64": "0.27.7", + "@esbuild/openharmony-arm64": "0.27.7", + "@esbuild/sunos-x64": "0.27.7", + "@esbuild/win32-arm64": "0.27.7", + "@esbuild/win32-ia32": "0.27.7", + 
"@esbuild/win32-x64": "0.27.7" + } + }, + "node_modules/@nuxt/vite-builder/node_modules/vite": { + "version": "7.3.3", + "license": "MIT", + "dependencies": { + "esbuild": "^0.27.0", + "fdir": "^6.5.0", + "picomatch": "^4.0.3", + "postcss": "^8.5.6", + "rollup": "^4.43.0", + "tinyglobby": "^0.2.15" + }, + "bin": { + "vite": "bin/vite.js" + }, + "engines": { + "node": "^20.19.0 || >=22.12.0" + }, + "funding": { + "url": "https://github.com/vitejs/vite?sponsor=1" + }, + "optionalDependencies": { + "fsevents": "~2.3.3" + }, + "peerDependencies": { + "@types/node": "^20.19.0 || >=22.12.0", + "jiti": ">=1.21.0", + "less": "^4.0.0", + "lightningcss": "^1.21.0", + "sass": "^1.70.0", + "sass-embedded": "^1.70.0", + "stylus": ">=0.54.8", + "sugarss": "^5.0.0", + "terser": "^5.16.0", + "tsx": "^4.8.1", + "yaml": "^2.4.2" + }, + "peerDependenciesMeta": { + "@types/node": { + "optional": true + }, + "jiti": { + "optional": true + }, + "less": { + "optional": true + }, + "lightningcss": { + "optional": true + }, + "sass": { + "optional": true + }, + "sass-embedded": { + "optional": true + }, + "stylus": { + "optional": true + }, + "sugarss": { + "optional": true + }, + "terser": { + "optional": true + }, + "tsx": { + "optional": true + }, + "yaml": { + "optional": true + } + } + }, + "node_modules/@observablehq/plot": { + "version": "0.6.17", + "license": "ISC", + "dependencies": { + "d3": "^7.9.0", + "interval-tree-1d": "^1.0.0", + "isoformat": "^0.2.0" + }, + "engines": { + "node": ">=12" + } + }, + "node_modules/@one-ini/wasm": { + "version": "0.1.1", + "dev": true, + "license": "MIT" + }, + "node_modules/@oxc-minify/binding-linux-x64-gnu": { + "version": "0.131.0", + "cpu": [ + "x64" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": "^20.19.0 || >=22.12.0" + } + }, + "node_modules/@oxc-parser/binding-linux-x64-gnu": { + "version": "0.131.0", + "cpu": [ + "x64" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], 
+ "engines": { + "node": "^20.19.0 || >=22.12.0" + } + }, + "node_modules/@oxc-project/types": { + "version": "0.131.0", + "license": "MIT", + "funding": { + "url": "https://github.com/sponsors/Boshen" + } + }, + "node_modules/@oxc-transform/binding-linux-x64-gnu": { + "version": "0.131.0", + "cpu": [ + "x64" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": "^20.19.0 || >=22.12.0" + } + }, + "node_modules/@parcel/watcher": { + "version": "2.5.6", + "hasInstallScript": true, + "license": "MIT", + "dependencies": { + "detect-libc": "^2.0.3", + "is-glob": "^4.0.3", + "node-addon-api": "^7.0.0", + "picomatch": "^4.0.3" + }, + "engines": { + "node": ">= 10.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/parcel" + }, + "optionalDependencies": { + "@parcel/watcher-android-arm64": "2.5.6", + "@parcel/watcher-darwin-arm64": "2.5.6", + "@parcel/watcher-darwin-x64": "2.5.6", + "@parcel/watcher-freebsd-x64": "2.5.6", + "@parcel/watcher-linux-arm-glibc": "2.5.6", + "@parcel/watcher-linux-arm-musl": "2.5.6", + "@parcel/watcher-linux-arm64-glibc": "2.5.6", + "@parcel/watcher-linux-arm64-musl": "2.5.6", + "@parcel/watcher-linux-x64-glibc": "2.5.6", + "@parcel/watcher-linux-x64-musl": "2.5.6", + "@parcel/watcher-win32-arm64": "2.5.6", + "@parcel/watcher-win32-ia32": "2.5.6", + "@parcel/watcher-win32-x64": "2.5.6" + } + }, + "node_modules/@parcel/watcher-linux-x64-glibc": { + "version": "2.5.6", + "cpu": [ + "x64" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/parcel" + } + }, + "node_modules/@parcel/watcher-wasm": { + "version": "2.5.6", + "bundleDependencies": [ + "napi-wasm" + ], + "license": "MIT", + "dependencies": { + "is-glob": "^4.0.3", + "napi-wasm": "^1.1.0", + "picomatch": "^4.0.3" + }, + "engines": { + "node": ">= 10.0.0" + }, + "funding": { 
+ "type": "opencollective", + "url": "https://opencollective.com/parcel" + } + }, + "node_modules/@parcel/watcher-wasm/node_modules/napi-wasm": { + "version": "1.1.0", + "inBundle": true, + "license": "MIT" + }, + "node_modules/@pkgjs/parseargs": { + "version": "0.11.0", + "license": "MIT", + "optional": true, + "engines": { + "node": ">=14" + } + }, + "node_modules/@polka/url": { + "version": "1.0.0-next.29", + "license": "MIT" + }, + "node_modules/@poppinss/colors": { + "version": "4.1.6", + "license": "MIT", + "dependencies": { + "kleur": "^4.1.5" + } + }, + "node_modules/@poppinss/dumper": { + "version": "0.7.0", + "license": "MIT", + "dependencies": { + "@poppinss/colors": "^4.1.5", + "@sindresorhus/is": "^7.0.2", + "supports-color": "^10.0.0" + } + }, + "node_modules/@poppinss/dumper/node_modules/supports-color": { + "version": "10.2.2", + "license": "MIT", + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/chalk/supports-color?sponsor=1" + } + }, + "node_modules/@poppinss/exception": { + "version": "1.2.3", + "license": "MIT" + }, + "node_modules/@rolldown/pluginutils": { + "version": "1.0.1", + "license": "MIT" + }, + "node_modules/@rollup/plugin-alias": { + "version": "6.0.0", + "license": "MIT", + "engines": { + "node": ">=20.19.0" + }, + "peerDependencies": { + "rollup": ">=4.0.0" + }, + "peerDependenciesMeta": { + "rollup": { + "optional": true + } + } + }, + "node_modules/@rollup/plugin-commonjs": { + "version": "29.0.2", + "license": "MIT", + "dependencies": { + "@rollup/pluginutils": "^5.0.1", + "commondir": "^1.0.1", + "estree-walker": "^2.0.2", + "fdir": "^6.2.0", + "is-reference": "1.2.1", + "magic-string": "^0.30.3", + "picomatch": "^4.0.2" + }, + "engines": { + "node": ">=16.0.0 || 14 >= 14.17" + }, + "peerDependencies": { + "rollup": "^2.68.0||^3.0.0||^4.0.0" + }, + "peerDependenciesMeta": { + "rollup": { + "optional": true + } + } + }, + "node_modules/@rollup/plugin-inject": { + "version": "5.0.5", + "license": 
"MIT", + "dependencies": { + "@rollup/pluginutils": "^5.0.1", + "estree-walker": "^2.0.2", + "magic-string": "^0.30.3" + }, + "engines": { + "node": ">=14.0.0" + }, + "peerDependencies": { + "rollup": "^1.20.0||^2.0.0||^3.0.0||^4.0.0" + }, + "peerDependenciesMeta": { + "rollup": { + "optional": true + } + } + }, + "node_modules/@rollup/plugin-json": { + "version": "6.1.0", + "license": "MIT", + "dependencies": { + "@rollup/pluginutils": "^5.1.0" + }, + "engines": { + "node": ">=14.0.0" + }, + "peerDependencies": { + "rollup": "^1.20.0||^2.0.0||^3.0.0||^4.0.0" + }, + "peerDependenciesMeta": { + "rollup": { + "optional": true + } + } + }, + "node_modules/@rollup/plugin-node-resolve": { + "version": "16.0.3", + "license": "MIT", + "dependencies": { + "@rollup/pluginutils": "^5.0.1", + "@types/resolve": "1.20.2", + "deepmerge": "^4.2.2", + "is-module": "^1.0.0", + "resolve": "^1.22.1" + }, + "engines": { + "node": ">=14.0.0" + }, + "peerDependencies": { + "rollup": "^2.78.0||^3.0.0||^4.0.0" + }, + "peerDependenciesMeta": { + "rollup": { + "optional": true + } + } + }, + "node_modules/@rollup/plugin-replace": { + "version": "6.0.3", + "license": "MIT", + "dependencies": { + "@rollup/pluginutils": "^5.0.1", + "magic-string": "^0.30.3" + }, + "engines": { + "node": ">=14.0.0" + }, + "peerDependencies": { + "rollup": "^1.20.0||^2.0.0||^3.0.0||^4.0.0" + }, + "peerDependenciesMeta": { + "rollup": { + "optional": true + } + } + }, + "node_modules/@rollup/plugin-terser": { + "version": "1.0.0", + "license": "MIT", + "dependencies": { + "serialize-javascript": "^7.0.3", + "smob": "^1.0.0", + "terser": "^5.17.4" + }, + "engines": { + "node": ">=20.0.0" + }, + "peerDependencies": { + "rollup": "^2.0.0||^3.0.0||^4.0.0" + }, + "peerDependenciesMeta": { + "rollup": { + "optional": true + } + } + }, + "node_modules/@rollup/pluginutils": { + "version": "5.3.0", + "license": "MIT", + "dependencies": { + "@types/estree": "^1.0.0", + "estree-walker": "^2.0.2", + "picomatch": "^4.0.2" + 
}, + "engines": { + "node": ">=14.0.0" + }, + "peerDependencies": { + "rollup": "^1.20.0||^2.0.0||^3.0.0||^4.0.0" + }, + "peerDependenciesMeta": { + "rollup": { + "optional": true + } + } + }, + "node_modules/@rollup/rollup-linux-x64-gnu": { + "version": "4.60.4", + "cpu": [ + "x64" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@simple-git/args-pathspec": { + "version": "1.0.3", + "license": "MIT" + }, + "node_modules/@simple-git/argv-parser": { + "version": "1.1.1", + "license": "MIT", + "dependencies": { + "@simple-git/args-pathspec": "^1.0.3" + } + }, + "node_modules/@sindresorhus/is": { + "version": "7.2.0", + "license": "MIT", + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/sindresorhus/is?sponsor=1" + } + }, + "node_modules/@sindresorhus/merge-streams": { + "version": "4.0.0", + "license": "MIT", + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/@speed-highlight/core": { + "version": "1.2.15", + "license": "CC0-1.0" + }, + "node_modules/@swc/helpers": { + "version": "0.5.21", + "license": "Apache-2.0", + "dependencies": { + "tslib": "^2.8.0" + } + }, + "node_modules/@tailwindcss/node": { + "version": "4.3.0", + "license": "MIT", + "dependencies": { + "@jridgewell/remapping": "^2.3.5", + "enhanced-resolve": "^5.21.0", + "jiti": "^2.6.1", + "lightningcss": "1.32.0", + "magic-string": "^0.30.21", + "source-map-js": "^1.2.1", + "tailwindcss": "4.3.0" + } + }, + "node_modules/@tailwindcss/oxide": { + "version": "4.3.0", + "license": "MIT", + "engines": { + "node": ">= 20" + }, + "optionalDependencies": { + "@tailwindcss/oxide-android-arm64": "4.3.0", + "@tailwindcss/oxide-darwin-arm64": "4.3.0", + "@tailwindcss/oxide-darwin-x64": "4.3.0", + "@tailwindcss/oxide-freebsd-x64": "4.3.0", + "@tailwindcss/oxide-linux-arm-gnueabihf": "4.3.0", + "@tailwindcss/oxide-linux-arm64-gnu": "4.3.0", + 
"@tailwindcss/oxide-linux-arm64-musl": "4.3.0", + "@tailwindcss/oxide-linux-x64-gnu": "4.3.0", + "@tailwindcss/oxide-linux-x64-musl": "4.3.0", + "@tailwindcss/oxide-wasm32-wasi": "4.3.0", + "@tailwindcss/oxide-win32-arm64-msvc": "4.3.0", + "@tailwindcss/oxide-win32-x64-msvc": "4.3.0" + } + }, + "node_modules/@tailwindcss/oxide-linux-x64-gnu": { + "version": "4.3.0", + "cpu": [ + "x64" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 20" + } + }, + "node_modules/@tailwindcss/vite": { + "version": "4.3.0", + "license": "MIT", + "dependencies": { + "@tailwindcss/node": "4.3.0", + "@tailwindcss/oxide": "4.3.0", + "tailwindcss": "4.3.0" + }, + "peerDependencies": { + "vite": "^5.2.0 || ^6 || ^7 || ^8" + } + }, + "node_modules/@tanstack/table-core": { + "version": "8.21.3", + "license": "MIT", + "engines": { + "node": ">=12" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/tannerlinsley" + } + }, + "node_modules/@tanstack/virtual-core": { + "version": "3.15.0", + "license": "MIT", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/tannerlinsley" + } + }, + "node_modules/@tanstack/vue-table": { + "version": "8.21.3", + "license": "MIT", + "dependencies": { + "@tanstack/table-core": "8.21.3" + }, + "engines": { + "node": ">=12" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/tannerlinsley" + }, + "peerDependencies": { + "vue": ">=3.2" + } + }, + "node_modules/@tanstack/vue-virtual": { + "version": "3.13.25", + "license": "MIT", + "dependencies": { + "@tanstack/virtual-core": "3.15.0" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/tannerlinsley" + }, + "peerDependencies": { + "vue": "^2.7.0 || ^3.0.0" + } + }, + "node_modules/@types/command-line-args": { + "version": "5.2.3", + "license": "MIT" + }, + "node_modules/@types/command-line-usage": { + "version": "5.0.4", + "license": "MIT" + }, + 
"node_modules/@types/cytoscape": { + "version": "3.31.0", + "deprecated": "This is a stub types definition. cytoscape provides its own type definitions, so you do not need this installed.", + "dev": true, + "license": "MIT", + "dependencies": { + "cytoscape": "*" + } + }, + "node_modules/@types/estree": { + "version": "1.0.9", + "license": "MIT" + }, + "node_modules/@types/jsesc": { + "version": "2.5.1", + "license": "MIT" + }, + "node_modules/@types/node": { + "version": "20.19.41", + "license": "MIT", + "dependencies": { + "undici-types": "~6.21.0" + } + }, + "node_modules/@types/resolve": { + "version": "1.20.2", + "license": "MIT" + }, + "node_modules/@types/trusted-types": { + "version": "2.0.7", + "license": "MIT", + "optional": true + }, + "node_modules/@types/web-bluetooth": { + "version": "0.0.21", + "license": "MIT" + }, + "node_modules/@unhead/vue": { + "version": "2.1.15", + "license": "MIT", + "dependencies": { + "hookable": "^6.0.1", + "unhead": "2.1.15" + }, + "funding": { + "url": "https://github.com/sponsors/harlan-zw" + }, + "peerDependencies": { + "vue": ">=3.5.18" + } + }, + "node_modules/@uwdata/flechette": { + "version": "2.5.0", + "license": "BSD-3-Clause" + }, + "node_modules/@uwdata/mosaic-core": { + "version": "0.21.1", + "license": "BSD-3-Clause", + "dependencies": { + "@duckdb/duckdb-wasm": "1.30.0", + "@uwdata/flechette": "^2.2.5", + "@uwdata/mosaic-sql": "^0.21.1" + } + }, + "node_modules/@uwdata/mosaic-core/node_modules/@duckdb/duckdb-wasm": { + "version": "1.30.0", + "license": "MIT", + "dependencies": { + "apache-arrow": "^17.0.0" + } + }, + "node_modules/@uwdata/mosaic-inputs": { + "version": "0.21.1", + "license": "BSD-3-Clause", + "dependencies": { + "@uwdata/mosaic-core": "^0.21.1", + "@uwdata/mosaic-sql": "^0.21.1" + } + }, + "node_modules/@uwdata/mosaic-plot": { + "version": "0.21.1", + "license": "BSD-3-Clause", + "dependencies": { + "@observablehq/plot": "^0.6.17", + "@uwdata/mosaic-core": "^0.21.1", + "@uwdata/mosaic-sql": 
"^0.21.1", + "d3": "^7.9.0" + } + }, + "node_modules/@uwdata/mosaic-sql": { + "version": "0.21.1", + "license": "BSD-3-Clause" + }, + "node_modules/@uwdata/vgplot": { + "version": "0.21.1", + "license": "BSD-3-Clause", + "dependencies": { + "@uwdata/mosaic-core": "^0.21.1", + "@uwdata/mosaic-inputs": "^0.21.1", + "@uwdata/mosaic-plot": "^0.21.1", + "@uwdata/mosaic-sql": "^0.21.1" + } + }, + "node_modules/@vercel/nft": { + "version": "1.5.0", + "license": "MIT", + "dependencies": { + "@mapbox/node-pre-gyp": "^2.0.0", + "@rollup/pluginutils": "^5.1.3", + "acorn": "^8.6.0", + "acorn-import-attributes": "^1.9.5", + "async-sema": "^3.1.1", + "bindings": "^1.4.0", + "estree-walker": "2.0.2", + "glob": "^13.0.0", + "graceful-fs": "^4.2.9", + "node-gyp-build": "^4.2.2", + "picomatch": "^4.0.2", + "resolve-from": "^5.0.0" + }, + "bin": { + "nft": "out/cli.js" + }, + "engines": { + "node": ">=20" + } + }, + "node_modules/@vercel/nft/node_modules/balanced-match": { + "version": "4.0.4", + "license": "MIT", + "engines": { + "node": "18 || 20 || >=22" + } + }, + "node_modules/@vercel/nft/node_modules/brace-expansion": { + "version": "5.0.6", + "license": "MIT", + "dependencies": { + "balanced-match": "^4.0.2" + }, + "engines": { + "node": "18 || 20 || >=22" + } + }, + "node_modules/@vercel/nft/node_modules/glob": { + "version": "13.0.6", + "license": "BlueOak-1.0.0", + "dependencies": { + "minimatch": "^10.2.2", + "minipass": "^7.1.3", + "path-scurry": "^2.0.2" + }, + "engines": { + "node": "18 || 20 || >=22" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + } + }, + "node_modules/@vercel/nft/node_modules/lru-cache": { + "version": "11.5.0", + "license": "BlueOak-1.0.0", + "engines": { + "node": "20 || >=22" + } + }, + "node_modules/@vercel/nft/node_modules/minimatch": { + "version": "10.2.5", + "license": "BlueOak-1.0.0", + "dependencies": { + "brace-expansion": "^5.0.5" + }, + "engines": { + "node": "18 || 20 || >=22" + }, + "funding": { + "url": 
"https://github.com/sponsors/isaacs" + } + }, + "node_modules/@vercel/nft/node_modules/path-scurry": { + "version": "2.0.2", + "license": "BlueOak-1.0.0", + "dependencies": { + "lru-cache": "^11.0.0", + "minipass": "^7.1.2" + }, + "engines": { + "node": "18 || 20 || >=22" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + } + }, + "node_modules/@vitejs/plugin-vue": { + "version": "5.2.4", + "dev": true, + "license": "MIT", + "engines": { + "node": "^18.0.0 || >=20.0.0" + }, + "peerDependencies": { + "vite": "^5.0.0 || ^6.0.0", + "vue": "^3.2.25" + } + }, + "node_modules/@vitejs/plugin-vue-jsx": { + "version": "5.1.5", + "license": "MIT", + "dependencies": { + "@babel/core": "^7.29.0", + "@babel/plugin-syntax-typescript": "^7.28.6", + "@babel/plugin-transform-typescript": "^7.28.6", + "@rolldown/pluginutils": "^1.0.0-rc.2", + "@vue/babel-plugin-jsx": "^2.0.1" + }, + "engines": { + "node": "^20.19.0 || >=22.12.0" + }, + "peerDependencies": { + "vite": "^5.0.0 || ^6.0.0 || ^7.0.0 || ^8.0.0", + "vue": "^3.0.0" + } + }, + "node_modules/@vitest/expect": { + "version": "2.1.9", + "dev": true, + "license": "MIT", + "dependencies": { + "@vitest/spy": "2.1.9", + "@vitest/utils": "2.1.9", + "chai": "^5.1.2", + "tinyrainbow": "^1.2.0" + }, + "funding": { + "url": "https://opencollective.com/vitest" + } + }, + "node_modules/@vitest/pretty-format": { + "version": "2.1.9", + "dev": true, + "license": "MIT", + "dependencies": { + "tinyrainbow": "^1.2.0" + }, + "funding": { + "url": "https://opencollective.com/vitest" + } + }, + "node_modules/@vitest/runner": { + "version": "2.1.9", + "dev": true, + "license": "MIT", + "dependencies": { + "@vitest/utils": "2.1.9", + "pathe": "^1.1.2" + }, + "funding": { + "url": "https://opencollective.com/vitest" + } + }, + "node_modules/@vitest/runner/node_modules/pathe": { + "version": "1.1.2", + "dev": true, + "license": "MIT" + }, + "node_modules/@vitest/snapshot": { + "version": "2.1.9", + "dev": true, + "license": "MIT", + 
"dependencies": { + "@vitest/pretty-format": "2.1.9", + "magic-string": "^0.30.12", + "pathe": "^1.1.2" + }, + "funding": { + "url": "https://opencollective.com/vitest" + } + }, + "node_modules/@vitest/snapshot/node_modules/pathe": { + "version": "1.1.2", + "dev": true, + "license": "MIT" + }, + "node_modules/@vitest/spy": { + "version": "2.1.9", + "dev": true, + "license": "MIT", + "dependencies": { + "tinyspy": "^3.0.2" + }, + "funding": { + "url": "https://opencollective.com/vitest" + } + }, + "node_modules/@vitest/utils": { + "version": "2.1.9", + "dev": true, + "license": "MIT", + "dependencies": { + "@vitest/pretty-format": "2.1.9", + "loupe": "^3.1.2", + "tinyrainbow": "^1.2.0" + }, + "funding": { + "url": "https://opencollective.com/vitest" + } + }, + "node_modules/@volar/language-core": { + "version": "2.4.15", + "devOptional": true, + "license": "MIT", + "dependencies": { + "@volar/source-map": "2.4.15" + } + }, + "node_modules/@volar/source-map": { + "version": "2.4.15", + "devOptional": true, + "license": "MIT" + }, + "node_modules/@volar/typescript": { + "version": "2.4.15", + "devOptional": true, + "license": "MIT", + "dependencies": { + "@volar/language-core": "2.4.15", + "path-browserify": "^1.0.1", + "vscode-uri": "^3.0.8" + } + }, + "node_modules/@vue-macros/common": { + "version": "3.1.2", + "license": "MIT", + "dependencies": { + "@vue/compiler-sfc": "^3.5.22", + "ast-kit": "^2.1.2", + "local-pkg": "^1.1.2", + "magic-string-ast": "^1.0.2", + "unplugin-utils": "^0.3.0" + }, + "engines": { + "node": ">=20.19.0" + }, + "funding": { + "url": "https://github.com/sponsors/vue-macros" + }, + "peerDependencies": { + "vue": "^2.7.0 || ^3.2.25" + }, + "peerDependenciesMeta": { + "vue": { + "optional": true + } + } + }, + "node_modules/@vue/babel-helper-vue-transform-on": { + "version": "2.0.1", + "license": "MIT" + }, + "node_modules/@vue/babel-plugin-jsx": { + "version": "2.0.1", + "license": "MIT", + "dependencies": { + "@babel/helper-module-imports": 
"^7.27.1", + "@babel/helper-plugin-utils": "^7.27.1", + "@babel/plugin-syntax-jsx": "^7.27.1", + "@babel/template": "^7.27.2", + "@babel/traverse": "^7.28.4", + "@babel/types": "^7.28.4", + "@vue/babel-helper-vue-transform-on": "2.0.1", + "@vue/babel-plugin-resolve-type": "2.0.1", + "@vue/shared": "^3.5.22" + }, + "peerDependencies": { + "@babel/core": "^7.0.0-0" + }, + "peerDependenciesMeta": { + "@babel/core": { + "optional": true + } + } + }, + "node_modules/@vue/babel-plugin-resolve-type": { + "version": "2.0.1", + "license": "MIT", + "dependencies": { + "@babel/code-frame": "^7.27.1", + "@babel/helper-module-imports": "^7.27.1", + "@babel/helper-plugin-utils": "^7.27.1", + "@babel/parser": "^7.28.4", + "@vue/compiler-sfc": "^3.5.22" + }, + "funding": { + "url": "https://github.com/sponsors/sxzz" + }, + "peerDependencies": { + "@babel/core": "^7.0.0-0" + } + }, + "node_modules/@vue/compiler-core": { + "version": "3.5.34", + "license": "MIT", + "dependencies": { + "@babel/parser": "^7.29.3", + "@vue/shared": "3.5.34", + "entities": "^7.0.1", + "estree-walker": "^2.0.2", + "source-map-js": "^1.2.1" + } + }, + "node_modules/@vue/compiler-dom": { + "version": "3.5.34", + "license": "MIT", + "dependencies": { + "@vue/compiler-core": "3.5.34", + "@vue/shared": "3.5.34" + } + }, + "node_modules/@vue/compiler-sfc": { + "version": "3.5.34", + "license": "MIT", + "dependencies": { + "@babel/parser": "^7.29.3", + "@vue/compiler-core": "3.5.34", + "@vue/compiler-dom": "3.5.34", + "@vue/compiler-ssr": "3.5.34", + "@vue/shared": "3.5.34", + "estree-walker": "^2.0.2", + "magic-string": "^0.30.21", + "postcss": "^8.5.14", + "source-map-js": "^1.2.1" + } + }, + "node_modules/@vue/compiler-ssr": { + "version": "3.5.34", + "license": "MIT", + "dependencies": { + "@vue/compiler-dom": "3.5.34", + "@vue/shared": "3.5.34" + } + }, + "node_modules/@vue/compiler-vue2": { + "version": "2.7.16", + "devOptional": true, + "license": "MIT", + "dependencies": { + "de-indent": "^1.0.2", + 
"he": "^1.2.0" + } + }, + "node_modules/@vue/devtools-api": { + "version": "6.6.4", + "license": "MIT" + }, + "node_modules/@vue/devtools-core": { + "version": "8.1.2", + "license": "MIT", + "dependencies": { + "@vue/devtools-kit": "^8.1.2", + "@vue/devtools-shared": "^8.1.2" + }, + "peerDependencies": { + "vue": "^3.0.0" + } + }, + "node_modules/@vue/devtools-kit": { + "version": "8.1.2", + "license": "MIT", + "dependencies": { + "@vue/devtools-shared": "^8.1.2", + "birpc": "^2.6.1", + "hookable": "^5.5.3", + "perfect-debounce": "^2.0.0" + } + }, + "node_modules/@vue/devtools-kit/node_modules/birpc": { + "version": "2.9.0", + "license": "MIT", + "funding": { + "url": "https://github.com/sponsors/antfu" + } + }, + "node_modules/@vue/devtools-kit/node_modules/hookable": { + "version": "5.5.3", + "license": "MIT" + }, + "node_modules/@vue/devtools-shared": { + "version": "8.1.2", + "license": "MIT" + }, + "node_modules/@vue/language-core": { + "version": "2.2.12", + "devOptional": true, + "license": "MIT", + "dependencies": { + "@volar/language-core": "2.4.15", + "@vue/compiler-dom": "^3.5.0", + "@vue/compiler-vue2": "^2.7.16", + "@vue/shared": "^3.5.0", + "alien-signals": "^1.0.3", + "minimatch": "^9.0.3", + "muggle-string": "^0.4.1", + "path-browserify": "^1.0.1" + }, + "peerDependencies": { + "typescript": "*" + }, + "peerDependenciesMeta": { + "typescript": { + "optional": true + } + } + }, + "node_modules/@vue/reactivity": { + "version": "3.5.34", + "license": "MIT", + "dependencies": { + "@vue/shared": "3.5.34" + } + }, + "node_modules/@vue/runtime-core": { + "version": "3.5.34", + "license": "MIT", + "dependencies": { + "@vue/reactivity": "3.5.34", + "@vue/shared": "3.5.34" + } + }, + "node_modules/@vue/runtime-dom": { + "version": "3.5.34", + "license": "MIT", + "dependencies": { + "@vue/reactivity": "3.5.34", + "@vue/runtime-core": "3.5.34", + "@vue/shared": "3.5.34", + "csstype": "^3.2.3" + } + }, + "node_modules/@vue/server-renderer": { + "version": 
"3.5.34", + "license": "MIT", + "dependencies": { + "@vue/compiler-ssr": "3.5.34", + "@vue/shared": "3.5.34" + }, + "peerDependencies": { + "vue": "3.5.34" + } + }, + "node_modules/@vue/shared": { + "version": "3.5.34", + "license": "MIT" + }, + "node_modules/@vue/test-utils": { + "version": "2.4.10", + "dev": true, + "license": "MIT", + "dependencies": { + "js-beautify": "^1.14.9", + "vue-component-type-helpers": "^3.0.0" + }, + "peerDependencies": { + "@vue/compiler-dom": "3.x", + "@vue/server-renderer": "3.x", + "vue": "3.x" + }, + "peerDependenciesMeta": { + "@vue/server-renderer": { + "optional": true + } + } + }, + "node_modules/@vueuse/core": { + "version": "14.3.0", + "license": "MIT", + "dependencies": { + "@types/web-bluetooth": "^0.0.21", + "@vueuse/metadata": "14.3.0", + "@vueuse/shared": "14.3.0" + }, + "funding": { + "url": "https://github.com/sponsors/antfu" + }, + "peerDependencies": { + "vue": "^3.5.0" + } + }, + "node_modules/@vueuse/metadata": { + "version": "14.3.0", + "license": "MIT", + "funding": { + "url": "https://github.com/sponsors/antfu" + } + }, + "node_modules/@vueuse/shared": { + "version": "14.3.0", + "license": "MIT", + "funding": { + "url": "https://github.com/sponsors/antfu" + }, + "peerDependencies": { + "vue": "^3.5.0" + } + }, + "node_modules/abbrev": { + "version": "2.0.0", + "dev": true, + "license": "ISC", + "engines": { + "node": "^14.17.0 || ^16.13.0 || >=18.0.0" + } + }, + "node_modules/abort-controller": { + "version": "3.0.0", + "license": "MIT", + "dependencies": { + "event-target-shim": "^5.0.0" + }, + "engines": { + "node": ">=6.5" + } + }, + "node_modules/acorn": { + "version": "8.16.0", + "license": "MIT", + "bin": { + "acorn": "bin/acorn" + }, + "engines": { + "node": ">=0.4.0" + } + }, + "node_modules/acorn-import-attributes": { + "version": "1.9.5", + "license": "MIT", + "peerDependencies": { + "acorn": "^8" + } + }, + "node_modules/agent-base": { + "version": "7.1.4", + "license": "MIT", + "engines": { + 
"node": ">= 14" + } + }, + "node_modules/alien-signals": { + "version": "1.0.13", + "devOptional": true, + "license": "MIT" + }, + "node_modules/ansi-regex": { + "version": "6.2.2", + "license": "MIT", + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/chalk/ansi-regex?sponsor=1" + } + }, + "node_modules/ansi-styles": { + "version": "4.3.0", + "license": "MIT", + "dependencies": { + "color-convert": "^2.0.1" + }, + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/chalk/ansi-styles?sponsor=1" + } + }, + "node_modules/ansis": { + "version": "4.3.0", + "license": "ISC", + "engines": { + "node": ">=14" + } + }, + "node_modules/anymatch": { + "version": "3.1.3", + "license": "ISC", + "dependencies": { + "normalize-path": "^3.0.0", + "picomatch": "^2.0.4" + }, + "engines": { + "node": ">= 8" + } + }, + "node_modules/anymatch/node_modules/picomatch": { + "version": "2.3.2", + "license": "MIT", + "engines": { + "node": ">=8.6" + }, + "funding": { + "url": "https://github.com/sponsors/jonschlinkert" + } + }, + "node_modules/apache-arrow": { + "version": "17.0.0", + "license": "Apache-2.0", + "dependencies": { + "@swc/helpers": "^0.5.11", + "@types/command-line-args": "^5.2.3", + "@types/command-line-usage": "^5.0.4", + "@types/node": "^20.13.0", + "command-line-args": "^5.2.1", + "command-line-usage": "^7.0.1", + "flatbuffers": "^24.3.25", + "json-bignum": "^0.0.3", + "tslib": "^2.6.2" + }, + "bin": { + "arrow2csv": "bin/arrow2csv.cjs" + } + }, + "node_modules/archiver": { + "version": "7.0.1", + "license": "MIT", + "dependencies": { + "archiver-utils": "^5.0.2", + "async": "^3.2.4", + "buffer-crc32": "^1.0.0", + "readable-stream": "^4.0.0", + "readdir-glob": "^1.1.2", + "tar-stream": "^3.0.0", + "zip-stream": "^6.0.1" + }, + "engines": { + "node": ">= 14" + } + }, + "node_modules/archiver-utils": { + "version": "5.0.2", + "license": "MIT", + "dependencies": { + "glob": "^10.0.0", + "graceful-fs": "^4.2.0", + 
"is-stream": "^2.0.1", + "lazystream": "^1.0.0", + "lodash": "^4.17.15", + "normalize-path": "^3.0.0", + "readable-stream": "^4.0.0" + }, + "engines": { + "node": ">= 14" + } + }, + "node_modules/archiver-utils/node_modules/is-stream": { + "version": "2.0.1", + "license": "MIT", + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/aria-hidden": { + "version": "1.2.6", + "license": "MIT", + "dependencies": { + "tslib": "^2.0.0" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/array-back": { + "version": "3.1.0", + "license": "MIT", + "engines": { + "node": ">=6" + } + }, + "node_modules/assertion-error": { + "version": "2.0.1", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=12" + } + }, + "node_modules/ast-kit": { + "version": "2.2.0", + "license": "MIT", + "dependencies": { + "@babel/parser": "^7.28.5", + "pathe": "^2.0.3" + }, + "engines": { + "node": ">=20.19.0" + }, + "funding": { + "url": "https://github.com/sponsors/sxzz" + } + }, + "node_modules/ast-walker-scope": { + "version": "0.8.3", + "license": "MIT", + "dependencies": { + "@babel/parser": "^7.28.4", + "ast-kit": "^2.1.3" + }, + "engines": { + "node": ">=20.19.0" + }, + "funding": { + "url": "https://github.com/sponsors/sxzz" + } + }, + "node_modules/async": { + "version": "3.2.6", + "license": "MIT" + }, + "node_modules/async-sema": { + "version": "3.1.1", + "license": "MIT" + }, + "node_modules/autoprefixer": { + "version": "10.5.0", + "funding": [ + { + "type": "opencollective", + "url": "https://opencollective.com/postcss/" + }, + { + "type": "tidelift", + "url": "https://tidelift.com/funding/github/npm/autoprefixer" + }, + { + "type": "github", + "url": "https://github.com/sponsors/ai" + } + ], + "license": "MIT", + "dependencies": { + "browserslist": "^4.28.2", + "caniuse-lite": "^1.0.30001787", + "fraction.js": "^5.3.4", + "picocolors": "^1.1.1", + "postcss-value-parser": "^4.2.0" + }, + "bin": 
{ + "autoprefixer": "bin/autoprefixer" + }, + "engines": { + "node": "^10 || ^12 || >=14" + }, + "peerDependencies": { + "postcss": "^8.1.0" + } + }, + "node_modules/avsdf-base": { + "version": "1.0.0", + "license": "MIT", + "dependencies": { + "layout-base": "^1.0.0" + } + }, + "node_modules/b4a": { + "version": "1.8.1", + "license": "Apache-2.0", + "peerDependencies": { + "react-native-b4a": "*" + }, + "peerDependenciesMeta": { + "react-native-b4a": { + "optional": true + } + } + }, + "node_modules/balanced-match": { + "version": "1.0.2", + "license": "MIT" + }, + "node_modules/bare-events": { + "version": "2.8.3", + "license": "Apache-2.0", + "peerDependencies": { + "bare-abort-controller": "*" + }, + "peerDependenciesMeta": { + "bare-abort-controller": { + "optional": true + } + } + }, + "node_modules/bare-fs": { + "version": "4.7.1", + "license": "Apache-2.0", + "dependencies": { + "bare-events": "^2.5.4", + "bare-path": "^3.0.0", + "bare-stream": "^2.6.4", + "bare-url": "^2.2.2", + "fast-fifo": "^1.3.2" + }, + "engines": { + "bare": ">=1.16.0" + }, + "peerDependencies": { + "bare-buffer": "*" + }, + "peerDependenciesMeta": { + "bare-buffer": { + "optional": true + } + } + }, + "node_modules/bare-os": { + "version": "3.9.1", + "license": "Apache-2.0", + "engines": { + "bare": ">=1.14.0" + } + }, + "node_modules/bare-path": { + "version": "3.0.0", + "license": "Apache-2.0", + "dependencies": { + "bare-os": "^3.0.1" + } + }, + "node_modules/bare-stream": { + "version": "2.13.1", + "license": "Apache-2.0", + "dependencies": { + "streamx": "^2.25.0", + "teex": "^1.0.1" + }, + "peerDependencies": { + "bare-abort-controller": "*", + "bare-buffer": "*", + "bare-events": "*" + }, + "peerDependenciesMeta": { + "bare-abort-controller": { + "optional": true + }, + "bare-buffer": { + "optional": true + }, + "bare-events": { + "optional": true + } + } + }, + "node_modules/bare-url": { + "version": "2.4.3", + "license": "Apache-2.0", + "dependencies": { + "bare-path": 
"^3.0.0" + } + }, + "node_modules/base64-js": { + "version": "1.5.1", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + "license": "MIT" + }, + "node_modules/baseline-browser-mapping": { + "version": "2.10.31", + "license": "Apache-2.0", + "bin": { + "baseline-browser-mapping": "dist/cli.cjs" + }, + "engines": { + "node": ">=6.0.0" + } + }, + "node_modules/bidi-js": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/bidi-js/-/bidi-js-1.0.3.tgz", + "integrity": "sha512-RKshQI1R3YQ+n9YJz2QQ147P66ELpa1FQEg20Dk8oW9t2KgLbpDLLp9aGZ7y8WHSshDknG0bknqGw5/tyCs5tw==", + "license": "MIT", + "dependencies": { + "require-from-string": "^2.0.2" + } + }, + "node_modules/binary-search-bounds": { + "version": "2.0.5", + "license": "MIT" + }, + "node_modules/bindings": { + "version": "1.5.0", + "license": "MIT", + "dependencies": { + "file-uri-to-path": "1.0.0" + } + }, + "node_modules/birpc": { + "version": "4.0.0", + "license": "MIT", + "funding": { + "url": "https://github.com/sponsors/antfu" + } + }, + "node_modules/boolbase": { + "version": "1.0.0", + "license": "ISC" + }, + "node_modules/brace-expansion": { + "version": "2.1.0", + "license": "MIT", + "dependencies": { + "balanced-match": "^1.0.0" + } + }, + "node_modules/braces": { + "version": "3.0.3", + "license": "MIT", + "dependencies": { + "fill-range": "^7.1.1" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/browserslist": { + "version": "4.28.2", + "funding": [ + { + "type": "opencollective", + "url": "https://opencollective.com/browserslist" + }, + { + "type": "tidelift", + "url": "https://tidelift.com/funding/github/npm/browserslist" + }, + { + "type": "github", + "url": "https://github.com/sponsors/ai" + } + ], + "license": "MIT", + "dependencies": { + "baseline-browser-mapping": "^2.10.12", + 
"caniuse-lite": "^1.0.30001782", + "electron-to-chromium": "^1.5.328", + "node-releases": "^2.0.36", + "update-browserslist-db": "^1.2.3" + }, + "bin": { + "browserslist": "cli.js" + }, + "engines": { + "node": "^6 || ^7 || ^8 || ^9 || ^10 || ^11 || ^12 || >=13.7" + } + }, + "node_modules/buffer": { + "version": "6.0.3", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + "license": "MIT", + "dependencies": { + "base64-js": "^1.3.1", + "ieee754": "^1.2.1" + } + }, + "node_modules/buffer-crc32": { + "version": "1.0.0", + "license": "MIT", + "engines": { + "node": ">=8.0.0" + } + }, + "node_modules/buffer-from": { + "version": "1.1.2", + "license": "MIT" + }, + "node_modules/bundle-name": { + "version": "4.1.0", + "license": "MIT", + "dependencies": { + "run-applescript": "^7.0.0" + }, + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/c12": { + "version": "3.3.4", + "license": "MIT", + "dependencies": { + "chokidar": "^5.0.0", + "confbox": "^0.2.4", + "defu": "^6.1.6", + "dotenv": "^17.3.1", + "exsolve": "^1.0.8", + "giget": "^3.2.0", + "jiti": "^2.6.1", + "ohash": "^2.0.11", + "pathe": "^2.0.3", + "perfect-debounce": "^2.1.0", + "pkg-types": "^2.3.0", + "rc9": "^3.0.1" + }, + "peerDependencies": { + "magicast": "*" + }, + "peerDependenciesMeta": { + "magicast": { + "optional": true + } + } + }, + "node_modules/cac": { + "version": "6.7.14", + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/caniuse-api": { + "version": "3.0.0", + "license": "MIT", + "dependencies": { + "browserslist": "^4.0.0", + "caniuse-lite": "^1.0.0", + "lodash.memoize": "^4.1.2", + "lodash.uniq": "^4.5.0" + } + }, + "node_modules/caniuse-lite": { + "version": "1.0.30001793", + "funding": [ + { + "type": 
"opencollective", + "url": "https://opencollective.com/browserslist" + }, + { + "type": "tidelift", + "url": "https://tidelift.com/funding/github/npm/caniuse-lite" + }, + { + "type": "github", + "url": "https://github.com/sponsors/ai" + } + ], + "license": "CC-BY-4.0" + }, + "node_modules/chai": { + "version": "5.3.3", + "dev": true, + "license": "MIT", + "dependencies": { + "assertion-error": "^2.0.1", + "check-error": "^2.1.1", + "deep-eql": "^5.0.1", + "loupe": "^3.1.0", + "pathval": "^2.0.0" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/chalk": { + "version": "4.1.2", + "license": "MIT", + "dependencies": { + "ansi-styles": "^4.1.0", + "supports-color": "^7.1.0" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/chalk/chalk?sponsor=1" + } + }, + "node_modules/chalk-template": { + "version": "0.4.0", + "license": "MIT", + "dependencies": { + "chalk": "^4.1.2" + }, + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/chalk/chalk-template?sponsor=1" + } + }, + "node_modules/check-error": { + "version": "2.1.3", + "dev": true, + "license": "MIT", + "engines": { + "node": ">= 16" + } + }, + "node_modules/chokidar": { + "version": "5.0.0", + "license": "MIT", + "dependencies": { + "readdirp": "^5.0.0" + }, + "engines": { + "node": ">= 20.19.0" + }, + "funding": { + "url": "https://paulmillr.com/funding/" + } + }, + "node_modules/chownr": { + "version": "3.0.0", + "license": "BlueOak-1.0.0", + "engines": { + "node": ">=18" + } + }, + "node_modules/citty": { + "version": "0.2.2", + "license": "MIT" + }, + "node_modules/class-variance-authority": { + "version": "0.7.1", + "license": "Apache-2.0", + "dependencies": { + "clsx": "^2.1.1" + }, + "funding": { + "url": "https://polar.sh/cva" + } + }, + "node_modules/cliui": { + "version": "9.0.1", + "license": "ISC", + "dependencies": { + "string-width": "^7.2.0", + "strip-ansi": "^7.1.0", + "wrap-ansi": "^9.0.0" + }, + "engines": { + "node": 
">=20" + } + }, + "node_modules/cliui/node_modules/ansi-styles": { + "version": "6.2.3", + "license": "MIT", + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/chalk/ansi-styles?sponsor=1" + } + }, + "node_modules/cliui/node_modules/emoji-regex": { + "version": "10.6.0", + "license": "MIT" + }, + "node_modules/cliui/node_modules/string-width": { + "version": "7.2.0", + "license": "MIT", + "dependencies": { + "emoji-regex": "^10.3.0", + "get-east-asian-width": "^1.0.0", + "strip-ansi": "^7.1.0" + }, + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/cliui/node_modules/wrap-ansi": { + "version": "9.0.2", + "license": "MIT", + "dependencies": { + "ansi-styles": "^6.2.1", + "string-width": "^7.0.0", + "strip-ansi": "^7.1.0" + }, + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/chalk/wrap-ansi?sponsor=1" + } + }, + "node_modules/clsx": { + "version": "2.1.1", + "license": "MIT", + "engines": { + "node": ">=6" + } + }, + "node_modules/cluster-key-slot": { + "version": "1.1.2", + "license": "Apache-2.0", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/codemirror": { + "version": "6.0.2", + "license": "MIT", + "dependencies": { + "@codemirror/autocomplete": "^6.0.0", + "@codemirror/commands": "^6.0.0", + "@codemirror/language": "^6.0.0", + "@codemirror/lint": "^6.0.0", + "@codemirror/search": "^6.0.0", + "@codemirror/state": "^6.0.0", + "@codemirror/view": "^6.0.0" + } + }, + "node_modules/color-convert": { + "version": "2.0.1", + "license": "MIT", + "dependencies": { + "color-name": "~1.1.4" + }, + "engines": { + "node": ">=7.0.0" + } + }, + "node_modules/color-name": { + "version": "1.1.4", + "license": "MIT" + }, + "node_modules/command-line-args": { + "version": "5.2.1", + "license": "MIT", + "dependencies": { + "array-back": "^3.1.0", + "find-replace": "^3.0.0", + "lodash.camelcase": "^4.3.0", + "typical": "^4.0.0" + 
}, + "engines": { + "node": ">=4.0.0" + } + }, + "node_modules/command-line-usage": { + "version": "7.0.4", + "license": "MIT", + "dependencies": { + "array-back": "^6.2.2", + "chalk-template": "^0.4.0", + "table-layout": "^4.1.1", + "typical": "^7.3.0" + }, + "engines": { + "node": ">=12.20.0" + } + }, + "node_modules/command-line-usage/node_modules/array-back": { + "version": "6.2.3", + "license": "MIT", + "engines": { + "node": ">=12.17" + } + }, + "node_modules/command-line-usage/node_modules/typical": { + "version": "7.3.0", + "license": "MIT", + "engines": { + "node": ">=12.17" + } + }, + "node_modules/commander": { + "version": "7.2.0", + "license": "MIT", + "engines": { + "node": ">= 10" + } + }, + "node_modules/commondir": { + "version": "1.0.1", + "license": "MIT" + }, + "node_modules/compatx": { + "version": "0.2.0", + "license": "MIT" + }, + "node_modules/compress-commons": { + "version": "6.0.2", + "license": "MIT", + "dependencies": { + "crc-32": "^1.2.0", + "crc32-stream": "^6.0.0", + "is-stream": "^2.0.1", + "normalize-path": "^3.0.0", + "readable-stream": "^4.0.0" + }, + "engines": { + "node": ">= 14" + } + }, + "node_modules/compress-commons/node_modules/is-stream": { + "version": "2.0.1", + "license": "MIT", + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/confbox": { + "version": "0.2.4", + "license": "MIT" + }, + "node_modules/config-chain": { + "version": "1.1.13", + "dev": true, + "license": "MIT", + "dependencies": { + "ini": "^1.3.4", + "proto-list": "~1.2.1" + } + }, + "node_modules/consola": { + "version": "3.4.2", + "license": "MIT", + "engines": { + "node": "^14.18.0 || >=16.10.0" + } + }, + "node_modules/convert-source-map": { + "version": "2.0.0", + "license": "MIT" + }, + "node_modules/cookie-es": { + "version": "3.1.1", + "license": "MIT" + }, + "node_modules/core-util-is": { + "version": "1.0.3", + "license": "MIT" + }, + "node_modules/cose-base": { + 
"version": "2.2.0", + "license": "MIT", + "dependencies": { + "layout-base": "^2.0.0" + } + }, + "node_modules/cose-base/node_modules/layout-base": { + "version": "2.0.1", + "license": "MIT" + }, + "node_modules/crc-32": { + "version": "1.2.2", + "license": "Apache-2.0", + "bin": { + "crc32": "bin/crc32.njs" + }, + "engines": { + "node": ">=0.8" + } + }, + "node_modules/crc32-stream": { + "version": "6.0.0", + "license": "MIT", + "dependencies": { + "crc-32": "^1.2.0", + "readable-stream": "^4.0.0" + }, + "engines": { + "node": ">= 14" + } + }, + "node_modules/crelt": { + "version": "1.0.6", + "license": "MIT" + }, + "node_modules/croner": { + "version": "10.0.1", + "funding": [ + { + "type": "other", + "url": "https://paypal.me/hexagonpp" + }, + { + "type": "github", + "url": "https://github.com/sponsors/hexagon" + } + ], + "license": "MIT", + "engines": { + "node": ">=18.0" + } + }, + "node_modules/cross-spawn": { + "version": "7.0.6", + "license": "MIT", + "dependencies": { + "path-key": "^3.1.0", + "shebang-command": "^2.0.0", + "which": "^2.0.1" + }, + "engines": { + "node": ">= 8" + } + }, + "node_modules/crossws": { + "version": "0.3.5", + "license": "MIT", + "dependencies": { + "uncrypto": "^0.1.3" + } + }, + "node_modules/css-select": { + "version": "5.2.2", + "license": "BSD-2-Clause", + "dependencies": { + "boolbase": "^1.0.0", + "css-what": "^6.1.0", + "domhandler": "^5.0.2", + "domutils": "^3.0.1", + "nth-check": "^2.0.1" + }, + "funding": { + "url": "https://github.com/sponsors/fb55" + } + }, + "node_modules/css-tree": { + "version": "3.2.1", + "license": "MIT", + "dependencies": { + "mdn-data": "2.27.1", + "source-map-js": "^1.2.1" + }, + "engines": { + "node": "^10 || ^12.20.0 || ^14.13.0 || >=15.0.0" + } + }, + "node_modules/css-what": { + "version": "6.2.2", + "license": "BSD-2-Clause", + "engines": { + "node": ">= 6" + }, + "funding": { + "url": "https://github.com/sponsors/fb55" + } + }, + "node_modules/cssesc": { + "version": "3.0.0", + 
"license": "MIT", + "bin": { + "cssesc": "bin/cssesc" + }, + "engines": { + "node": ">=4" + } + }, + "node_modules/cssnano": { + "version": "8.0.1", + "license": "MIT", + "dependencies": { + "cssnano-preset-default": "^8.0.1", + "lilconfig": "^3.1.3" + }, + "engines": { + "node": "^22.11.0 || ^24.11.0 || >=26.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/cssnano" + }, + "peerDependencies": { + "postcss": "^8.5.14" + } + }, + "node_modules/cssnano-preset-default": { + "version": "8.0.1", + "license": "MIT", + "dependencies": { + "browserslist": "^4.28.2", + "cssnano-utils": "^6.0.0", + "postcss-calc": "^10.1.1", + "postcss-colormin": "^8.0.0", + "postcss-convert-values": "^8.0.0", + "postcss-discard-comments": "^8.0.0", + "postcss-discard-duplicates": "^8.0.0", + "postcss-discard-empty": "^8.0.0", + "postcss-discard-overridden": "^8.0.0", + "postcss-merge-longhand": "^8.0.0", + "postcss-merge-rules": "^8.0.0", + "postcss-minify-font-values": "^8.0.0", + "postcss-minify-gradients": "^8.0.0", + "postcss-minify-params": "^8.0.0", + "postcss-minify-selectors": "^8.0.1", + "postcss-normalize-charset": "^8.0.0", + "postcss-normalize-display-values": "^8.0.0", + "postcss-normalize-positions": "^8.0.0", + "postcss-normalize-repeat-style": "^8.0.0", + "postcss-normalize-string": "^8.0.0", + "postcss-normalize-timing-functions": "^8.0.0", + "postcss-normalize-unicode": "^8.0.0", + "postcss-normalize-url": "^8.0.0", + "postcss-normalize-whitespace": "^8.0.0", + "postcss-ordered-values": "^8.0.0", + "postcss-reduce-initial": "^8.0.0", + "postcss-reduce-transforms": "^8.0.0", + "postcss-svgo": "^8.0.0", + "postcss-unique-selectors": "^8.0.0" + }, + "engines": { + "node": "^22.11.0 || ^24.11.0 || >=26.0" + }, + "peerDependencies": { + "postcss": "^8.5.14" + } + }, + "node_modules/cssnano-utils": { + "version": "6.0.0", + "license": "MIT", + "engines": { + "node": "^22.11.0 || ^24.11.0 || >=26.0" + }, + "peerDependencies": { + "postcss": 
"^8.5.14" + } + }, + "node_modules/csso": { + "version": "5.0.5", + "license": "MIT", + "dependencies": { + "css-tree": "~2.2.0" + }, + "engines": { + "node": "^10 || ^12.20.0 || ^14.13.0 || >=15.0.0", + "npm": ">=7.0.0" + } + }, + "node_modules/csso/node_modules/css-tree": { + "version": "2.2.1", + "license": "MIT", + "dependencies": { + "mdn-data": "2.0.28", + "source-map-js": "^1.0.1" + }, + "engines": { + "node": "^10 || ^12.20.0 || ^14.13.0 || >=15.0.0", + "npm": ">=7.0.0" + } + }, + "node_modules/csso/node_modules/mdn-data": { + "version": "2.0.28", + "license": "CC0-1.0" + }, + "node_modules/cssstyle": { + "version": "6.2.0", + "resolved": "https://registry.npmjs.org/cssstyle/-/cssstyle-6.2.0.tgz", + "integrity": "sha512-Fm5NvhYathRnXNVndkUsCCuR63DCLVVwGOOwQw782coXFi5HhkXdu289l59HlXZBawsyNccXfWRYvLzcDCdDig==", + "license": "MIT", + "dependencies": { + "@asamuzakjp/css-color": "^5.0.1", + "@csstools/css-syntax-patches-for-csstree": "^1.0.28", + "css-tree": "^3.1.0", + "lru-cache": "^11.2.6" + }, + "engines": { + "node": ">=20" + } + }, + "node_modules/cssstyle/node_modules/lru-cache": { + "version": "11.5.1", + "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-11.5.1.tgz", + "integrity": "sha512-RPimw/7aMdv2oqRrxKwvZXcPfwBrn/JZ2xYcY9Hus/6LaS3VOAKVWKWgNLCFSiOm1ESXinjsDlidVU7JlnCN2A==", + "license": "BlueOak-1.0.0", + "engines": { + "node": "20 || >=22" + } + }, + "node_modules/csstype": { + "version": "3.2.3", + "license": "MIT" + }, + "node_modules/cytoscape": { + "version": "3.33.4", + "license": "MIT", + "engines": { + "node": ">=0.10" + } + }, + "node_modules/cytoscape-cise": { + "version": "2.0.1", + "license": "MIT", + "dependencies": { + "avsdf-base": "^1.0.0", + "cose-base": "^2.2.0" + }, + "peerDependencies": { + "cytoscape": "^3.2.0" + } + }, + "node_modules/cytoscape-fcose": { + "version": "2.2.0", + "license": "MIT", + "dependencies": { + "cose-base": "^2.2.0" + }, + "peerDependencies": { + "cytoscape": "^3.2.0" + } + }, + 
"node_modules/d3": { + "version": "7.9.0", + "license": "ISC", + "dependencies": { + "d3-array": "3", + "d3-axis": "3", + "d3-brush": "3", + "d3-chord": "3", + "d3-color": "3", + "d3-contour": "4", + "d3-delaunay": "6", + "d3-dispatch": "3", + "d3-drag": "3", + "d3-dsv": "3", + "d3-ease": "3", + "d3-fetch": "3", + "d3-force": "3", + "d3-format": "3", + "d3-geo": "3", + "d3-hierarchy": "3", + "d3-interpolate": "3", + "d3-path": "3", + "d3-polygon": "3", + "d3-quadtree": "3", + "d3-random": "3", + "d3-scale": "4", + "d3-scale-chromatic": "3", + "d3-selection": "3", + "d3-shape": "3", + "d3-time": "3", + "d3-time-format": "4", + "d3-timer": "3", + "d3-transition": "3", + "d3-zoom": "3" + }, + "engines": { + "node": ">=12" + } + }, + "node_modules/d3-array": { + "version": "3.2.4", + "license": "ISC", + "dependencies": { + "internmap": "1 - 2" + }, + "engines": { + "node": ">=12" + } + }, + "node_modules/d3-axis": { + "version": "3.0.0", + "license": "ISC", + "engines": { + "node": ">=12" + } + }, + "node_modules/d3-brush": { + "version": "3.0.0", + "license": "ISC", + "dependencies": { + "d3-dispatch": "1 - 3", + "d3-drag": "2 - 3", + "d3-interpolate": "1 - 3", + "d3-selection": "3", + "d3-transition": "3" + }, + "engines": { + "node": ">=12" + } + }, + "node_modules/d3-chord": { + "version": "3.0.1", + "license": "ISC", + "dependencies": { + "d3-path": "1 - 3" + }, + "engines": { + "node": ">=12" + } + }, + "node_modules/d3-color": { + "version": "3.1.0", + "license": "ISC", + "engines": { + "node": ">=12" + } + }, + "node_modules/d3-contour": { + "version": "4.0.2", + "license": "ISC", + "dependencies": { + "d3-array": "^3.2.0" + }, + "engines": { + "node": ">=12" + } + }, + "node_modules/d3-delaunay": { + "version": "6.0.4", + "license": "ISC", + "dependencies": { + "delaunator": "5" + }, + "engines": { + "node": ">=12" + } + }, + "node_modules/d3-dispatch": { + "version": "3.0.1", + "license": "ISC", + "engines": { + "node": ">=12" + } + }, + 
"node_modules/d3-drag": { + "version": "3.0.0", + "license": "ISC", + "dependencies": { + "d3-dispatch": "1 - 3", + "d3-selection": "3" + }, + "engines": { + "node": ">=12" + } + }, + "node_modules/d3-dsv": { + "version": "3.0.1", + "license": "ISC", + "dependencies": { + "commander": "7", + "iconv-lite": "0.6", + "rw": "1" + }, + "bin": { + "csv2json": "bin/dsv2json.js", + "csv2tsv": "bin/dsv2dsv.js", + "dsv2dsv": "bin/dsv2dsv.js", + "dsv2json": "bin/dsv2json.js", + "json2csv": "bin/json2dsv.js", + "json2dsv": "bin/json2dsv.js", + "json2tsv": "bin/json2dsv.js", + "tsv2csv": "bin/dsv2dsv.js", + "tsv2json": "bin/dsv2json.js" + }, + "engines": { + "node": ">=12" + } + }, + "node_modules/d3-ease": { + "version": "3.0.1", + "license": "BSD-3-Clause", + "engines": { + "node": ">=12" + } + }, + "node_modules/d3-fetch": { + "version": "3.0.1", + "license": "ISC", + "dependencies": { + "d3-dsv": "1 - 3" + }, + "engines": { + "node": ">=12" + } + }, + "node_modules/d3-force": { + "version": "3.0.0", + "license": "ISC", + "dependencies": { + "d3-dispatch": "1 - 3", + "d3-quadtree": "1 - 3", + "d3-timer": "1 - 3" + }, + "engines": { + "node": ">=12" + } + }, + "node_modules/d3-format": { + "version": "3.1.2", + "license": "ISC", + "engines": { + "node": ">=12" + } + }, + "node_modules/d3-geo": { + "version": "3.1.1", + "license": "ISC", + "dependencies": { + "d3-array": "2.5.0 - 3" + }, + "engines": { + "node": ">=12" + } + }, + "node_modules/d3-hierarchy": { + "version": "3.1.2", + "license": "ISC", + "engines": { + "node": ">=12" + } + }, + "node_modules/d3-interpolate": { + "version": "3.0.1", + "license": "ISC", + "dependencies": { + "d3-color": "1 - 3" + }, + "engines": { + "node": ">=12" + } + }, + "node_modules/d3-path": { + "version": "3.1.0", + "license": "ISC", + "engines": { + "node": ">=12" + } + }, + "node_modules/d3-polygon": { + "version": "3.0.1", + "license": "ISC", + "engines": { + "node": ">=12" + } + }, + "node_modules/d3-quadtree": { + "version": "3.0.1", 
+ "license": "ISC", + "engines": { + "node": ">=12" + } + }, + "node_modules/d3-random": { + "version": "3.0.1", + "license": "ISC", + "engines": { + "node": ">=12" + } + }, + "node_modules/d3-scale": { + "version": "4.0.2", + "license": "ISC", + "dependencies": { + "d3-array": "2.10.0 - 3", + "d3-format": "1 - 3", + "d3-interpolate": "1.2.0 - 3", + "d3-time": "2.1.1 - 3", + "d3-time-format": "2 - 4" + }, + "engines": { + "node": ">=12" + } + }, + "node_modules/d3-scale-chromatic": { + "version": "3.1.0", + "license": "ISC", + "dependencies": { + "d3-color": "1 - 3", + "d3-interpolate": "1 - 3" + }, + "engines": { + "node": ">=12" + } + }, + "node_modules/d3-selection": { + "version": "3.0.0", + "license": "ISC", + "engines": { + "node": ">=12" + } + }, + "node_modules/d3-shape": { + "version": "3.2.0", + "license": "ISC", + "dependencies": { + "d3-path": "^3.1.0" + }, + "engines": { + "node": ">=12" + } + }, + "node_modules/d3-time": { + "version": "3.1.0", + "license": "ISC", + "dependencies": { + "d3-array": "2 - 3" + }, + "engines": { + "node": ">=12" + } + }, + "node_modules/d3-time-format": { + "version": "4.1.0", + "license": "ISC", + "dependencies": { + "d3-time": "1 - 3" + }, + "engines": { + "node": ">=12" + } + }, + "node_modules/d3-timer": { + "version": "3.0.1", + "license": "ISC", + "engines": { + "node": ">=12" + } + }, + "node_modules/d3-transition": { + "version": "3.0.1", + "license": "ISC", + "dependencies": { + "d3-color": "1 - 3", + "d3-dispatch": "1 - 3", + "d3-ease": "1 - 3", + "d3-interpolate": "1 - 3", + "d3-timer": "1 - 3" + }, + "engines": { + "node": ">=12" + }, + "peerDependencies": { + "d3-selection": "2 - 3" + } + }, + "node_modules/d3-zoom": { + "version": "3.0.0", + "license": "ISC", + "dependencies": { + "d3-dispatch": "1 - 3", + "d3-drag": "2 - 3", + "d3-interpolate": "1 - 3", + "d3-selection": "2 - 3", + "d3-transition": "2 - 3" + }, + "engines": { + "node": ">=12" + } + }, + "node_modules/data-urls": { + "version": "7.0.0", + 
"resolved": "https://registry.npmjs.org/data-urls/-/data-urls-7.0.0.tgz", + "integrity": "sha512-23XHcCF+coGYevirZceTVD7NdJOqVn+49IHyxgszm+JIiHLoB2TkmPtsYkNWT1pvRSGkc35L6NHs0yHkN2SumA==", + "license": "MIT", + "dependencies": { + "whatwg-mimetype": "^5.0.0", + "whatwg-url": "^16.0.0" + }, + "engines": { + "node": "^20.19.0 || ^22.12.0 || >=24.0.0" + } + }, + "node_modules/data-urls/node_modules/tr46": { + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/tr46/-/tr46-6.0.0.tgz", + "integrity": "sha512-bLVMLPtstlZ4iMQHpFHTR7GAGj2jxi8Dg0s2h2MafAE4uSWF98FC/3MomU51iQAMf8/qDUbKWf5GxuvvVcXEhw==", + "license": "MIT", + "dependencies": { + "punycode": "^2.3.1" + }, + "engines": { + "node": ">=20" + } + }, + "node_modules/data-urls/node_modules/webidl-conversions": { + "version": "8.0.1", + "resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-8.0.1.tgz", + "integrity": "sha512-BMhLD/Sw+GbJC21C/UgyaZX41nPt8bUTg+jWyDeg7e7YN4xOM05YPSIXceACnXVtqyEw/LMClUQMtMZ+PGGpqQ==", + "license": "BSD-2-Clause", + "engines": { + "node": ">=20" + } + }, + "node_modules/data-urls/node_modules/whatwg-mimetype": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/whatwg-mimetype/-/whatwg-mimetype-5.0.0.tgz", + "integrity": "sha512-sXcNcHOC51uPGF0P/D4NVtrkjSU2fNsm9iog4ZvZJsL3rjoDAzXZhkm2MWt1y+PUdggKAYVoMAIYcs78wJ51Cw==", + "license": "MIT", + "engines": { + "node": ">=20" + } + }, + "node_modules/data-urls/node_modules/whatwg-url": { + "version": "16.0.1", + "resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-16.0.1.tgz", + "integrity": "sha512-1to4zXBxmXHV3IiSSEInrreIlu02vUOvrhxJJH5vcxYTBDAx51cqZiKdyTxlecdKNSjj8EcxGBxNf6Vg+945gw==", + "license": "MIT", + "dependencies": { + "@exodus/bytes": "^1.11.0", + "tr46": "^6.0.0", + "webidl-conversions": "^8.0.1" + }, + "engines": { + "node": "^20.19.0 || ^22.12.0 || >=24.0.0" + } + }, + "node_modules/db0": { + "version": "0.3.4", + "license": "MIT", + "peerDependencies": { + 
"@electric-sql/pglite": "*", + "@libsql/client": "*", + "better-sqlite3": "*", + "drizzle-orm": "*", + "mysql2": "*", + "sqlite3": "*" + }, + "peerDependenciesMeta": { + "@electric-sql/pglite": { + "optional": true + }, + "@libsql/client": { + "optional": true + }, + "better-sqlite3": { + "optional": true + }, + "drizzle-orm": { + "optional": true + }, + "mysql2": { + "optional": true + }, + "sqlite3": { + "optional": true + } + } + }, + "node_modules/de-indent": { + "version": "1.0.2", + "devOptional": true, + "license": "MIT" + }, + "node_modules/debug": { + "version": "4.4.3", + "license": "MIT", + "dependencies": { + "ms": "^2.1.3" + }, + "engines": { + "node": ">=6.0" + }, + "peerDependenciesMeta": { + "supports-color": { + "optional": true + } + } + }, + "node_modules/decimal.js": { + "version": "10.6.0", + "resolved": "https://registry.npmjs.org/decimal.js/-/decimal.js-10.6.0.tgz", + "integrity": "sha512-YpgQiITW3JXGntzdUmyUR1V812Hn8T1YVXhCu+wO3OpS4eU9l4YdD3qjyiKdV6mvV29zapkMeD390UVEf2lkUg==", + "license": "MIT" + }, + "node_modules/deep-eql": { + "version": "5.0.2", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=6" + } + }, + "node_modules/deepmerge": { + "version": "4.3.1", + "license": "MIT", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/default-browser": { + "version": "5.5.0", + "license": "MIT", + "dependencies": { + "bundle-name": "^4.1.0", + "default-browser-id": "^5.0.0" + }, + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/default-browser-id": { + "version": "5.0.1", + "license": "MIT", + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/define-lazy-prop": { + "version": "3.0.0", + "license": "MIT", + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/defu": { + "version": "6.1.7", + 
"license": "MIT" + }, + "node_modules/delaunator": { + "version": "5.1.0", + "license": "ISC", + "dependencies": { + "robust-predicates": "^3.0.2" + } + }, + "node_modules/denque": { + "version": "2.1.0", + "license": "Apache-2.0", + "engines": { + "node": ">=0.10" + } + }, + "node_modules/depd": { + "version": "2.0.0", + "license": "MIT", + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/destr": { + "version": "2.0.5", + "license": "MIT" + }, + "node_modules/detect-libc": { + "version": "2.1.2", + "license": "Apache-2.0", + "engines": { + "node": ">=8" + } + }, + "node_modules/devalue": { + "version": "5.8.1", + "license": "MIT" + }, + "node_modules/diff": { + "version": "8.0.4", + "license": "BSD-3-Clause", + "engines": { + "node": ">=0.3.1" + } + }, + "node_modules/dom-serializer": { + "version": "2.0.0", + "license": "MIT", + "dependencies": { + "domelementtype": "^2.3.0", + "domhandler": "^5.0.2", + "entities": "^4.2.0" + }, + "funding": { + "url": "https://github.com/cheeriojs/dom-serializer?sponsor=1" + } + }, + "node_modules/dom-serializer/node_modules/entities": { + "version": "4.5.0", + "license": "BSD-2-Clause", + "engines": { + "node": ">=0.12" + }, + "funding": { + "url": "https://github.com/fb55/entities?sponsor=1" + } + }, + "node_modules/domelementtype": { + "version": "2.3.0", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/fb55" + } + ], + "license": "BSD-2-Clause" + }, + "node_modules/domhandler": { + "version": "5.0.3", + "license": "BSD-2-Clause", + "dependencies": { + "domelementtype": "^2.3.0" + }, + "engines": { + "node": ">= 4" + }, + "funding": { + "url": "https://github.com/fb55/domhandler?sponsor=1" + } + }, + "node_modules/dompurify": { + "version": "3.4.5", + "license": "(MPL-2.0 OR Apache-2.0)", + "optionalDependencies": { + "@types/trusted-types": "^2.0.7" + } + }, + "node_modules/domutils": { + "version": "3.2.2", + "license": "BSD-2-Clause", + "dependencies": { + "dom-serializer": "^2.0.0", + 
"domelementtype": "^2.3.0", + "domhandler": "^5.0.3" + }, + "funding": { + "url": "https://github.com/fb55/domutils?sponsor=1" + } + }, + "node_modules/dot-prop": { + "version": "10.1.0", + "license": "MIT", + "dependencies": { + "type-fest": "^5.0.0" + }, + "engines": { + "node": ">=20" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/dotenv": { + "version": "17.4.2", + "license": "BSD-2-Clause", + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://dotenvx.com" + } + }, + "node_modules/duplexer": { + "version": "0.1.2", + "license": "MIT" + }, + "node_modules/eastasianwidth": { + "version": "0.2.0", + "license": "MIT" + }, + "node_modules/editorconfig": { + "version": "1.0.7", + "dev": true, + "license": "MIT", + "dependencies": { + "@one-ini/wasm": "0.1.1", + "commander": "^10.0.0", + "minimatch": "^9.0.1", + "semver": "^7.5.3" + }, + "bin": { + "editorconfig": "bin/editorconfig" + }, + "engines": { + "node": ">=14" + } + }, + "node_modules/editorconfig/node_modules/commander": { + "version": "10.0.1", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=14" + } + }, + "node_modules/ee-first": { + "version": "1.1.1", + "license": "MIT" + }, + "node_modules/electron-to-chromium": { + "version": "1.5.360", + "license": "ISC" + }, + "node_modules/emoji-regex": { + "version": "9.2.2", + "license": "MIT" + }, + "node_modules/encodeurl": { + "version": "2.0.0", + "license": "MIT", + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/enhanced-resolve": { + "version": "5.21.6", + "license": "MIT", + "dependencies": { + "graceful-fs": "^4.2.4", + "tapable": "^2.3.3" + }, + "engines": { + "node": ">=10.13.0" + } + }, + "node_modules/entities": { + "version": "7.0.1", + "license": "BSD-2-Clause", + "engines": { + "node": ">=0.12" + }, + "funding": { + "url": "https://github.com/fb55/entities?sponsor=1" + } + }, + "node_modules/error-stack-parser-es": { + "version": "1.0.5", + "license": "MIT", 
+ "funding": { + "url": "https://github.com/sponsors/antfu" + } + }, + "node_modules/errx": { + "version": "0.1.0", + "license": "MIT" + }, + "node_modules/es-errors": { + "version": "1.3.0", + "license": "MIT", + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/es-module-lexer": { + "version": "2.1.0", + "license": "MIT" + }, + "node_modules/esbuild": { + "version": "0.28.0", + "hasInstallScript": true, + "license": "MIT", + "bin": { + "esbuild": "bin/esbuild" + }, + "engines": { + "node": ">=18" + }, + "optionalDependencies": { + "@esbuild/aix-ppc64": "0.28.0", + "@esbuild/android-arm": "0.28.0", + "@esbuild/android-arm64": "0.28.0", + "@esbuild/android-x64": "0.28.0", + "@esbuild/darwin-arm64": "0.28.0", + "@esbuild/darwin-x64": "0.28.0", + "@esbuild/freebsd-arm64": "0.28.0", + "@esbuild/freebsd-x64": "0.28.0", + "@esbuild/linux-arm": "0.28.0", + "@esbuild/linux-arm64": "0.28.0", + "@esbuild/linux-ia32": "0.28.0", + "@esbuild/linux-loong64": "0.28.0", + "@esbuild/linux-mips64el": "0.28.0", + "@esbuild/linux-ppc64": "0.28.0", + "@esbuild/linux-riscv64": "0.28.0", + "@esbuild/linux-s390x": "0.28.0", + "@esbuild/linux-x64": "0.28.0", + "@esbuild/netbsd-arm64": "0.28.0", + "@esbuild/netbsd-x64": "0.28.0", + "@esbuild/openbsd-arm64": "0.28.0", + "@esbuild/openbsd-x64": "0.28.0", + "@esbuild/openharmony-arm64": "0.28.0", + "@esbuild/sunos-x64": "0.28.0", + "@esbuild/win32-arm64": "0.28.0", + "@esbuild/win32-ia32": "0.28.0", + "@esbuild/win32-x64": "0.28.0" + } + }, + "node_modules/escalade": { + "version": "3.2.0", + "license": "MIT", + "engines": { + "node": ">=6" + } + }, + "node_modules/escape-html": { + "version": "1.0.3", + "license": "MIT" + }, + "node_modules/escape-string-regexp": { + "version": "5.0.0", + "license": "MIT", + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/estree-walker": { + "version": "2.0.2", + "license": "MIT" + }, + "node_modules/etag": { + "version": 
"1.8.1", + "license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/event-target-shim": { + "version": "5.0.1", + "license": "MIT", + "engines": { + "node": ">=6" + } + }, + "node_modules/events": { + "version": "3.3.0", + "license": "MIT", + "engines": { + "node": ">=0.8.x" + } + }, + "node_modules/events-universal": { + "version": "1.0.1", + "license": "Apache-2.0", + "dependencies": { + "bare-events": "^2.7.0" + } + }, + "node_modules/execa": { + "version": "8.0.1", + "license": "MIT", + "dependencies": { + "cross-spawn": "^7.0.3", + "get-stream": "^8.0.1", + "human-signals": "^5.0.0", + "is-stream": "^3.0.0", + "merge-stream": "^2.0.0", + "npm-run-path": "^5.1.0", + "onetime": "^6.0.0", + "signal-exit": "^4.1.0", + "strip-final-newline": "^3.0.0" + }, + "engines": { + "node": ">=16.17" + }, + "funding": { + "url": "https://github.com/sindresorhus/execa?sponsor=1" + } + }, + "node_modules/expect-type": { + "version": "1.3.0", + "dev": true, + "license": "Apache-2.0", + "engines": { + "node": ">=12.0.0" + } + }, + "node_modules/exsolve": { + "version": "1.0.8", + "license": "MIT" + }, + "node_modules/fast-fifo": { + "version": "1.3.2", + "license": "MIT" + }, + "node_modules/fast-glob": { + "version": "3.3.3", + "license": "MIT", + "dependencies": { + "@nodelib/fs.stat": "^2.0.2", + "@nodelib/fs.walk": "^1.2.3", + "glob-parent": "^5.1.2", + "merge2": "^1.3.0", + "micromatch": "^4.0.8" + }, + "engines": { + "node": ">=8.6.0" + } + }, + "node_modules/fast-npm-meta": { + "version": "1.5.1", + "license": "MIT", + "bin": { + "fast-npm-meta": "dist/cli.mjs" + }, + "funding": { + "url": "https://github.com/sponsors/antfu" + } + }, + "node_modules/fast-string-truncated-width": { + "version": "3.0.3", + "license": "MIT" + }, + "node_modules/fast-string-width": { + "version": "3.0.2", + "license": "MIT", + "dependencies": { + "fast-string-truncated-width": "^3.0.2" + } + }, + "node_modules/fast-wrap-ansi": { + "version": "0.2.2", + "license": "MIT", + 
"dependencies": { + "fast-string-width": "^3.0.2" + } + }, + "node_modules/fastq": { + "version": "1.20.1", + "license": "ISC", + "dependencies": { + "reusify": "^1.0.4" + } + }, + "node_modules/fdir": { + "version": "6.5.0", + "license": "MIT", + "engines": { + "node": ">=12.0.0" + }, + "peerDependencies": { + "picomatch": "^3 || ^4" + }, + "peerDependenciesMeta": { + "picomatch": { + "optional": true + } + } + }, + "node_modules/file-uri-to-path": { + "version": "1.0.0", + "license": "MIT" + }, + "node_modules/fill-range": { + "version": "7.1.1", + "license": "MIT", + "dependencies": { + "to-regex-range": "^5.0.1" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/find-replace": { + "version": "3.0.0", + "license": "MIT", + "dependencies": { + "array-back": "^3.0.1" + }, + "engines": { + "node": ">=4.0.0" + } + }, + "node_modules/flatbuffers": { + "version": "24.12.23", + "license": "Apache-2.0" + }, + "node_modules/foreground-child": { + "version": "3.3.1", + "license": "ISC", + "dependencies": { + "cross-spawn": "^7.0.6", + "signal-exit": "^4.0.1" + }, + "engines": { + "node": ">=14" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + } + }, + "node_modules/fraction.js": { + "version": "5.3.4", + "license": "MIT", + "engines": { + "node": "*" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/rawify" + } + }, + "node_modules/fresh": { + "version": "2.0.0", + "license": "MIT", + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/function-bind": { + "version": "1.1.2", + "license": "MIT", + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/fuse.js": { + "version": "7.3.0", + "license": "Apache-2.0", + "engines": { + "node": ">=10" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/krisk" + } + }, + "node_modules/fzf": { + "version": "0.5.2", + "license": "BSD-3-Clause" + }, + "node_modules/gensync": { + "version": "1.0.0-beta.2", + "license": 
"MIT", + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/get-caller-file": { + "version": "2.0.5", + "license": "ISC", + "engines": { + "node": "6.* || 8.* || >= 10.*" + } + }, + "node_modules/get-east-asian-width": { + "version": "1.6.0", + "license": "MIT", + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/get-port-please": { + "version": "3.2.0", + "license": "MIT" + }, + "node_modules/get-stream": { + "version": "8.0.1", + "license": "MIT", + "engines": { + "node": ">=16" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/giget": { + "version": "3.2.0", + "license": "MIT", + "bin": { + "giget": "dist/cli.mjs" + } + }, + "node_modules/gl-bench": { + "version": "1.0.42", + "license": "MIT" + }, + "node_modules/gl-matrix": { + "version": "3.4.4", + "license": "MIT" + }, + "node_modules/glob": { + "version": "10.5.0", + "license": "ISC", + "dependencies": { + "foreground-child": "^3.1.0", + "jackspeak": "^3.1.2", + "minimatch": "^9.0.4", + "minipass": "^7.1.2", + "package-json-from-dist": "^1.0.0", + "path-scurry": "^1.11.1" + }, + "bin": { + "glob": "dist/esm/bin.mjs" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + } + }, + "node_modules/glob-parent": { + "version": "5.1.2", + "license": "ISC", + "dependencies": { + "is-glob": "^4.0.1" + }, + "engines": { + "node": ">= 6" + } + }, + "node_modules/global-directory": { + "version": "4.0.1", + "license": "MIT", + "dependencies": { + "ini": "4.1.1" + }, + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/global-directory/node_modules/ini": { + "version": "4.1.1", + "license": "ISC", + "engines": { + "node": "^14.17.0 || ^16.13.0 || >=18.0.0" + } + }, + "node_modules/globby": { + "version": "16.2.0", + "license": "MIT", + "dependencies": { + "@sindresorhus/merge-streams": "^4.0.0", + 
"fast-glob": "^3.3.3", + "ignore": "^7.0.5", + "is-path-inside": "^4.0.0", + "slash": "^5.1.0", + "unicorn-magic": "^0.4.0" + }, + "engines": { + "node": ">=20" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/graceful-fs": { + "version": "4.2.11", + "license": "ISC" + }, + "node_modules/gzip-size": { + "version": "7.0.0", + "license": "MIT", + "dependencies": { + "duplexer": "^0.1.2" + }, + "engines": { + "node": "^12.20.0 || ^14.13.1 || >=16.0.0" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/h3": { + "version": "1.15.11", + "license": "MIT", + "dependencies": { + "cookie-es": "^1.2.3", + "crossws": "^0.3.5", + "defu": "^6.1.6", + "destr": "^2.0.5", + "iron-webcrypto": "^1.2.1", + "node-mock-http": "^1.0.4", + "radix3": "^1.1.2", + "ufo": "^1.6.3", + "uncrypto": "^0.1.3" + } + }, + "node_modules/h3/node_modules/cookie-es": { + "version": "1.2.3", + "license": "MIT" + }, + "node_modules/happy-dom": { + "version": "15.11.7", + "dev": true, + "license": "MIT", + "dependencies": { + "entities": "^4.5.0", + "webidl-conversions": "^7.0.0", + "whatwg-mimetype": "^3.0.0" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/happy-dom/node_modules/entities": { + "version": "4.5.0", + "dev": true, + "license": "BSD-2-Clause", + "engines": { + "node": ">=0.12" + }, + "funding": { + "url": "https://github.com/fb55/entities?sponsor=1" + } + }, + "node_modules/has-flag": { + "version": "4.0.0", + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/hasown": { + "version": "2.0.3", + "license": "MIT", + "dependencies": { + "function-bind": "^1.1.2" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/he": { + "version": "1.2.0", + "devOptional": true, + "license": "MIT", + "bin": { + "he": "bin/he" + } + }, + "node_modules/hookable": { + "version": "6.1.1", + "license": "MIT" + }, + "node_modules/html-encoding-sniffer": { + "version": 
"6.0.0", + "resolved": "https://registry.npmjs.org/html-encoding-sniffer/-/html-encoding-sniffer-6.0.0.tgz", + "integrity": "sha512-CV9TW3Y3f8/wT0BRFc1/KAVQ3TUHiXmaAb6VW9vtiMFf7SLoMd1PdAc4W3KFOFETBJUb90KatHqlsZMWV+R9Gg==", + "license": "MIT", + "dependencies": { + "@exodus/bytes": "^1.6.0" + }, + "engines": { + "node": "^20.19.0 || ^22.12.0 || >=24.0.0" + } + }, + "node_modules/http-errors": { + "version": "2.0.1", + "license": "MIT", + "dependencies": { + "depd": "~2.0.0", + "inherits": "~2.0.4", + "setprototypeof": "~1.2.0", + "statuses": "~2.0.2", + "toidentifier": "~1.0.1" + }, + "engines": { + "node": ">= 0.8" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/express" + } + }, + "node_modules/http-proxy-agent": { + "version": "7.0.2", + "resolved": "https://registry.npmjs.org/http-proxy-agent/-/http-proxy-agent-7.0.2.tgz", + "integrity": "sha512-T1gkAiYYDWYx3V5Bmyu7HcfcvL7mUrTWiM6yOfa3PIphViJ/gFPbvidQ+veqSOHci/PxBcDabeUNCzpOODJZig==", + "license": "MIT", + "dependencies": { + "agent-base": "^7.1.0", + "debug": "^4.3.4" + }, + "engines": { + "node": ">= 14" + } + }, + "node_modules/http-shutdown": { + "version": "1.2.2", + "license": "MIT", + "engines": { + "iojs": ">= 1.0.0", + "node": ">= 0.12.0" + } + }, + "node_modules/https-proxy-agent": { + "version": "7.0.6", + "license": "MIT", + "dependencies": { + "agent-base": "^7.1.2", + "debug": "4" + }, + "engines": { + "node": ">= 14" + } + }, + "node_modules/httpxy": { + "version": "0.5.3", + "license": "MIT" + }, + "node_modules/human-signals": { + "version": "5.0.0", + "license": "Apache-2.0", + "engines": { + "node": ">=16.17.0" + } + }, + "node_modules/iconv-lite": { + "version": "0.6.3", + "license": "MIT", + "dependencies": { + "safer-buffer": ">= 2.1.2 < 3.0.0" + }, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/ieee754": { + "version": "1.2.1", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": 
"patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + "license": "BSD-3-Clause" + }, + "node_modules/ignore": { + "version": "7.0.5", + "license": "MIT", + "engines": { + "node": ">= 4" + } + }, + "node_modules/image-meta": { + "version": "0.2.2", + "license": "MIT" + }, + "node_modules/impound": { + "version": "1.1.5", + "license": "MIT", + "dependencies": { + "@jridgewell/trace-mapping": "^0.3.31", + "es-module-lexer": "^2.0.0", + "pathe": "^2.0.3", + "unplugin": "^3.0.0", + "unplugin-utils": "^0.3.1" + } + }, + "node_modules/inherits": { + "version": "2.0.4", + "license": "ISC" + }, + "node_modules/ini": { + "version": "1.3.8", + "dev": true, + "license": "ISC" + }, + "node_modules/internmap": { + "version": "2.0.3", + "license": "ISC", + "engines": { + "node": ">=12" + } + }, + "node_modules/interval-tree-1d": { + "version": "1.0.4", + "license": "MIT", + "dependencies": { + "binary-search-bounds": "^2.0.0" + } + }, + "node_modules/ioredis": { + "version": "5.10.1", + "license": "MIT", + "dependencies": { + "@ioredis/commands": "1.5.1", + "cluster-key-slot": "^1.1.0", + "debug": "^4.3.4", + "denque": "^2.1.0", + "lodash.defaults": "^4.2.0", + "lodash.isarguments": "^3.1.0", + "redis-errors": "^1.2.0", + "redis-parser": "^3.0.0", + "standard-as-callback": "^2.1.0" + }, + "engines": { + "node": ">=12.22.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/ioredis" + } + }, + "node_modules/iron-webcrypto": { + "version": "1.2.1", + "license": "MIT", + "funding": { + "url": "https://github.com/sponsors/brc-dd" + } + }, + "node_modules/is-core-module": { + "version": "2.16.2", + "license": "MIT", + "dependencies": { + "hasown": "^2.0.3" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/is-docker": { + "version": "3.0.0", + "license": "MIT", + "bin": { + "is-docker": 
"cli.js" + }, + "engines": { + "node": "^12.20.0 || ^14.13.1 || >=16.0.0" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/is-extglob": { + "version": "2.1.1", + "license": "MIT", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/is-fullwidth-code-point": { + "version": "3.0.0", + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/is-glob": { + "version": "4.0.3", + "license": "MIT", + "dependencies": { + "is-extglob": "^2.1.1" + }, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/is-in-ssh": { + "version": "1.0.0", + "license": "MIT", + "engines": { + "node": ">=20" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/is-inside-container": { + "version": "1.0.0", + "license": "MIT", + "dependencies": { + "is-docker": "^3.0.0" + }, + "bin": { + "is-inside-container": "cli.js" + }, + "engines": { + "node": ">=14.16" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/is-installed-globally": { + "version": "1.0.0", + "license": "MIT", + "dependencies": { + "global-directory": "^4.0.1", + "is-path-inside": "^4.0.0" + }, + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/is-module": { + "version": "1.0.0", + "license": "MIT" + }, + "node_modules/is-number": { + "version": "7.0.0", + "license": "MIT", + "engines": { + "node": ">=0.12.0" + } + }, + "node_modules/is-path-inside": { + "version": "4.0.0", + "license": "MIT", + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/is-potential-custom-element-name": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/is-potential-custom-element-name/-/is-potential-custom-element-name-1.0.1.tgz", + "integrity": 
"sha512-bCYeRA2rVibKZd+s2625gGnGF/t7DSqDs4dP7CrLA1m7jKWz6pps0LpYLJN8Q64HtmPKJ1hrN3nzPNKFEKOUiQ==", + "license": "MIT" + }, + "node_modules/is-reference": { + "version": "1.2.1", + "license": "MIT", + "dependencies": { + "@types/estree": "*" + } + }, + "node_modules/is-stream": { + "version": "3.0.0", + "license": "MIT", + "engines": { + "node": "^12.20.0 || ^14.13.1 || >=16.0.0" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/is-wsl": { + "version": "3.1.1", + "license": "MIT", + "dependencies": { + "is-inside-container": "^1.0.0" + }, + "engines": { + "node": ">=16" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/isarray": { + "version": "1.0.0", + "license": "MIT" + }, + "node_modules/isexe": { + "version": "2.0.0", + "license": "ISC" + }, + "node_modules/isoformat": { + "version": "0.2.1", + "license": "ISC" + }, + "node_modules/isomorphic-dompurify": { + "version": "2.36.0", + "resolved": "https://registry.npmjs.org/isomorphic-dompurify/-/isomorphic-dompurify-2.36.0.tgz", + "integrity": "sha512-E8YkGyPY3a/U5s0WOoc8Ok+3SWL/33yn2IHCoxCFLBUUPVy9WGa++akJZFxQCcJIhI+UvYhbrbnTIFQkHKZbgA==", + "license": "MIT", + "dependencies": { + "dompurify": "^3.3.1", + "jsdom": "^28.0.0" + }, + "engines": { + "node": ">=20.19.5" + } + }, + "node_modules/jackspeak": { + "version": "3.4.3", + "license": "BlueOak-1.0.0", + "dependencies": { + "@isaacs/cliui": "^8.0.2" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + }, + "optionalDependencies": { + "@pkgjs/parseargs": "^0.11.0" + } + }, + "node_modules/jiti": { + "version": "2.7.0", + "license": "MIT", + "bin": { + "jiti": "lib/jiti-cli.mjs" + } + }, + "node_modules/js-beautify": { + "version": "1.15.4", + "dev": true, + "license": "MIT", + "dependencies": { + "config-chain": "^1.1.13", + "editorconfig": "^1.0.4", + "glob": "^10.4.2", + "js-cookie": "^3.0.5", + "nopt": "^7.2.1" + }, + "bin": { + "css-beautify": 
"js/bin/css-beautify.js", + "html-beautify": "js/bin/html-beautify.js", + "js-beautify": "js/bin/js-beautify.js" + }, + "engines": { + "node": ">=14" + } + }, + "node_modules/js-cookie": { + "version": "3.0.7", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=20" + } + }, + "node_modules/js-tokens": { + "version": "4.0.0", + "license": "MIT" + }, + "node_modules/jsdom": { + "version": "28.1.0", + "resolved": "https://registry.npmjs.org/jsdom/-/jsdom-28.1.0.tgz", + "integrity": "sha512-0+MoQNYyr2rBHqO1xilltfDjV9G7ymYGlAUazgcDLQaUf8JDHbuGwsxN6U9qWaElZ4w1B2r7yEGIL3GdeW3Rug==", + "license": "MIT", + "dependencies": { + "@acemir/cssom": "^0.9.31", + "@asamuzakjp/dom-selector": "^6.8.1", + "@bramus/specificity": "^2.4.2", + "@exodus/bytes": "^1.11.0", + "cssstyle": "^6.0.1", + "data-urls": "^7.0.0", + "decimal.js": "^10.6.0", + "html-encoding-sniffer": "^6.0.0", + "http-proxy-agent": "^7.0.2", + "https-proxy-agent": "^7.0.6", + "is-potential-custom-element-name": "^1.0.1", + "parse5": "^8.0.0", + "saxes": "^6.0.0", + "symbol-tree": "^3.2.4", + "tough-cookie": "^6.0.0", + "undici": "^7.21.0", + "w3c-xmlserializer": "^5.0.0", + "webidl-conversions": "^8.0.1", + "whatwg-mimetype": "^5.0.0", + "whatwg-url": "^16.0.0", + "xml-name-validator": "^5.0.0" + }, + "engines": { + "node": "^20.19.0 || ^22.12.0 || >=24.0.0" + }, + "peerDependencies": { + "canvas": "^3.0.0" + }, + "peerDependenciesMeta": { + "canvas": { + "optional": true + } + } + }, + "node_modules/jsdom/node_modules/tr46": { + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/tr46/-/tr46-6.0.0.tgz", + "integrity": "sha512-bLVMLPtstlZ4iMQHpFHTR7GAGj2jxi8Dg0s2h2MafAE4uSWF98FC/3MomU51iQAMf8/qDUbKWf5GxuvvVcXEhw==", + "license": "MIT", + "dependencies": { + "punycode": "^2.3.1" + }, + "engines": { + "node": ">=20" + } + }, + "node_modules/jsdom/node_modules/webidl-conversions": { + "version": "8.0.1", + "resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-8.0.1.tgz", + 
"integrity": "sha512-BMhLD/Sw+GbJC21C/UgyaZX41nPt8bUTg+jWyDeg7e7YN4xOM05YPSIXceACnXVtqyEw/LMClUQMtMZ+PGGpqQ==", + "license": "BSD-2-Clause", + "engines": { + "node": ">=20" + } + }, + "node_modules/jsdom/node_modules/whatwg-mimetype": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/whatwg-mimetype/-/whatwg-mimetype-5.0.0.tgz", + "integrity": "sha512-sXcNcHOC51uPGF0P/D4NVtrkjSU2fNsm9iog4ZvZJsL3rjoDAzXZhkm2MWt1y+PUdggKAYVoMAIYcs78wJ51Cw==", + "license": "MIT", + "engines": { + "node": ">=20" + } + }, + "node_modules/jsdom/node_modules/whatwg-url": { + "version": "16.0.1", + "resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-16.0.1.tgz", + "integrity": "sha512-1to4zXBxmXHV3IiSSEInrreIlu02vUOvrhxJJH5vcxYTBDAx51cqZiKdyTxlecdKNSjj8EcxGBxNf6Vg+945gw==", + "license": "MIT", + "dependencies": { + "@exodus/bytes": "^1.11.0", + "tr46": "^6.0.0", + "webidl-conversions": "^8.0.1" + }, + "engines": { + "node": "^20.19.0 || ^22.12.0 || >=24.0.0" + } + }, + "node_modules/jsesc": { + "version": "3.1.0", + "license": "MIT", + "bin": { + "jsesc": "bin/jsesc" + }, + "engines": { + "node": ">=6" + } + }, + "node_modules/json-bignum": { + "version": "0.0.3", + "engines": { + "node": ">=0.8" + } + }, + "node_modules/json5": { + "version": "2.2.3", + "license": "MIT", + "bin": { + "json5": "lib/cli.js" + }, + "engines": { + "node": ">=6" + } + }, + "node_modules/jwt-decode": { + "version": "4.0.0", + "license": "MIT", + "engines": { + "node": ">=18" + } + }, + "node_modules/kleur": { + "version": "4.1.5", + "license": "MIT", + "engines": { + "node": ">=6" + } + }, + "node_modules/klona": { + "version": "2.0.6", + "license": "MIT", + "engines": { + "node": ">= 8" + } + }, + "node_modules/knitwork": { + "version": "1.3.0", + "license": "MIT" + }, + "node_modules/launch-editor": { + "version": "2.13.2", + "license": "MIT", + "dependencies": { + "picocolors": "^1.1.1", + "shell-quote": "^1.8.3" + } + }, + "node_modules/layout-base": { + "version": "1.0.2", + 
"license": "MIT" + }, + "node_modules/lazystream": { + "version": "1.0.1", + "license": "MIT", + "dependencies": { + "readable-stream": "^2.0.5" + }, + "engines": { + "node": ">= 0.6.3" + } + }, + "node_modules/lazystream/node_modules/readable-stream": { + "version": "2.3.8", + "license": "MIT", + "dependencies": { + "core-util-is": "~1.0.0", + "inherits": "~2.0.3", + "isarray": "~1.0.0", + "process-nextick-args": "~2.0.0", + "safe-buffer": "~5.1.1", + "string_decoder": "~1.1.1", + "util-deprecate": "~1.0.1" + } + }, + "node_modules/lazystream/node_modules/safe-buffer": { + "version": "5.1.2", + "license": "MIT" + }, + "node_modules/lazystream/node_modules/string_decoder": { + "version": "1.1.1", + "license": "MIT", + "dependencies": { + "safe-buffer": "~5.1.0" + } + }, + "node_modules/lightningcss": { + "version": "1.32.0", + "license": "MPL-2.0", + "dependencies": { + "detect-libc": "^2.0.3" + }, + "engines": { + "node": ">= 12.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/parcel" + }, + "optionalDependencies": { + "lightningcss-android-arm64": "1.32.0", + "lightningcss-darwin-arm64": "1.32.0", + "lightningcss-darwin-x64": "1.32.0", + "lightningcss-freebsd-x64": "1.32.0", + "lightningcss-linux-arm-gnueabihf": "1.32.0", + "lightningcss-linux-arm64-gnu": "1.32.0", + "lightningcss-linux-arm64-musl": "1.32.0", + "lightningcss-linux-x64-gnu": "1.32.0", + "lightningcss-linux-x64-musl": "1.32.0", + "lightningcss-win32-arm64-msvc": "1.32.0", + "lightningcss-win32-x64-msvc": "1.32.0" + } + }, + "node_modules/lightningcss-linux-x64-gnu": { + "version": "1.32.0", + "cpu": [ + "x64" + ], + "license": "MPL-2.0", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 12.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/parcel" + } + }, + "node_modules/lilconfig": { + "version": "3.1.3", + "license": "MIT", + "engines": { + "node": ">=14" + }, + "funding": { + "url": 
"https://github.com/sponsors/antonk52" + } + }, + "node_modules/listhen": { + "version": "1.10.0", + "license": "MIT", + "dependencies": { + "@parcel/watcher": "^2.5.6", + "@parcel/watcher-wasm": "^2.5.6", + "citty": "^0.2.2", + "consola": "^3.4.2", + "crossws": ">=0.2.0 <0.5.0", + "defu": "^6.1.7", + "get-port-please": "^3.2.0", + "h3": "^1.15.11", + "http-shutdown": "^1.2.2", + "jiti": "^2.6.1", + "mlly": "^1.8.2", + "node-forge": "^1.4.0", + "pathe": "^2.0.3", + "std-env": "^4.1.0", + "tinyclip": "^0.1.12", + "ufo": "^1.6.4", + "untun": "^0.1.3", + "uqr": "^0.1.3" + }, + "bin": { + "listen": "bin/listhen.mjs", + "listhen": "bin/listhen.mjs" + } + }, + "node_modules/local-pkg": { + "version": "1.2.1", + "license": "MIT", + "dependencies": { + "mlly": "^1.7.4", + "pkg-types": "^2.3.0", + "quansync": "^0.2.11" + }, + "engines": { + "node": ">=14" + }, + "funding": { + "url": "https://github.com/sponsors/antfu" + } + }, + "node_modules/lodash": { + "version": "4.18.1", + "license": "MIT" + }, + "node_modules/lodash.camelcase": { + "version": "4.3.0", + "license": "MIT" + }, + "node_modules/lodash.defaults": { + "version": "4.2.0", + "license": "MIT" + }, + "node_modules/lodash.isarguments": { + "version": "3.1.0", + "license": "MIT" + }, + "node_modules/lodash.memoize": { + "version": "4.1.2", + "license": "MIT" + }, + "node_modules/lodash.uniq": { + "version": "4.5.0", + "license": "MIT" + }, + "node_modules/loupe": { + "version": "3.2.1", + "dev": true, + "license": "MIT" + }, + "node_modules/lru-cache": { + "version": "5.1.1", + "license": "ISC", + "dependencies": { + "yallist": "^3.0.2" + } + }, + "node_modules/lucide-vue-next": { + "version": "0.563.0", + "license": "ISC", + "peerDependencies": { + "vue": ">=3.0.1" + } + }, + "node_modules/magic-regexp": { + "version": "0.11.0", + "license": "MIT", + "dependencies": { + "magic-string": "^0.30.21", + "regexp-tree": "^0.1.27", + "type-level-regexp": "~0.1.17", + "unplugin": "^3.0.0" + } + }, + 
"node_modules/magic-string": { + "version": "0.30.21", + "license": "MIT", + "dependencies": { + "@jridgewell/sourcemap-codec": "^1.5.5" + } + }, + "node_modules/magic-string-ast": { + "version": "1.0.3", + "license": "MIT", + "dependencies": { + "magic-string": "^0.30.19" + }, + "engines": { + "node": ">=20.19.0" + }, + "funding": { + "url": "https://github.com/sponsors/sxzz" + } + }, + "node_modules/magicast": { + "version": "0.5.3", + "license": "MIT", + "dependencies": { + "@babel/parser": "^7.29.3", + "@babel/types": "^7.29.0", + "source-map-js": "^1.2.1" + } + }, + "node_modules/marked": { + "version": "15.0.12", + "resolved": "https://registry.npmjs.org/marked/-/marked-15.0.12.tgz", + "integrity": "sha512-8dD6FusOQSrpv9Z1rdNMdlSgQOIP880DHqnohobOmYLElGEqAL/JvxvuxZO16r4HtjTlfPRDC1hbvxC9dPN2nA==", + "license": "MIT", + "bin": { + "marked": "bin/marked.js" + }, + "engines": { + "node": ">= 18" + } + }, + "node_modules/mdn-data": { + "version": "2.27.1", + "license": "CC0-1.0" + }, + "node_modules/merge-stream": { + "version": "2.0.0", + "license": "MIT" + }, + "node_modules/merge2": { + "version": "1.4.1", + "license": "MIT", + "engines": { + "node": ">= 8" + } + }, + "node_modules/micromatch": { + "version": "4.0.8", + "license": "MIT", + "dependencies": { + "braces": "^3.0.3", + "picomatch": "^2.3.1" + }, + "engines": { + "node": ">=8.6" + } + }, + "node_modules/micromatch/node_modules/picomatch": { + "version": "2.3.2", + "license": "MIT", + "engines": { + "node": ">=8.6" + }, + "funding": { + "url": "https://github.com/sponsors/jonschlinkert" + } + }, + "node_modules/mime": { + "version": "4.1.0", + "funding": [ + "https://github.com/sponsors/broofa" + ], + "license": "MIT", + "bin": { + "mime": "bin/cli.js" + }, + "engines": { + "node": ">=16" + } + }, + "node_modules/mime-db": { + "version": "1.54.0", + "license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/mime-types": { + "version": "3.0.2", + "license": "MIT", + "dependencies": { + 
"mime-db": "^1.54.0" + }, + "engines": { + "node": ">=18" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/express" + } + }, + "node_modules/mimic-fn": { + "version": "4.0.0", + "license": "MIT", + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/minimatch": { + "version": "9.0.9", + "license": "ISC", + "dependencies": { + "brace-expansion": "^2.0.2" + }, + "engines": { + "node": ">=16 || 14 >=14.17" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + } + }, + "node_modules/minipass": { + "version": "7.1.3", + "license": "BlueOak-1.0.0", + "engines": { + "node": ">=16 || 14 >=14.17" + } + }, + "node_modules/minizlib": { + "version": "3.1.0", + "license": "MIT", + "dependencies": { + "minipass": "^7.1.2" + }, + "engines": { + "node": ">= 18" + } + }, + "node_modules/mlly": { + "version": "1.8.2", + "license": "MIT", + "dependencies": { + "acorn": "^8.16.0", + "pathe": "^2.0.3", + "pkg-types": "^1.3.1", + "ufo": "^1.6.3" + } + }, + "node_modules/mlly/node_modules/confbox": { + "version": "0.1.8", + "license": "MIT" + }, + "node_modules/mlly/node_modules/pkg-types": { + "version": "1.3.1", + "license": "MIT", + "dependencies": { + "confbox": "^0.1.8", + "mlly": "^1.7.4", + "pathe": "^2.0.1" + } + }, + "node_modules/mocked-exports": { + "version": "0.1.1", + "license": "MIT" + }, + "node_modules/mrmime": { + "version": "2.0.1", + "license": "MIT", + "engines": { + "node": ">=10" + } + }, + "node_modules/ms": { + "version": "2.1.3", + "license": "MIT" + }, + "node_modules/muggle-string": { + "version": "0.4.1", + "license": "MIT" + }, + "node_modules/nanoid": { + "version": "3.3.12", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/ai" + } + ], + "license": "MIT", + "bin": { + "nanoid": "bin/nanoid.cjs" + }, + "engines": { + "node": "^10 || ^12 || ^13.7 || ^14 || >=15.0.1" + } + }, + "node_modules/nanotar": { + 
"version": "0.3.0", + "license": "MIT" + }, + "node_modules/nitropack": { + "version": "2.13.4", + "license": "MIT", + "dependencies": { + "@cloudflare/kv-asset-handler": "^0.4.2", + "@rollup/plugin-alias": "^6.0.0", + "@rollup/plugin-commonjs": "^29.0.2", + "@rollup/plugin-inject": "^5.0.5", + "@rollup/plugin-json": "^6.1.0", + "@rollup/plugin-node-resolve": "^16.0.3", + "@rollup/plugin-replace": "^6.0.3", + "@rollup/plugin-terser": "^1.0.0", + "@vercel/nft": "^1.5.0", + "archiver": "^7.0.1", + "c12": "^3.3.4", + "chokidar": "^5.0.0", + "citty": "^0.2.2", + "compatx": "^0.2.0", + "confbox": "^0.2.4", + "consola": "^3.4.2", + "cookie-es": "^2.0.1", + "croner": "^10.0.1", + "crossws": "^0.3.5", + "db0": "^0.3.4", + "defu": "^6.1.7", + "destr": "^2.0.5", + "dot-prop": "^10.1.0", + "esbuild": "^0.28.0", + "escape-string-regexp": "^5.0.0", + "etag": "^1.8.1", + "exsolve": "^1.0.8", + "globby": "^16.2.0", + "gzip-size": "^7.0.0", + "h3": "^1.15.11", + "hookable": "^5.5.3", + "httpxy": "^0.5.1", + "ioredis": "^5.10.1", + "jiti": "^2.6.1", + "klona": "^2.0.6", + "knitwork": "^1.3.0", + "listhen": "^1.9.1", + "magic-string": "^0.30.21", + "magicast": "^0.5.2", + "mime": "^4.1.0", + "mlly": "^1.8.2", + "node-fetch-native": "^1.6.7", + "node-mock-http": "^1.0.4", + "ofetch": "^1.5.1", + "ohash": "^2.0.11", + "pathe": "^2.0.3", + "perfect-debounce": "^2.1.0", + "pkg-types": "^2.3.1", + "pretty-bytes": "^7.1.0", + "radix3": "^1.1.2", + "rollup": "^4.60.2", + "rollup-plugin-visualizer": "^7.0.1", + "scule": "^1.3.0", + "semver": "^7.7.4", + "serve-placeholder": "^2.0.2", + "serve-static": "^2.2.1", + "source-map": "^0.7.6", + "std-env": "^4.1.0", + "ufo": "^1.6.4", + "ultrahtml": "^1.6.0", + "uncrypto": "^0.1.3", + "unctx": "^2.5.0", + "unenv": "2.0.0-rc.24", + "unimport": "^6.2.0", + "unplugin-utils": "^0.3.1", + "unstorage": "^1.17.5", + "untyped": "^2.0.0", + "unwasm": "^0.5.3", + "youch": "^4.1.1", + "youch-core": "^0.3.3" + }, + "bin": { + "nitro": "dist/cli/index.mjs", + 
"nitropack": "dist/cli/index.mjs" + }, + "engines": { + "node": "^20.19.0 || >=22.12.0" + }, + "peerDependencies": { + "xml2js": "^0.6.2" + }, + "peerDependenciesMeta": { + "xml2js": { + "optional": true + } + } + }, + "node_modules/nitropack/node_modules/cookie-es": { + "version": "2.0.1", + "license": "MIT" + }, + "node_modules/nitropack/node_modules/hookable": { + "version": "5.5.3", + "license": "MIT" + }, + "node_modules/node-addon-api": { + "version": "7.1.1", + "license": "MIT" + }, + "node_modules/node-fetch": { + "version": "2.7.0", + "license": "MIT", + "dependencies": { + "whatwg-url": "^5.0.0" + }, + "engines": { + "node": "4.x || >=6.0.0" + }, + "peerDependencies": { + "encoding": "^0.1.0" + }, + "peerDependenciesMeta": { + "encoding": { + "optional": true + } + } + }, + "node_modules/node-fetch-native": { + "version": "1.6.7", + "license": "MIT" + }, + "node_modules/node-forge": { + "version": "1.4.0", + "license": "(BSD-3-Clause OR GPL-2.0)", + "engines": { + "node": ">= 6.13.0" + } + }, + "node_modules/node-gyp-build": { + "version": "4.8.4", + "license": "MIT", + "bin": { + "node-gyp-build": "bin.js", + "node-gyp-build-optional": "optional.js", + "node-gyp-build-test": "build-test.js" + } + }, + "node_modules/node-mock-http": { + "version": "1.0.4", + "license": "MIT" + }, + "node_modules/node-releases": { + "version": "2.0.45", + "license": "MIT", + "engines": { + "node": ">=18" + } + }, + "node_modules/nopt": { + "version": "7.2.1", + "dev": true, + "license": "ISC", + "dependencies": { + "abbrev": "^2.0.0" + }, + "bin": { + "nopt": "bin/nopt.js" + }, + "engines": { + "node": "^14.17.0 || ^16.13.0 || >=18.0.0" + } + }, + "node_modules/normalize-path": { + "version": "3.0.0", + "license": "MIT", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/npm-run-path": { + "version": "5.3.0", + "license": "MIT", + "dependencies": { + "path-key": "^4.0.0" + }, + "engines": { + "node": "^12.20.0 || ^14.13.1 || >=16.0.0" + }, + "funding": { + "url": 
"https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/npm-run-path/node_modules/path-key": { + "version": "4.0.0", + "license": "MIT", + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/nth-check": { + "version": "2.1.1", + "license": "BSD-2-Clause", + "dependencies": { + "boolbase": "^1.0.0" + }, + "funding": { + "url": "https://github.com/fb55/nth-check?sponsor=1" + } + }, + "node_modules/nuxt": { + "version": "4.4.6", + "license": "MIT", + "dependencies": { + "@dxup/nuxt": "^0.4.1", + "@nuxt/cli": "^3.35.2", + "@nuxt/devtools": "^3.2.4", + "@nuxt/kit": "4.4.6", + "@nuxt/nitro-server": "4.4.6", + "@nuxt/schema": "4.4.6", + "@nuxt/telemetry": "^2.8.0", + "@nuxt/vite-builder": "4.4.6", + "@unhead/vue": "^2.1.15", + "@vue/shared": "^3.5.34", + "chokidar": "^5.0.0", + "compatx": "^0.2.0", + "consola": "^3.4.2", + "cookie-es": "^3.1.1", + "defu": "^6.1.7", + "devalue": "^5.8.1", + "errx": "^0.1.0", + "escape-string-regexp": "^5.0.0", + "exsolve": "^1.0.8", + "hookable": "^6.1.1", + "ignore": "^7.0.5", + "impound": "^1.1.5", + "jiti": "^2.7.0", + "klona": "^2.0.6", + "knitwork": "^1.3.0", + "magic-string": "^0.30.21", + "mlly": "^1.8.2", + "nanotar": "^0.3.0", + "nypm": "^0.6.6", + "ofetch": "^1.5.1", + "ohash": "^2.0.11", + "on-change": "^6.0.2", + "oxc-minify": "^0.131.0", + "oxc-parser": "^0.131.0", + "oxc-transform": "^0.131.0", + "oxc-walker": "^1.0.0", + "pathe": "^2.0.3", + "perfect-debounce": "^2.1.0", + "picomatch": "^4.0.4", + "pkg-types": "^2.3.1", + "rou3": "^0.8.1", + "scule": "^1.3.0", + "semver": "^7.8.0", + "std-env": "^4.1.0", + "tinyglobby": "^0.2.16", + "ufo": "^1.6.4", + "ultrahtml": "^1.6.0", + "uncrypto": "^0.1.3", + "unctx": "^2.5.0", + "unimport": "^6.3.0", + "unplugin": "^3.0.0", + "unrouting": "^0.1.7", + "untyped": "^2.0.0", + "vue": "^3.5.34", + "vue-router": "^5.0.7" + }, + "bin": { + "nuxi": "bin/nuxt.mjs", + "nuxt": "bin/nuxt.mjs" + }, + "engines": 
{ + "node": "^22.12.0 || ^24.11.0 || >=26.0.0" + }, + "peerDependencies": { + "@parcel/watcher": "^2.1.0", + "@types/node": ">=18.12.0" + }, + "peerDependenciesMeta": { + "@parcel/watcher": { + "optional": true + }, + "@types/node": { + "optional": true + } + } + }, + "node_modules/nuxt/node_modules/@babel/generator": { + "version": "8.0.0-rc.5", + "license": "MIT", + "dependencies": { + "@babel/parser": "^8.0.0-rc.5", + "@babel/types": "^8.0.0-rc.5", + "@jridgewell/gen-mapping": "^0.3.12", + "@jridgewell/trace-mapping": "^0.3.28", + "@types/jsesc": "^2.5.0", + "jsesc": "^3.0.2" + }, + "engines": { + "node": "^22.18.0 || >=24.11.0" + } + }, + "node_modules/nuxt/node_modules/@babel/helper-string-parser": { + "version": "8.0.0-rc.5", + "license": "MIT", + "engines": { + "node": "^22.18.0 || >=24.11.0" + } + }, + "node_modules/nuxt/node_modules/@babel/helper-validator-identifier": { + "version": "8.0.0-rc.5", + "license": "MIT", + "engines": { + "node": "^22.18.0 || >=24.11.0" + } + }, + "node_modules/nuxt/node_modules/@babel/parser": { + "version": "8.0.0-rc.5", + "license": "MIT", + "dependencies": { + "@babel/types": "^8.0.0-rc.5" + }, + "bin": { + "parser": "bin/babel-parser.js" + }, + "engines": { + "node": "^22.18.0 || >=24.11.0" + } + }, + "node_modules/nuxt/node_modules/@babel/types": { + "version": "8.0.0-rc.5", + "license": "MIT", + "dependencies": { + "@babel/helper-string-parser": "^8.0.0-rc.5", + "@babel/helper-validator-identifier": "^8.0.0-rc.5" + }, + "engines": { + "node": "^22.18.0 || >=24.11.0" + } + }, + "node_modules/nuxt/node_modules/@vue/devtools-api": { + "version": "8.1.2", + "license": "MIT", + "dependencies": { + "@vue/devtools-kit": "^8.1.2" + } + }, + "node_modules/nuxt/node_modules/vue-router": { + "version": "5.0.7", + "license": "MIT", + "dependencies": { + "@babel/generator": "^8.0.0-rc.4", + "@vue-macros/common": "^3.1.1", + "@vue/devtools-api": "^8.1.1", + "ast-walker-scope": "^0.8.3", + "chokidar": "^5.0.0", + "json5": "^2.2.3", + 
"local-pkg": "^1.1.2", + "magic-string": "^0.30.21", + "mlly": "^1.8.0", + "muggle-string": "^0.4.1", + "pathe": "^2.0.3", + "picomatch": "^4.0.3", + "scule": "^1.3.0", + "tinyglobby": "^0.2.15", + "unplugin": "^3.0.0", + "unplugin-utils": "^0.3.1", + "yaml": "^2.8.2" + }, + "funding": { + "url": "https://github.com/sponsors/posva" + }, + "peerDependencies": { + "@pinia/colada": ">=0.21.2", + "@vue/compiler-sfc": "^3.5.34", + "pinia": "^3.0.4", + "vue": "^3.5.34" + }, + "peerDependenciesMeta": { + "@pinia/colada": { + "optional": true + }, + "@vue/compiler-sfc": { + "optional": true + }, + "pinia": { + "optional": true + } + } + }, + "node_modules/nypm": { + "version": "0.6.6", + "license": "MIT", + "dependencies": { + "citty": "^0.2.2", + "pathe": "^2.0.3", + "tinyexec": "^1.1.1" + }, + "bin": { + "nypm": "dist/cli.mjs" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/obug": { + "version": "2.1.1", + "funding": [ + "https://github.com/sponsors/sxzz", + "https://opencollective.com/debug" + ], + "license": "MIT" + }, + "node_modules/ofetch": { + "version": "1.5.1", + "license": "MIT", + "dependencies": { + "destr": "^2.0.5", + "node-fetch-native": "^1.6.7", + "ufo": "^1.6.1" + } + }, + "node_modules/ohash": { + "version": "2.0.11", + "license": "MIT" + }, + "node_modules/oidc-client-ts": { + "version": "3.5.0", + "license": "Apache-2.0", + "dependencies": { + "jwt-decode": "^4.0.0" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/on-change": { + "version": "6.0.2", + "license": "MIT", + "engines": { + "node": ">=20" + }, + "funding": { + "url": "https://github.com/sindresorhus/on-change?sponsor=1" + } + }, + "node_modules/on-finished": { + "version": "2.4.1", + "license": "MIT", + "dependencies": { + "ee-first": "1.1.1" + }, + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/onetime": { + "version": "6.0.0", + "license": "MIT", + "dependencies": { + "mimic-fn": "^4.0.0" + }, + "engines": { + "node": ">=12" + }, + "funding": { + 
"url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/open": { + "version": "11.0.0", + "license": "MIT", + "dependencies": { + "default-browser": "^5.4.0", + "define-lazy-prop": "^3.0.0", + "is-in-ssh": "^1.0.0", + "is-inside-container": "^1.0.0", + "powershell-utils": "^0.1.0", + "wsl-utils": "^0.3.0" + }, + "engines": { + "node": ">=20" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/oxc-minify": { + "version": "0.131.0", + "license": "MIT", + "engines": { + "node": "^20.19.0 || >=22.12.0" + }, + "funding": { + "url": "https://github.com/sponsors/Boshen" + }, + "optionalDependencies": { + "@oxc-minify/binding-android-arm-eabi": "0.131.0", + "@oxc-minify/binding-android-arm64": "0.131.0", + "@oxc-minify/binding-darwin-arm64": "0.131.0", + "@oxc-minify/binding-darwin-x64": "0.131.0", + "@oxc-minify/binding-freebsd-x64": "0.131.0", + "@oxc-minify/binding-linux-arm-gnueabihf": "0.131.0", + "@oxc-minify/binding-linux-arm-musleabihf": "0.131.0", + "@oxc-minify/binding-linux-arm64-gnu": "0.131.0", + "@oxc-minify/binding-linux-arm64-musl": "0.131.0", + "@oxc-minify/binding-linux-ppc64-gnu": "0.131.0", + "@oxc-minify/binding-linux-riscv64-gnu": "0.131.0", + "@oxc-minify/binding-linux-riscv64-musl": "0.131.0", + "@oxc-minify/binding-linux-s390x-gnu": "0.131.0", + "@oxc-minify/binding-linux-x64-gnu": "0.131.0", + "@oxc-minify/binding-linux-x64-musl": "0.131.0", + "@oxc-minify/binding-openharmony-arm64": "0.131.0", + "@oxc-minify/binding-wasm32-wasi": "0.131.0", + "@oxc-minify/binding-win32-arm64-msvc": "0.131.0", + "@oxc-minify/binding-win32-ia32-msvc": "0.131.0", + "@oxc-minify/binding-win32-x64-msvc": "0.131.0" + } + }, + "node_modules/oxc-parser": { + "version": "0.131.0", + "license": "MIT", + "dependencies": { + "@oxc-project/types": "^0.131.0" + }, + "engines": { + "node": "^20.19.0 || >=22.12.0" + }, + "funding": { + "url": "https://github.com/sponsors/Boshen" + }, + "optionalDependencies": { + 
"@oxc-parser/binding-android-arm-eabi": "0.131.0", + "@oxc-parser/binding-android-arm64": "0.131.0", + "@oxc-parser/binding-darwin-arm64": "0.131.0", + "@oxc-parser/binding-darwin-x64": "0.131.0", + "@oxc-parser/binding-freebsd-x64": "0.131.0", + "@oxc-parser/binding-linux-arm-gnueabihf": "0.131.0", + "@oxc-parser/binding-linux-arm-musleabihf": "0.131.0", + "@oxc-parser/binding-linux-arm64-gnu": "0.131.0", + "@oxc-parser/binding-linux-arm64-musl": "0.131.0", + "@oxc-parser/binding-linux-ppc64-gnu": "0.131.0", + "@oxc-parser/binding-linux-riscv64-gnu": "0.131.0", + "@oxc-parser/binding-linux-riscv64-musl": "0.131.0", + "@oxc-parser/binding-linux-s390x-gnu": "0.131.0", + "@oxc-parser/binding-linux-x64-gnu": "0.131.0", + "@oxc-parser/binding-linux-x64-musl": "0.131.0", + "@oxc-parser/binding-openharmony-arm64": "0.131.0", + "@oxc-parser/binding-wasm32-wasi": "0.131.0", + "@oxc-parser/binding-win32-arm64-msvc": "0.131.0", + "@oxc-parser/binding-win32-ia32-msvc": "0.131.0", + "@oxc-parser/binding-win32-x64-msvc": "0.131.0" + } + }, + "node_modules/oxc-transform": { + "version": "0.131.0", + "license": "MIT", + "engines": { + "node": "^20.19.0 || >=22.12.0" + }, + "funding": { + "url": "https://github.com/sponsors/Boshen" + }, + "optionalDependencies": { + "@oxc-transform/binding-android-arm-eabi": "0.131.0", + "@oxc-transform/binding-android-arm64": "0.131.0", + "@oxc-transform/binding-darwin-arm64": "0.131.0", + "@oxc-transform/binding-darwin-x64": "0.131.0", + "@oxc-transform/binding-freebsd-x64": "0.131.0", + "@oxc-transform/binding-linux-arm-gnueabihf": "0.131.0", + "@oxc-transform/binding-linux-arm-musleabihf": "0.131.0", + "@oxc-transform/binding-linux-arm64-gnu": "0.131.0", + "@oxc-transform/binding-linux-arm64-musl": "0.131.0", + "@oxc-transform/binding-linux-ppc64-gnu": "0.131.0", + "@oxc-transform/binding-linux-riscv64-gnu": "0.131.0", + "@oxc-transform/binding-linux-riscv64-musl": "0.131.0", + "@oxc-transform/binding-linux-s390x-gnu": "0.131.0", + 
"@oxc-transform/binding-linux-x64-gnu": "0.131.0", + "@oxc-transform/binding-linux-x64-musl": "0.131.0", + "@oxc-transform/binding-openharmony-arm64": "0.131.0", + "@oxc-transform/binding-wasm32-wasi": "0.131.0", + "@oxc-transform/binding-win32-arm64-msvc": "0.131.0", + "@oxc-transform/binding-win32-ia32-msvc": "0.131.0", + "@oxc-transform/binding-win32-x64-msvc": "0.131.0" + } + }, + "node_modules/oxc-walker": { + "version": "1.0.0", + "license": "MIT", + "dependencies": { + "magic-regexp": "^0.11.0" + }, + "peerDependencies": { + "oxc-parser": ">=0.98.0", + "rolldown": ">=1.0.0" + }, + "peerDependenciesMeta": { + "oxc-parser": { + "optional": true + }, + "rolldown": { + "optional": true + } + } + }, + "node_modules/package-json-from-dist": { + "version": "1.0.1", + "license": "BlueOak-1.0.0" + }, + "node_modules/parse5": { + "version": "8.0.1", + "resolved": "https://registry.npmjs.org/parse5/-/parse5-8.0.1.tgz", + "integrity": "sha512-z1e/HMG90obSGeidlli3hj7cbocou0/wa5HacvI3ASx34PecNjNQeaHNo5WIZpWofN9kgkqV1q5YvXe3F0FoPw==", + "license": "MIT", + "dependencies": { + "entities": "^8.0.0" + }, + "funding": { + "url": "https://github.com/inikulin/parse5?sponsor=1" + } + }, + "node_modules/parse5/node_modules/entities": { + "version": "8.0.0", + "resolved": "https://registry.npmjs.org/entities/-/entities-8.0.0.tgz", + "integrity": "sha512-zwfzJecQ/Uej6tusMqwAqU/6KL2XaB2VZ2Jg54Je6ahNBGNH6Ek6g3jjNCF0fG9EWQKGZNddNjU5F1ZQn/sBnA==", + "license": "BSD-2-Clause", + "engines": { + "node": ">=20.19.0" + }, + "funding": { + "url": "https://github.com/fb55/entities?sponsor=1" + } + }, + "node_modules/parseurl": { + "version": "1.3.3", + "license": "MIT", + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/path-browserify": { + "version": "1.0.1", + "devOptional": true, + "license": "MIT" + }, + "node_modules/path-key": { + "version": "3.1.1", + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/path-parse": { + "version": "1.0.7", + "license": 
"MIT" + }, + "node_modules/path-scurry": { + "version": "1.11.1", + "license": "BlueOak-1.0.0", + "dependencies": { + "lru-cache": "^10.2.0", + "minipass": "^5.0.0 || ^6.0.2 || ^7.0.0" + }, + "engines": { + "node": ">=16 || 14 >=14.18" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + } + }, + "node_modules/path-scurry/node_modules/lru-cache": { + "version": "10.4.3", + "license": "ISC" + }, + "node_modules/pathe": { + "version": "2.0.3", + "license": "MIT" + }, + "node_modules/pathval": { + "version": "2.0.1", + "dev": true, + "license": "MIT", + "engines": { + "node": ">= 14.16" + } + }, + "node_modules/perfect-debounce": { + "version": "2.1.0", + "license": "MIT" + }, + "node_modules/picocolors": { + "version": "1.1.1", + "license": "ISC" + }, + "node_modules/picomatch": { + "version": "4.0.4", + "license": "MIT", + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/sponsors/jonschlinkert" + } + }, + "node_modules/pkg-types": { + "version": "2.3.1", + "license": "MIT", + "dependencies": { + "confbox": "^0.2.4", + "exsolve": "^1.0.8", + "pathe": "^2.0.3" + } + }, + "node_modules/postcss": { + "version": "8.5.15", + "funding": [ + { + "type": "opencollective", + "url": "https://opencollective.com/postcss/" + }, + { + "type": "tidelift", + "url": "https://tidelift.com/funding/github/npm/postcss" + }, + { + "type": "github", + "url": "https://github.com/sponsors/ai" + } + ], + "license": "MIT", + "dependencies": { + "nanoid": "^3.3.12", + "picocolors": "^1.1.1", + "source-map-js": "^1.2.1" + }, + "engines": { + "node": "^10 || ^12 || >=14" + } + }, + "node_modules/postcss-calc": { + "version": "10.1.1", + "license": "MIT", + "dependencies": { + "postcss-selector-parser": "^7.0.0", + "postcss-value-parser": "^4.2.0" + }, + "engines": { + "node": "^18.12 || ^20.9 || >=22.0" + }, + "peerDependencies": { + "postcss": "^8.4.38" + } + }, + "node_modules/postcss-colormin": { + "version": "8.0.0", + "license": "MIT", + 
"dependencies": { + "@colordx/core": "^5.4.3", + "browserslist": "^4.28.2", + "caniuse-api": "^3.0.0", + "postcss-value-parser": "^4.2.0" + }, + "engines": { + "node": "^22.11.0 || ^24.11.0 || >=26.0" + }, + "peerDependencies": { + "postcss": "^8.5.14" + } + }, + "node_modules/postcss-convert-values": { + "version": "8.0.0", + "license": "MIT", + "dependencies": { + "browserslist": "^4.28.2", + "postcss-value-parser": "^4.2.0" + }, + "engines": { + "node": "^22.11.0 || ^24.11.0 || >=26.0" + }, + "peerDependencies": { + "postcss": "^8.5.14" + } + }, + "node_modules/postcss-discard-comments": { + "version": "8.0.0", + "license": "MIT", + "dependencies": { + "postcss-selector-parser": "^7.1.1" + }, + "engines": { + "node": "^22.11.0 || ^24.11.0 || >=26.0" + }, + "peerDependencies": { + "postcss": "^8.5.14" + } + }, + "node_modules/postcss-discard-duplicates": { + "version": "8.0.0", + "license": "MIT", + "engines": { + "node": "^22.11.0 || ^24.11.0 || >=26.0" + }, + "peerDependencies": { + "postcss": "^8.5.14" + } + }, + "node_modules/postcss-discard-empty": { + "version": "8.0.0", + "license": "MIT", + "engines": { + "node": "^22.11.0 || ^24.11.0 || >=26.0" + }, + "peerDependencies": { + "postcss": "^8.5.14" + } + }, + "node_modules/postcss-discard-overridden": { + "version": "8.0.0", + "license": "MIT", + "engines": { + "node": "^22.11.0 || ^24.11.0 || >=26.0" + }, + "peerDependencies": { + "postcss": "^8.5.14" + } + }, + "node_modules/postcss-merge-longhand": { + "version": "8.0.0", + "license": "MIT", + "dependencies": { + "postcss-value-parser": "^4.2.0", + "stylehacks": "^8.0.0" + }, + "engines": { + "node": "^22.11.0 || ^24.11.0 || >=26.0" + }, + "peerDependencies": { + "postcss": "^8.5.14" + } + }, + "node_modules/postcss-merge-rules": { + "version": "8.0.0", + "license": "MIT", + "dependencies": { + "browserslist": "^4.28.2", + "caniuse-api": "^3.0.0", + "cssnano-utils": "^6.0.0", + "postcss-selector-parser": "^7.1.1" + }, + "engines": { + "node": "^22.11.0 
|| ^24.11.0 || >=26.0" + }, + "peerDependencies": { + "postcss": "^8.5.14" + } + }, + "node_modules/postcss-minify-font-values": { + "version": "8.0.0", + "license": "MIT", + "dependencies": { + "postcss-value-parser": "^4.2.0" + }, + "engines": { + "node": "^22.11.0 || ^24.11.0 || >=26.0" + }, + "peerDependencies": { + "postcss": "^8.5.14" + } + }, + "node_modules/postcss-minify-gradients": { + "version": "8.0.0", + "license": "MIT", + "dependencies": { + "@colordx/core": "^5.4.3", + "cssnano-utils": "^6.0.0", + "postcss-value-parser": "^4.2.0" + }, + "engines": { + "node": "^22.11.0 || ^24.11.0 || >=26.0" + }, + "peerDependencies": { + "postcss": "^8.5.14" + } + }, + "node_modules/postcss-minify-params": { + "version": "8.0.0", + "license": "MIT", + "dependencies": { + "browserslist": "^4.28.2", + "cssnano-utils": "^6.0.0", + "postcss-value-parser": "^4.2.0" + }, + "engines": { + "node": "^22.11.0 || ^24.11.0 || >=26.0" + }, + "peerDependencies": { + "postcss": "^8.5.14" + } + }, + "node_modules/postcss-minify-selectors": { + "version": "8.0.1", + "license": "MIT", + "dependencies": { + "browserslist": "^4.28.1", + "caniuse-api": "^3.0.0", + "cssesc": "^3.0.0", + "postcss-selector-parser": "^7.1.1" + }, + "engines": { + "node": "^22.11.0 || ^24.11.0 || >=26.0" + }, + "peerDependencies": { + "postcss": "^8.5.14" + } + }, + "node_modules/postcss-normalize-charset": { + "version": "8.0.0", + "license": "MIT", + "engines": { + "node": "^22.11.0 || ^24.11.0 || >=26.0" + }, + "peerDependencies": { + "postcss": "^8.5.14" + } + }, + "node_modules/postcss-normalize-display-values": { + "version": "8.0.0", + "license": "MIT", + "dependencies": { + "postcss-value-parser": "^4.2.0" + }, + "engines": { + "node": "^22.11.0 || ^24.11.0 || >=26.0" + }, + "peerDependencies": { + "postcss": "^8.5.14" + } + }, + "node_modules/postcss-normalize-positions": { + "version": "8.0.0", + "license": "MIT", + "dependencies": { + "postcss-value-parser": "^4.2.0" + }, + "engines": { + "node": 
"^22.11.0 || ^24.11.0 || >=26.0" + }, + "peerDependencies": { + "postcss": "^8.5.14" + } + }, + "node_modules/postcss-normalize-repeat-style": { + "version": "8.0.0", + "license": "MIT", + "dependencies": { + "postcss-value-parser": "^4.2.0" + }, + "engines": { + "node": "^22.11.0 || ^24.11.0 || >=26.0" + }, + "peerDependencies": { + "postcss": "^8.5.14" + } + }, + "node_modules/postcss-normalize-string": { + "version": "8.0.0", + "license": "MIT", + "dependencies": { + "postcss-value-parser": "^4.2.0" + }, + "engines": { + "node": "^22.11.0 || ^24.11.0 || >=26.0" + }, + "peerDependencies": { + "postcss": "^8.5.14" + } + }, + "node_modules/postcss-normalize-timing-functions": { + "version": "8.0.0", + "license": "MIT", + "dependencies": { + "postcss-value-parser": "^4.2.0" + }, + "engines": { + "node": "^22.11.0 || ^24.11.0 || >=26.0" + }, + "peerDependencies": { + "postcss": "^8.5.14" + } + }, + "node_modules/postcss-normalize-unicode": { + "version": "8.0.0", + "license": "MIT", + "dependencies": { + "browserslist": "^4.28.2", + "postcss-value-parser": "^4.2.0" + }, + "engines": { + "node": "^22.11.0 || ^24.11.0 || >=26.0" + }, + "peerDependencies": { + "postcss": "^8.5.14" + } + }, + "node_modules/postcss-normalize-url": { + "version": "8.0.0", + "license": "MIT", + "dependencies": { + "postcss-value-parser": "^4.2.0" + }, + "engines": { + "node": "^22.11.0 || ^24.11.0 || >=26.0" + }, + "peerDependencies": { + "postcss": "^8.5.14" + } + }, + "node_modules/postcss-normalize-whitespace": { + "version": "8.0.0", + "license": "MIT", + "dependencies": { + "postcss-value-parser": "^4.2.0" + }, + "engines": { + "node": "^22.11.0 || ^24.11.0 || >=26.0" + }, + "peerDependencies": { + "postcss": "^8.5.14" + } + }, + "node_modules/postcss-ordered-values": { + "version": "8.0.0", + "license": "MIT", + "dependencies": { + "cssnano-utils": "^6.0.0", + "postcss-value-parser": "^4.2.0" + }, + "engines": { + "node": "^22.11.0 || ^24.11.0 || >=26.0" + }, + "peerDependencies": { + 
"postcss": "^8.5.14" + } + }, + "node_modules/postcss-reduce-initial": { + "version": "8.0.0", + "license": "MIT", + "dependencies": { + "browserslist": "^4.28.2", + "caniuse-api": "^3.0.0" + }, + "engines": { + "node": "^22.11.0 || ^24.11.0 || >=26.0" + }, + "peerDependencies": { + "postcss": "^8.5.14" + } + }, + "node_modules/postcss-reduce-transforms": { + "version": "8.0.0", + "license": "MIT", + "dependencies": { + "postcss-value-parser": "^4.2.0" + }, + "engines": { + "node": "^22.11.0 || ^24.11.0 || >=26.0" + }, + "peerDependencies": { + "postcss": "^8.5.14" + } + }, + "node_modules/postcss-selector-parser": { + "version": "7.1.1", + "license": "MIT", + "dependencies": { + "cssesc": "^3.0.0", + "util-deprecate": "^1.0.2" + }, + "engines": { + "node": ">=4" + } + }, + "node_modules/postcss-svgo": { + "version": "8.0.0", + "license": "MIT", + "dependencies": { + "postcss-value-parser": "^4.2.0", + "svgo": "^4.0.1" + }, + "engines": { + "node": "^22.11.0 || ^24.11.0 || >=26.0" + }, + "peerDependencies": { + "postcss": "^8.5.14" + } + }, + "node_modules/postcss-unique-selectors": { + "version": "8.0.0", + "license": "MIT", + "dependencies": { + "postcss-selector-parser": "^7.1.1" + }, + "engines": { + "node": "^22.11.0 || ^24.11.0 || >=26.0" + }, + "peerDependencies": { + "postcss": "^8.5.14" + } + }, + "node_modules/postcss-value-parser": { + "version": "4.2.0", + "license": "MIT" + }, + "node_modules/powershell-utils": { + "version": "0.1.0", + "license": "MIT", + "engines": { + "node": ">=20" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/pretty-bytes": { + "version": "7.1.0", + "license": "MIT", + "engines": { + "node": ">=20" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/process": { + "version": "0.11.10", + "license": "MIT", + "engines": { + "node": ">= 0.6.0" + } + }, + "node_modules/process-nextick-args": { + "version": "2.0.1", + "license": "MIT" + }, + 
"node_modules/proper-lockfile": { + "version": "4.1.2", + "license": "MIT", + "dependencies": { + "graceful-fs": "^4.2.4", + "retry": "^0.12.0", + "signal-exit": "^3.0.2" + } + }, + "node_modules/proper-lockfile/node_modules/signal-exit": { + "version": "3.0.7", + "license": "ISC" + }, + "node_modules/proto-list": { + "version": "1.2.4", + "dev": true, + "license": "ISC" + }, + "node_modules/punycode": { + "version": "2.3.1", + "resolved": "https://registry.npmjs.org/punycode/-/punycode-2.3.1.tgz", + "integrity": "sha512-vYt7UD1U9Wg6138shLtLOvdAu+8DsC/ilFtEVHcH+wydcSpNE20AfSOduf6MkRFahL5FY7X1oU7nKVZFtfq8Fg==", + "license": "MIT", + "engines": { + "node": ">=6" + } + }, + "node_modules/quansync": { + "version": "0.2.11", + "funding": [ + { + "type": "individual", + "url": "https://github.com/sponsors/antfu" + }, + { + "type": "individual", + "url": "https://github.com/sponsors/sxzz" + } + ], + "license": "MIT" + }, + "node_modules/queue-microtask": { + "version": "1.2.3", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + "license": "MIT" + }, + "node_modules/radix3": { + "version": "1.1.2", + "license": "MIT" + }, + "node_modules/random": { + "version": "4.1.0", + "license": "MIT", + "dependencies": { + "seedrandom": "^3.0.5" + }, + "engines": { + "node": ">=14" + } + }, + "node_modules/range-parser": { + "version": "1.2.1", + "license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/rc9": { + "version": "3.0.1", + "license": "MIT", + "dependencies": { + "defu": "^6.1.6", + "destr": "^2.0.5" + } + }, + "node_modules/readable-stream": { + "version": "4.7.0", + "license": "MIT", + "dependencies": { + "abort-controller": "^3.0.0", + "buffer": "^6.0.3", + "events": "^3.3.0", + "process": "^0.11.10", + "string_decoder": "^1.3.0" + }, + "engines": { + "node": 
"^12.22.0 || ^14.17.0 || >=16.0.0" + } + }, + "node_modules/readdir-glob": { + "version": "1.1.3", + "license": "Apache-2.0", + "dependencies": { + "minimatch": "^5.1.0" + } + }, + "node_modules/readdir-glob/node_modules/minimatch": { + "version": "5.1.9", + "license": "ISC", + "dependencies": { + "brace-expansion": "^2.0.1" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/readdirp": { + "version": "5.0.0", + "license": "MIT", + "engines": { + "node": ">= 20.19.0" + }, + "funding": { + "type": "individual", + "url": "https://paulmillr.com/funding/" + } + }, + "node_modules/redis-errors": { + "version": "1.2.0", + "license": "MIT", + "engines": { + "node": ">=4" + } + }, + "node_modules/redis-parser": { + "version": "3.0.0", + "license": "MIT", + "dependencies": { + "redis-errors": "^1.0.0" + }, + "engines": { + "node": ">=4" + } + }, + "node_modules/regexp-tree": { + "version": "0.1.27", + "license": "MIT", + "bin": { + "regexp-tree": "bin/regexp-tree" + } + }, + "node_modules/regl": { + "version": "2.1.1", + "license": "MIT" + }, + "node_modules/reka-ui": { + "version": "2.9.7", + "license": "MIT", + "dependencies": { + "@floating-ui/dom": "^1.6.13", + "@floating-ui/vue": "^1.1.6", + "@internationalized/date": "^3.5.0", + "@internationalized/number": "^3.5.0", + "@tanstack/vue-virtual": "^3.12.0", + "@vueuse/core": "^14.1.0", + "@vueuse/shared": "^14.1.0", + "aria-hidden": "^1.2.4", + "defu": "^6.1.5", + "ohash": "^2.0.11" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/zernonia" + }, + "peerDependencies": { + "vue": ">= 3.4.0" + } + }, + "node_modules/require-from-string": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/require-from-string/-/require-from-string-2.0.2.tgz", + "integrity": "sha512-Xf0nWe6RseziFMu+Ap9biiUbmplq6S9/p+7w7YXP/JBHhrUDDUhwa+vANyubuqfZWTveU//DYVGsDG7RKL/vEw==", + "license": "MIT", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/resolve": { + "version": "1.22.12", + 
"license": "MIT", + "dependencies": { + "es-errors": "^1.3.0", + "is-core-module": "^2.16.1", + "path-parse": "^1.0.7", + "supports-preserve-symlinks-flag": "^1.0.0" + }, + "bin": { + "resolve": "bin/resolve" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/resolve-from": { + "version": "5.0.0", + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/retry": { + "version": "0.12.0", + "license": "MIT", + "engines": { + "node": ">= 4" + } + }, + "node_modules/reusify": { + "version": "1.1.0", + "license": "MIT", + "engines": { + "iojs": ">=1.0.0", + "node": ">=0.10.0" + } + }, + "node_modules/robust-predicates": { + "version": "3.0.3", + "license": "Unlicense" + }, + "node_modules/rollup": { + "version": "4.60.4", + "license": "MIT", + "dependencies": { + "@types/estree": "1.0.8" + }, + "bin": { + "rollup": "dist/bin/rollup" + }, + "engines": { + "node": ">=18.0.0", + "npm": ">=8.0.0" + }, + "optionalDependencies": { + "@rollup/rollup-android-arm-eabi": "4.60.4", + "@rollup/rollup-android-arm64": "4.60.4", + "@rollup/rollup-darwin-arm64": "4.60.4", + "@rollup/rollup-darwin-x64": "4.60.4", + "@rollup/rollup-freebsd-arm64": "4.60.4", + "@rollup/rollup-freebsd-x64": "4.60.4", + "@rollup/rollup-linux-arm-gnueabihf": "4.60.4", + "@rollup/rollup-linux-arm-musleabihf": "4.60.4", + "@rollup/rollup-linux-arm64-gnu": "4.60.4", + "@rollup/rollup-linux-arm64-musl": "4.60.4", + "@rollup/rollup-linux-loong64-gnu": "4.60.4", + "@rollup/rollup-linux-loong64-musl": "4.60.4", + "@rollup/rollup-linux-ppc64-gnu": "4.60.4", + "@rollup/rollup-linux-ppc64-musl": "4.60.4", + "@rollup/rollup-linux-riscv64-gnu": "4.60.4", + "@rollup/rollup-linux-riscv64-musl": "4.60.4", + "@rollup/rollup-linux-s390x-gnu": "4.60.4", + "@rollup/rollup-linux-x64-gnu": "4.60.4", + "@rollup/rollup-linux-x64-musl": "4.60.4", + "@rollup/rollup-openbsd-x64": "4.60.4", + "@rollup/rollup-openharmony-arm64": "4.60.4", + 
"@rollup/rollup-win32-arm64-msvc": "4.60.4", + "@rollup/rollup-win32-ia32-msvc": "4.60.4", + "@rollup/rollup-win32-x64-gnu": "4.60.4", + "@rollup/rollup-win32-x64-msvc": "4.60.4", + "fsevents": "~2.3.2" + } + }, + "node_modules/rollup-plugin-visualizer": { + "version": "7.0.1", + "license": "MIT", + "dependencies": { + "open": "^11.0.0", + "picomatch": "^4.0.2", + "source-map": "^0.7.4", + "yargs": "^18.0.0" + }, + "bin": { + "rollup-plugin-visualizer": "dist/bin/cli.js" + }, + "engines": { + "node": ">=22" + }, + "peerDependencies": { + "rolldown": "1.x || ^1.0.0-beta || ^1.0.0-rc", + "rollup": "2.x || 3.x || 4.x" + }, + "peerDependenciesMeta": { + "rolldown": { + "optional": true + }, + "rollup": { + "optional": true + } + } + }, + "node_modules/rollup/node_modules/@types/estree": { + "version": "1.0.8", + "license": "MIT" + }, + "node_modules/rou3": { + "version": "0.8.1", + "license": "MIT" + }, + "node_modules/run-applescript": { + "version": "7.1.0", + "license": "MIT", + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/run-parallel": { + "version": "1.2.0", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + "license": "MIT", + "dependencies": { + "queue-microtask": "^1.2.2" + } + }, + "node_modules/rw": { + "version": "1.3.3", + "license": "BSD-3-Clause" + }, + "node_modules/safe-buffer": { + "version": "5.2.1", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + "license": "MIT" + }, + "node_modules/safer-buffer": { + "version": "2.1.2", + "license": "MIT" + }, + "node_modules/sax": { + "version": "1.6.0", + 
"license": "BlueOak-1.0.0", + "engines": { + "node": ">=11.0.0" + } + }, + "node_modules/saxes": { + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/saxes/-/saxes-6.0.0.tgz", + "integrity": "sha512-xAg7SOnEhrm5zI3puOOKyy1OMcMlIJZYNJY7xLBwSze0UjhPLnWfj2GF2EpT0jmzaJKIWKHLsaSSajf35bcYnA==", + "license": "ISC", + "dependencies": { + "xmlchars": "^2.2.0" + }, + "engines": { + "node": ">=v12.22.7" + } + }, + "node_modules/scule": { + "version": "1.3.0", + "license": "MIT" + }, + "node_modules/seedrandom": { + "version": "3.0.5", + "license": "MIT" + }, + "node_modules/semver": { + "version": "7.8.0", + "license": "ISC", + "bin": { + "semver": "bin/semver.js" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/send": { + "version": "1.2.1", + "license": "MIT", + "dependencies": { + "debug": "^4.4.3", + "encodeurl": "^2.0.0", + "escape-html": "^1.0.3", + "etag": "^1.8.1", + "fresh": "^2.0.0", + "http-errors": "^2.0.1", + "mime-types": "^3.0.2", + "ms": "^2.1.3", + "on-finished": "^2.4.1", + "range-parser": "^1.2.1", + "statuses": "^2.0.2" + }, + "engines": { + "node": ">= 18" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/express" + } + }, + "node_modules/serialize-javascript": { + "version": "7.0.5", + "license": "BSD-3-Clause", + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/seroval": { + "version": "1.5.4", + "license": "MIT", + "engines": { + "node": ">=10" + } + }, + "node_modules/serve-placeholder": { + "version": "2.0.2", + "license": "MIT", + "dependencies": { + "defu": "^6.1.4" + } + }, + "node_modules/serve-static": { + "version": "2.2.1", + "license": "MIT", + "dependencies": { + "encodeurl": "^2.0.0", + "escape-html": "^1.0.3", + "parseurl": "^1.3.3", + "send": "^1.2.0" + }, + "engines": { + "node": ">= 18" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/express" + } + }, + "node_modules/setprototypeof": { + "version": "1.2.0", + "license": "ISC" + }, 
+ "node_modules/shebang-command": { + "version": "2.0.0", + "license": "MIT", + "dependencies": { + "shebang-regex": "^3.0.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/shebang-regex": { + "version": "3.0.0", + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/shell-quote": { + "version": "1.8.3", + "license": "MIT", + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/siginfo": { + "version": "2.0.0", + "dev": true, + "license": "ISC" + }, + "node_modules/signal-exit": { + "version": "4.1.0", + "license": "ISC", + "engines": { + "node": ">=14" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + } + }, + "node_modules/simple-git": { + "version": "3.36.0", + "license": "MIT", + "dependencies": { + "@kwsites/file-exists": "^1.1.1", + "@kwsites/promise-deferred": "^1.1.1", + "@simple-git/args-pathspec": "^1.0.3", + "@simple-git/argv-parser": "^1.1.0", + "debug": "^4.4.0" + }, + "funding": { + "type": "github", + "url": "https://github.com/steveukx/git-js?sponsor=1" + } + }, + "node_modules/sirv": { + "version": "3.0.2", + "license": "MIT", + "dependencies": { + "@polka/url": "^1.0.0-next.24", + "mrmime": "^2.0.0", + "totalist": "^3.0.0" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/sisteransi": { + "version": "1.0.5", + "license": "MIT" + }, + "node_modules/slash": { + "version": "5.1.0", + "license": "MIT", + "engines": { + "node": ">=14.16" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/smob": { + "version": "1.6.2", + "license": "MIT", + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/source-map": { + "version": "0.7.6", + "license": "BSD-3-Clause", + "engines": { + "node": ">= 12" + } + }, + "node_modules/source-map-js": { + "version": "1.2.1", + "license": "BSD-3-Clause", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/source-map-support": { + 
"version": "0.5.21", + "license": "MIT", + "dependencies": { + "buffer-from": "^1.0.0", + "source-map": "^0.6.0" + } + }, + "node_modules/source-map-support/node_modules/source-map": { + "version": "0.6.1", + "license": "BSD-3-Clause", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/srvx": { + "version": "0.11.15", + "license": "MIT", + "bin": { + "srvx": "bin/srvx.mjs" + }, + "engines": { + "node": ">=20.16.0" + } + }, + "node_modules/stackback": { + "version": "0.0.2", + "dev": true, + "license": "MIT" + }, + "node_modules/standard-as-callback": { + "version": "2.1.0", + "license": "MIT" + }, + "node_modules/statuses": { + "version": "2.0.2", + "license": "MIT", + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/std-env": { + "version": "4.1.0", + "license": "MIT" + }, + "node_modules/streamx": { + "version": "2.25.0", + "license": "MIT", + "dependencies": { + "events-universal": "^1.0.0", + "fast-fifo": "^1.3.2", + "text-decoder": "^1.1.0" + } + }, + "node_modules/string_decoder": { + "version": "1.3.0", + "license": "MIT", + "dependencies": { + "safe-buffer": "~5.2.0" + } + }, + "node_modules/string-width": { + "version": "5.1.2", + "license": "MIT", + "dependencies": { + "eastasianwidth": "^0.2.0", + "emoji-regex": "^9.2.2", + "strip-ansi": "^7.0.1" + }, + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/string-width-cjs": { + "name": "string-width", + "version": "4.2.3", + "license": "MIT", + "dependencies": { + "emoji-regex": "^8.0.0", + "is-fullwidth-code-point": "^3.0.0", + "strip-ansi": "^6.0.1" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/string-width-cjs/node_modules/ansi-regex": { + "version": "5.0.1", + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/string-width-cjs/node_modules/emoji-regex": { + "version": "8.0.0", + "license": "MIT" + }, + "node_modules/string-width-cjs/node_modules/strip-ansi": { + "version": 
"6.0.1", + "license": "MIT", + "dependencies": { + "ansi-regex": "^5.0.1" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/strip-ansi": { + "version": "7.2.0", + "license": "MIT", + "dependencies": { + "ansi-regex": "^6.2.2" + }, + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/chalk/strip-ansi?sponsor=1" + } + }, + "node_modules/strip-ansi-cjs": { + "name": "strip-ansi", + "version": "6.0.1", + "license": "MIT", + "dependencies": { + "ansi-regex": "^5.0.1" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/strip-ansi-cjs/node_modules/ansi-regex": { + "version": "5.0.1", + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/strip-final-newline": { + "version": "3.0.0", + "license": "MIT", + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/strip-literal": { + "version": "3.1.0", + "license": "MIT", + "dependencies": { + "js-tokens": "^9.0.1" + }, + "funding": { + "url": "https://github.com/sponsors/antfu" + } + }, + "node_modules/strip-literal/node_modules/js-tokens": { + "version": "9.0.1", + "license": "MIT" + }, + "node_modules/structured-clone-es": { + "version": "2.0.0", + "license": "ISC" + }, + "node_modules/style-mod": { + "version": "4.1.3", + "license": "MIT" + }, + "node_modules/stylehacks": { + "version": "8.0.0", + "license": "MIT", + "dependencies": { + "browserslist": "^4.28.2", + "postcss-selector-parser": "^7.1.1" + }, + "engines": { + "node": "^22.11.0 || ^24.11.0 || >=26.0" + }, + "peerDependencies": { + "postcss": "^8.5.14" + } + }, + "node_modules/supports-color": { + "version": "7.2.0", + "license": "MIT", + "dependencies": { + "has-flag": "^4.0.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/supports-preserve-symlinks-flag": { + "version": "1.0.0", + "license": "MIT", + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + 
"node_modules/svgo": { + "version": "4.0.1", + "license": "MIT", + "dependencies": { + "commander": "^11.1.0", + "css-select": "^5.1.0", + "css-tree": "^3.0.1", + "css-what": "^6.1.0", + "csso": "^5.0.5", + "picocolors": "^1.1.1", + "sax": "^1.5.0" + }, + "bin": { + "svgo": "bin/svgo.js" + }, + "engines": { + "node": ">=16" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/svgo" + } + }, + "node_modules/svgo/node_modules/commander": { + "version": "11.1.0", + "license": "MIT", + "engines": { + "node": ">=16" + } + }, + "node_modules/symbol-tree": { + "version": "3.2.4", + "resolved": "https://registry.npmjs.org/symbol-tree/-/symbol-tree-3.2.4.tgz", + "integrity": "sha512-9QNk5KwDF+Bvz+PyObkmSYjI5ksVUYtjW7AU22r2NKcfLJcXp96hkDWU3+XndOsUb+AQ9QhfzfCT2O+CNWT5Tw==", + "license": "MIT" + }, + "node_modules/table-layout": { + "version": "4.1.1", + "license": "MIT", + "dependencies": { + "array-back": "^6.2.2", + "wordwrapjs": "^5.1.0" + }, + "engines": { + "node": ">=12.17" + } + }, + "node_modules/table-layout/node_modules/array-back": { + "version": "6.2.3", + "license": "MIT", + "engines": { + "node": ">=12.17" + } + }, + "node_modules/tagged-tag": { + "version": "1.0.0", + "license": "MIT", + "engines": { + "node": ">=20" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/tailwind-merge": { + "version": "3.6.0", + "license": "MIT", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/dcastil" + } + }, + "node_modules/tailwindcss": { + "version": "4.3.0", + "license": "MIT" + }, + "node_modules/tapable": { + "version": "2.3.3", + "license": "MIT", + "engines": { + "node": ">=6" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/webpack" + } + }, + "node_modules/tar": { + "version": "7.5.15", + "license": "BlueOak-1.0.0", + "dependencies": { + "@isaacs/fs-minipass": "^4.0.0", + "chownr": "^3.0.0", + "minipass": "^7.1.2", + "minizlib": 
"^3.1.0", + "yallist": "^5.0.0" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/tar-stream": { + "version": "3.2.0", + "license": "MIT", + "dependencies": { + "b4a": "^1.6.4", + "bare-fs": "^4.5.5", + "fast-fifo": "^1.2.0", + "streamx": "^2.15.0" + } + }, + "node_modules/tar/node_modules/yallist": { + "version": "5.0.0", + "license": "BlueOak-1.0.0", + "engines": { + "node": ">=18" + } + }, + "node_modules/teex": { + "version": "1.0.1", + "license": "MIT", + "dependencies": { + "streamx": "^2.12.5" + } + }, + "node_modules/terser": { + "version": "5.47.1", + "license": "BSD-2-Clause", + "dependencies": { + "@jridgewell/source-map": "^0.3.3", + "acorn": "^8.15.0", + "commander": "^2.20.0", + "source-map-support": "~0.5.20" + }, + "bin": { + "terser": "bin/terser" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/terser/node_modules/commander": { + "version": "2.20.3", + "license": "MIT" + }, + "node_modules/text-decoder": { + "version": "1.2.7", + "license": "Apache-2.0", + "dependencies": { + "b4a": "^1.6.4" + } + }, + "node_modules/tiny-invariant": { + "version": "1.3.3", + "license": "MIT" + }, + "node_modules/tinybench": { + "version": "2.9.0", + "dev": true, + "license": "MIT" + }, + "node_modules/tinyclip": { + "version": "0.1.12", + "license": "MIT", + "engines": { + "node": "^16.14.0 || >= 17.3.0" + } + }, + "node_modules/tinyexec": { + "version": "1.1.2", + "license": "MIT", + "engines": { + "node": ">=18" + } + }, + "node_modules/tinyglobby": { + "version": "0.2.16", + "license": "MIT", + "dependencies": { + "fdir": "^6.5.0", + "picomatch": "^4.0.4" + }, + "engines": { + "node": ">=12.0.0" + }, + "funding": { + "url": "https://github.com/sponsors/SuperchupuDev" + } + }, + "node_modules/tinypool": { + "version": "1.1.1", + "dev": true, + "license": "MIT", + "engines": { + "node": "^18.0.0 || >=20.0.0" + } + }, + "node_modules/tinyrainbow": { + "version": "1.2.0", + "dev": true, + "license": "MIT", + "engines": { + "node": 
">=14.0.0" + } + }, + "node_modules/tinyspy": { + "version": "3.0.2", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/tldts": { + "version": "7.4.2", + "resolved": "https://registry.npmjs.org/tldts/-/tldts-7.4.2.tgz", + "integrity": "sha512-kCwffuaH8ntKtygnWe1b4BJKWiCUH30n5KfoTr6IchcXOwR7chAOFJxFrH3vjANafUYrIA4a7SDL+nn7SiR4Sw==", + "license": "MIT", + "dependencies": { + "tldts-core": "^7.4.2" + }, + "bin": { + "tldts": "bin/cli.js" + } + }, + "node_modules/tldts-core": { + "version": "7.4.2", + "resolved": "https://registry.npmjs.org/tldts-core/-/tldts-core-7.4.2.tgz", + "integrity": "sha512-nwEyF4vl4RSJjwSjBUmOSxc3BFPoIFdlRthJ6e+5v9P3bHNsoD06UjuqMUspqp7vsEZ1beaHi1km+optiE17yA==", + "license": "MIT" + }, + "node_modules/to-regex-range": { + "version": "5.0.1", + "license": "MIT", + "dependencies": { + "is-number": "^7.0.0" + }, + "engines": { + "node": ">=8.0" + } + }, + "node_modules/toidentifier": { + "version": "1.0.1", + "license": "MIT", + "engines": { + "node": ">=0.6" + } + }, + "node_modules/totalist": { + "version": "3.0.1", + "license": "MIT", + "engines": { + "node": ">=6" + } + }, + "node_modules/tough-cookie": { + "version": "6.0.1", + "resolved": "https://registry.npmjs.org/tough-cookie/-/tough-cookie-6.0.1.tgz", + "integrity": "sha512-LktZQb3IeoUWB9lqR5EWTHgW/VTITCXg4D21M+lvybRVdylLrRMnqaIONLVb5mav8vM19m44HIcGq4qASeu2Qw==", + "license": "BSD-3-Clause", + "dependencies": { + "tldts": "^7.0.5" + }, + "engines": { + "node": ">=16" + } + }, + "node_modules/tr46": { + "version": "0.0.3", + "license": "MIT" + }, + "node_modules/tslib": { + "version": "2.8.1", + "license": "0BSD" + }, + "node_modules/tw-animate-css": { + "version": "1.4.0", + "dev": true, + "license": "MIT", + "funding": { + "url": "https://github.com/sponsors/Wombosvideo" + } + }, + "node_modules/type-fest": { + "version": "5.6.0", + "license": "(MIT OR CC0-1.0)", + "dependencies": { + "tagged-tag": "^1.0.0" + }, + "engines": { + "node": 
">=20" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/type-level-regexp": { + "version": "0.1.17", + "license": "MIT" + }, + "node_modules/typescript": { + "version": "5.9.3", + "devOptional": true, + "license": "Apache-2.0", + "bin": { + "tsc": "bin/tsc", + "tsserver": "bin/tsserver" + }, + "engines": { + "node": ">=14.17" + } + }, + "node_modules/typical": { + "version": "4.0.0", + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/ufo": { + "version": "1.6.4", + "license": "MIT" + }, + "node_modules/ultrahtml": { + "version": "1.6.0", + "license": "MIT" + }, + "node_modules/uncrypto": { + "version": "0.1.3", + "license": "MIT" + }, + "node_modules/unctx": { + "version": "2.5.0", + "license": "MIT", + "dependencies": { + "acorn": "^8.15.0", + "estree-walker": "^3.0.3", + "magic-string": "^0.30.21", + "unplugin": "^2.3.11" + } + }, + "node_modules/unctx/node_modules/estree-walker": { + "version": "3.0.3", + "license": "MIT", + "dependencies": { + "@types/estree": "^1.0.0" + } + }, + "node_modules/unctx/node_modules/unplugin": { + "version": "2.3.11", + "license": "MIT", + "dependencies": { + "@jridgewell/remapping": "^2.3.5", + "acorn": "^8.15.0", + "picomatch": "^4.0.3", + "webpack-virtual-modules": "^0.6.2" + }, + "engines": { + "node": ">=18.12.0" + } + }, + "node_modules/undici": { + "version": "7.27.0", + "resolved": "https://registry.npmjs.org/undici/-/undici-7.27.0.tgz", + "integrity": "sha512-+t2Z/GwkZQDtu00813aP66ygViGtPHKhhoFZpQKpKrE+9jIgES+Zw+mFNaDWOVRKiuJjuqKHzD3B1sfGg8+ZOQ==", + "license": "MIT", + "engines": { + "node": ">=20.18.1" + } + }, + "node_modules/undici-types": { + "version": "6.21.0", + "license": "MIT" + }, + "node_modules/unenv": { + "version": "2.0.0-rc.24", + "license": "MIT", + "dependencies": { + "pathe": "^2.0.3" + } + }, + "node_modules/unhead": { + "version": "2.1.15", + "license": "MIT", + "dependencies": { + "hookable": "^6.0.1" + }, + "funding": { + "url": 
"https://github.com/sponsors/harlan-zw" + } + }, + "node_modules/unicorn-magic": { + "version": "0.4.0", + "license": "MIT", + "engines": { + "node": ">=20" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/unimport": { + "version": "6.3.0", + "license": "MIT", + "dependencies": { + "acorn": "^8.16.0", + "escape-string-regexp": "^5.0.0", + "estree-walker": "^3.0.3", + "local-pkg": "^1.1.2", + "magic-string": "^0.30.21", + "mlly": "^1.8.2", + "pathe": "^2.0.3", + "picomatch": "^4.0.4", + "pkg-types": "^2.3.1", + "scule": "^1.3.0", + "strip-literal": "^3.1.0", + "tinyglobby": "^0.2.16", + "unplugin": "^3.0.0", + "unplugin-utils": "^0.3.1" + }, + "engines": { + "node": ">=18.12.0" + }, + "peerDependencies": { + "oxc-parser": "*", + "rolldown": "^1.0.0" + }, + "peerDependenciesMeta": { + "oxc-parser": { + "optional": true + }, + "rolldown": { + "optional": true + } + } + }, + "node_modules/unimport/node_modules/estree-walker": { + "version": "3.0.3", + "license": "MIT", + "dependencies": { + "@types/estree": "^1.0.0" + } + }, + "node_modules/unplugin": { + "version": "3.0.0", + "license": "MIT", + "dependencies": { + "@jridgewell/remapping": "^2.3.5", + "picomatch": "^4.0.3", + "webpack-virtual-modules": "^0.6.2" + }, + "engines": { + "node": "^20.19.0 || >=22.12.0" + } + }, + "node_modules/unplugin-utils": { + "version": "0.3.1", + "license": "MIT", + "dependencies": { + "pathe": "^2.0.3", + "picomatch": "^4.0.3" + }, + "engines": { + "node": ">=20.19.0" + }, + "funding": { + "url": "https://github.com/sponsors/sxzz" + } + }, + "node_modules/unrouting": { + "version": "0.1.7", + "license": "MIT", + "dependencies": { + "escape-string-regexp": "^5.0.0", + "ufo": "^1.6.3" + } + }, + "node_modules/unstorage": { + "version": "1.17.5", + "license": "MIT", + "dependencies": { + "anymatch": "^3.1.3", + "chokidar": "^5.0.0", + "destr": "^2.0.5", + "h3": "^1.15.10", + "lru-cache": "^11.2.7", + "node-fetch-native": "^1.6.7", + 
"ofetch": "^1.5.1", + "ufo": "^1.6.3" + }, + "peerDependencies": { + "@azure/app-configuration": "^1.8.0", + "@azure/cosmos": "^4.2.0", + "@azure/data-tables": "^13.3.0", + "@azure/identity": "^4.6.0", + "@azure/keyvault-secrets": "^4.9.0", + "@azure/storage-blob": "^12.26.0", + "@capacitor/preferences": "^6 || ^7 || ^8", + "@deno/kv": ">=0.9.0", + "@netlify/blobs": "^6.5.0 || ^7.0.0 || ^8.1.0 || ^9.0.0 || ^10.0.0", + "@planetscale/database": "^1.19.0", + "@upstash/redis": "^1.34.3", + "@vercel/blob": ">=0.27.1", + "@vercel/functions": "^2.2.12 || ^3.0.0", + "@vercel/kv": "^1 || ^2 || ^3", + "aws4fetch": "^1.0.20", + "db0": ">=0.2.1", + "idb-keyval": "^6.2.1", + "ioredis": "^5.4.2", + "uploadthing": "^7.4.4" + }, + "peerDependenciesMeta": { + "@azure/app-configuration": { + "optional": true + }, + "@azure/cosmos": { + "optional": true + }, + "@azure/data-tables": { + "optional": true + }, + "@azure/identity": { + "optional": true + }, + "@azure/keyvault-secrets": { + "optional": true + }, + "@azure/storage-blob": { + "optional": true + }, + "@capacitor/preferences": { + "optional": true + }, + "@deno/kv": { + "optional": true + }, + "@netlify/blobs": { + "optional": true + }, + "@planetscale/database": { + "optional": true + }, + "@upstash/redis": { + "optional": true + }, + "@vercel/blob": { + "optional": true + }, + "@vercel/functions": { + "optional": true + }, + "@vercel/kv": { + "optional": true + }, + "aws4fetch": { + "optional": true + }, + "db0": { + "optional": true + }, + "idb-keyval": { + "optional": true + }, + "ioredis": { + "optional": true + }, + "uploadthing": { + "optional": true + } + } + }, + "node_modules/unstorage/node_modules/lru-cache": { + "version": "11.5.0", + "license": "BlueOak-1.0.0", + "engines": { + "node": "20 || >=22" + } + }, + "node_modules/untun": { + "version": "0.1.3", + "license": "MIT", + "dependencies": { + "citty": "^0.1.5", + "consola": "^3.2.3", + "pathe": "^1.1.1" + }, + "bin": { + "untun": "bin/untun.mjs" + } + }, + 
"node_modules/untun/node_modules/citty": { + "version": "0.1.6", + "license": "MIT", + "dependencies": { + "consola": "^3.2.3" + } + }, + "node_modules/untun/node_modules/pathe": { + "version": "1.1.2", + "license": "MIT" + }, + "node_modules/untyped": { + "version": "2.0.0", + "license": "MIT", + "dependencies": { + "citty": "^0.1.6", + "defu": "^6.1.4", + "jiti": "^2.4.2", + "knitwork": "^1.2.0", + "scule": "^1.3.0" + }, + "bin": { + "untyped": "dist/cli.mjs" + } + }, + "node_modules/untyped/node_modules/citty": { + "version": "0.1.6", + "license": "MIT", + "dependencies": { + "consola": "^3.2.3" + } + }, + "node_modules/unwasm": { + "version": "0.5.3", + "license": "MIT", + "dependencies": { + "exsolve": "^1.0.8", + "knitwork": "^1.3.0", + "magic-string": "^0.30.21", + "mlly": "^1.8.0", + "pathe": "^2.0.3", + "pkg-types": "^2.3.0" + } + }, + "node_modules/update-browserslist-db": { + "version": "1.2.3", + "funding": [ + { + "type": "opencollective", + "url": "https://opencollective.com/browserslist" + }, + { + "type": "tidelift", + "url": "https://tidelift.com/funding/github/npm/browserslist" + }, + { + "type": "github", + "url": "https://github.com/sponsors/ai" + } + ], + "license": "MIT", + "dependencies": { + "escalade": "^3.2.0", + "picocolors": "^1.1.1" + }, + "bin": { + "update-browserslist-db": "cli.js" + }, + "peerDependencies": { + "browserslist": ">= 4.21.0" + } + }, + "node_modules/uqr": { + "version": "0.1.3", + "license": "MIT" + }, + "node_modules/util-deprecate": { + "version": "1.0.2", + "license": "MIT" + }, + "node_modules/vite": { + "version": "6.4.2", + "license": "MIT", + "peer": true, + "dependencies": { + "esbuild": "^0.25.0", + "fdir": "^6.4.4", + "picomatch": "^4.0.2", + "postcss": "^8.5.3", + "rollup": "^4.34.9", + "tinyglobby": "^0.2.13" + }, + "bin": { + "vite": "bin/vite.js" + }, + "engines": { + "node": "^18.0.0 || ^20.0.0 || >=22.0.0" + }, + "funding": { + "url": "https://github.com/vitejs/vite?sponsor=1" + }, + 
"optionalDependencies": { + "fsevents": "~2.3.3" + }, + "peerDependencies": { + "@types/node": "^18.0.0 || ^20.0.0 || >=22.0.0", + "jiti": ">=1.21.0", + "less": "*", + "lightningcss": "^1.21.0", + "sass": "*", + "sass-embedded": "*", + "stylus": "*", + "sugarss": "*", + "terser": "^5.16.0", + "tsx": "^4.8.1", + "yaml": "^2.4.2" + }, + "peerDependenciesMeta": { + "@types/node": { + "optional": true + }, + "jiti": { + "optional": true + }, + "less": { + "optional": true + }, + "lightningcss": { + "optional": true + }, + "sass": { + "optional": true + }, + "sass-embedded": { + "optional": true + }, + "stylus": { + "optional": true + }, + "sugarss": { + "optional": true + }, + "terser": { + "optional": true + }, + "tsx": { + "optional": true + }, + "yaml": { + "optional": true + } + } + }, + "node_modules/vite-dev-rpc": { + "version": "1.1.0", + "license": "MIT", + "dependencies": { + "birpc": "^2.4.0", + "vite-hot-client": "^2.1.0" + }, + "funding": { + "url": "https://github.com/sponsors/antfu" + }, + "peerDependencies": { + "vite": "^2.9.0 || ^3.0.0-0 || ^4.0.0-0 || ^5.0.0-0 || ^6.0.1 || ^7.0.0-0" + } + }, + "node_modules/vite-dev-rpc/node_modules/birpc": { + "version": "2.9.0", + "license": "MIT", + "funding": { + "url": "https://github.com/sponsors/antfu" + } + }, + "node_modules/vite-hot-client": { + "version": "2.2.0", + "license": "MIT", + "funding": { + "url": "https://github.com/sponsors/antfu" + }, + "peerDependencies": { + "vite": "^2.6.0 || ^3.0.0 || ^4.0.0 || ^5.0.0-0 || ^6.0.0-0 || ^7.0.0-0 || ^8.0.0" + } + }, + "node_modules/vite-node": { + "version": "5.3.0", + "license": "MIT", + "dependencies": { + "cac": "^6.7.14", + "es-module-lexer": "^2.0.0", + "obug": "^2.1.1", + "pathe": "^2.0.3", + "vite": "^7.3.1" + }, + "bin": { + "vite-node": "dist/cli.mjs" + }, + "engines": { + "node": "^20.19.0 || >=22.12.0" + }, + "funding": { + "url": "https://opencollective.com/antfu" + } + }, + "node_modules/vite-node/node_modules/@esbuild/linux-x64": { + "version": 
"0.27.7", + "cpu": [ + "x64" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/vite-node/node_modules/esbuild": { + "version": "0.27.7", + "hasInstallScript": true, + "license": "MIT", + "bin": { + "esbuild": "bin/esbuild" + }, + "engines": { + "node": ">=18" + }, + "optionalDependencies": { + "@esbuild/aix-ppc64": "0.27.7", + "@esbuild/android-arm": "0.27.7", + "@esbuild/android-arm64": "0.27.7", + "@esbuild/android-x64": "0.27.7", + "@esbuild/darwin-arm64": "0.27.7", + "@esbuild/darwin-x64": "0.27.7", + "@esbuild/freebsd-arm64": "0.27.7", + "@esbuild/freebsd-x64": "0.27.7", + "@esbuild/linux-arm": "0.27.7", + "@esbuild/linux-arm64": "0.27.7", + "@esbuild/linux-ia32": "0.27.7", + "@esbuild/linux-loong64": "0.27.7", + "@esbuild/linux-mips64el": "0.27.7", + "@esbuild/linux-ppc64": "0.27.7", + "@esbuild/linux-riscv64": "0.27.7", + "@esbuild/linux-s390x": "0.27.7", + "@esbuild/linux-x64": "0.27.7", + "@esbuild/netbsd-arm64": "0.27.7", + "@esbuild/netbsd-x64": "0.27.7", + "@esbuild/openbsd-arm64": "0.27.7", + "@esbuild/openbsd-x64": "0.27.7", + "@esbuild/openharmony-arm64": "0.27.7", + "@esbuild/sunos-x64": "0.27.7", + "@esbuild/win32-arm64": "0.27.7", + "@esbuild/win32-ia32": "0.27.7", + "@esbuild/win32-x64": "0.27.7" + } + }, + "node_modules/vite-node/node_modules/vite": { + "version": "7.3.3", + "license": "MIT", + "dependencies": { + "esbuild": "^0.27.0", + "fdir": "^6.5.0", + "picomatch": "^4.0.3", + "postcss": "^8.5.6", + "rollup": "^4.43.0", + "tinyglobby": "^0.2.15" + }, + "bin": { + "vite": "bin/vite.js" + }, + "engines": { + "node": "^20.19.0 || >=22.12.0" + }, + "funding": { + "url": "https://github.com/vitejs/vite?sponsor=1" + }, + "optionalDependencies": { + "fsevents": "~2.3.3" + }, + "peerDependencies": { + "@types/node": "^20.19.0 || >=22.12.0", + "jiti": ">=1.21.0", + "less": "^4.0.0", + "lightningcss": "^1.21.0", + "sass": "^1.70.0", + "sass-embedded": "^1.70.0", + "stylus": 
">=0.54.8", + "sugarss": "^5.0.0", + "terser": "^5.16.0", + "tsx": "^4.8.1", + "yaml": "^2.4.2" + }, + "peerDependenciesMeta": { + "@types/node": { + "optional": true + }, + "jiti": { + "optional": true + }, + "less": { + "optional": true + }, + "lightningcss": { + "optional": true + }, + "sass": { + "optional": true + }, + "sass-embedded": { + "optional": true + }, + "stylus": { + "optional": true + }, + "sugarss": { + "optional": true + }, + "terser": { + "optional": true + }, + "tsx": { + "optional": true + }, + "yaml": { + "optional": true + } + } + }, + "node_modules/vite-plugin-checker": { + "version": "0.13.0", + "license": "MIT", + "dependencies": { + "@babel/code-frame": "^7.27.1", + "chokidar": "^4.0.3", + "npm-run-path": "^6.0.0", + "picocolors": "^1.1.1", + "picomatch": "^4.0.4", + "proper-lockfile": "^4.1.2", + "tiny-invariant": "^1.3.3", + "tinyglobby": "^0.2.15", + "vscode-uri": "^3.1.0" + }, + "engines": { + "node": ">=16.11" + }, + "peerDependencies": { + "@biomejs/biome": ">=1.7", + "eslint": ">=9.39.4", + "meow": "^13.2.0 || ^14.0.0", + "optionator": "^0.9.4", + "oxlint": ">=1", + "stylelint": ">=16.26.1", + "typescript": "*", + "vite": ">=5.4.21", + "vls": "*", + "vti": "*", + "vue-tsc": "~2.2.10 || ^3.0.0" + }, + "peerDependenciesMeta": { + "@biomejs/biome": { + "optional": true + }, + "eslint": { + "optional": true + }, + "meow": { + "optional": true + }, + "optionator": { + "optional": true + }, + "oxlint": { + "optional": true + }, + "stylelint": { + "optional": true + }, + "typescript": { + "optional": true + }, + "vls": { + "optional": true + }, + "vti": { + "optional": true + }, + "vue-tsc": { + "optional": true + } + } + }, + "node_modules/vite-plugin-checker/node_modules/chokidar": { + "version": "4.0.3", + "license": "MIT", + "dependencies": { + "readdirp": "^4.0.1" + }, + "engines": { + "node": ">= 14.16.0" + }, + "funding": { + "url": "https://paulmillr.com/funding/" + } + }, + 
"node_modules/vite-plugin-checker/node_modules/npm-run-path": { + "version": "6.0.0", + "license": "MIT", + "dependencies": { + "path-key": "^4.0.0", + "unicorn-magic": "^0.3.0" + }, + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/vite-plugin-checker/node_modules/path-key": { + "version": "4.0.0", + "license": "MIT", + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/vite-plugin-checker/node_modules/readdirp": { + "version": "4.1.2", + "license": "MIT", + "engines": { + "node": ">= 14.18.0" + }, + "funding": { + "type": "individual", + "url": "https://paulmillr.com/funding/" + } + }, + "node_modules/vite-plugin-checker/node_modules/unicorn-magic": { + "version": "0.3.0", + "license": "MIT", + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/vite-plugin-inspect": { + "version": "11.3.3", + "license": "MIT", + "dependencies": { + "ansis": "^4.1.0", + "debug": "^4.4.1", + "error-stack-parser-es": "^1.0.5", + "ohash": "^2.0.11", + "open": "^10.2.0", + "perfect-debounce": "^2.0.0", + "sirv": "^3.0.1", + "unplugin-utils": "^0.3.0", + "vite-dev-rpc": "^1.1.0" + }, + "engines": { + "node": ">=14" + }, + "funding": { + "url": "https://github.com/sponsors/antfu" + }, + "peerDependencies": { + "vite": "^6.0.0 || ^7.0.0-0" + }, + "peerDependenciesMeta": { + "@nuxt/kit": { + "optional": true + } + } + }, + "node_modules/vite-plugin-inspect/node_modules/open": { + "version": "10.2.0", + "license": "MIT", + "dependencies": { + "default-browser": "^5.2.1", + "define-lazy-prop": "^3.0.0", + "is-inside-container": "^1.0.0", + "wsl-utils": "^0.1.0" + }, + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/vite-plugin-inspect/node_modules/wsl-utils": { + "version": "0.1.0", + 
"license": "MIT", + "dependencies": { + "is-wsl": "^3.1.0" + }, + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/vite-plugin-vue-tracer": { + "version": "1.4.0", + "license": "MIT", + "dependencies": { + "estree-walker": "^3.0.3", + "exsolve": "^1.0.8", + "magic-string": "^0.30.21", + "pathe": "^2.0.3", + "source-map-js": "^1.2.1" + }, + "funding": { + "url": "https://github.com/sponsors/antfu" + }, + "peerDependencies": { + "vite": "^6.0.0 || ^7.0.0 || ^8.0.0-0", + "vue": "^3.5.0" + } + }, + "node_modules/vite-plugin-vue-tracer/node_modules/estree-walker": { + "version": "3.0.3", + "license": "MIT", + "dependencies": { + "@types/estree": "^1.0.0" + } + }, + "node_modules/vite/node_modules/@esbuild/linux-x64": { + "version": "0.25.12", + "cpu": [ + "x64" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "peer": true, + "engines": { + "node": ">=18" + } + }, + "node_modules/vite/node_modules/esbuild": { + "version": "0.25.12", + "hasInstallScript": true, + "license": "MIT", + "peer": true, + "bin": { + "esbuild": "bin/esbuild" + }, + "engines": { + "node": ">=18" + }, + "optionalDependencies": { + "@esbuild/aix-ppc64": "0.25.12", + "@esbuild/android-arm": "0.25.12", + "@esbuild/android-arm64": "0.25.12", + "@esbuild/android-x64": "0.25.12", + "@esbuild/darwin-arm64": "0.25.12", + "@esbuild/darwin-x64": "0.25.12", + "@esbuild/freebsd-arm64": "0.25.12", + "@esbuild/freebsd-x64": "0.25.12", + "@esbuild/linux-arm": "0.25.12", + "@esbuild/linux-arm64": "0.25.12", + "@esbuild/linux-ia32": "0.25.12", + "@esbuild/linux-loong64": "0.25.12", + "@esbuild/linux-mips64el": "0.25.12", + "@esbuild/linux-ppc64": "0.25.12", + "@esbuild/linux-riscv64": "0.25.12", + "@esbuild/linux-s390x": "0.25.12", + "@esbuild/linux-x64": "0.25.12", + "@esbuild/netbsd-arm64": "0.25.12", + "@esbuild/netbsd-x64": "0.25.12", + "@esbuild/openbsd-arm64": "0.25.12", + "@esbuild/openbsd-x64": "0.25.12", + 
"@esbuild/openharmony-arm64": "0.25.12", + "@esbuild/sunos-x64": "0.25.12", + "@esbuild/win32-arm64": "0.25.12", + "@esbuild/win32-ia32": "0.25.12", + "@esbuild/win32-x64": "0.25.12" + } + }, + "node_modules/vitest": { + "version": "2.1.9", + "dev": true, + "license": "MIT", + "dependencies": { + "@vitest/expect": "2.1.9", + "@vitest/mocker": "2.1.9", + "@vitest/pretty-format": "^2.1.9", + "@vitest/runner": "2.1.9", + "@vitest/snapshot": "2.1.9", + "@vitest/spy": "2.1.9", + "@vitest/utils": "2.1.9", + "chai": "^5.1.2", + "debug": "^4.3.7", + "expect-type": "^1.1.0", + "magic-string": "^0.30.12", + "pathe": "^1.1.2", + "std-env": "^3.8.0", + "tinybench": "^2.9.0", + "tinyexec": "^0.3.1", + "tinypool": "^1.0.1", + "tinyrainbow": "^1.2.0", + "vite": "^5.0.0", + "vite-node": "2.1.9", + "why-is-node-running": "^2.3.0" + }, + "bin": { + "vitest": "vitest.mjs" + }, + "engines": { + "node": "^18.0.0 || >=20.0.0" + }, + "funding": { + "url": "https://opencollective.com/vitest" + }, + "peerDependencies": { + "@edge-runtime/vm": "*", + "@types/node": "^18.0.0 || >=20.0.0", + "@vitest/browser": "2.1.9", + "@vitest/ui": "2.1.9", + "happy-dom": "*", + "jsdom": "*" + }, + "peerDependenciesMeta": { + "@edge-runtime/vm": { + "optional": true + }, + "@types/node": { + "optional": true + }, + "@vitest/browser": { + "optional": true + }, + "@vitest/ui": { + "optional": true + }, + "happy-dom": { + "optional": true + }, + "jsdom": { + "optional": true + } + } + }, + "node_modules/vitest/node_modules/@esbuild/linux-x64": { + "version": "0.21.5", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/vitest/node_modules/@vitest/mocker": { + "version": "2.1.9", + "dev": true, + "license": "MIT", + "dependencies": { + "@vitest/spy": "2.1.9", + "estree-walker": "^3.0.3", + "magic-string": "^0.30.12" + }, + "funding": { + "url": "https://opencollective.com/vitest" + }, + 
"peerDependencies": { + "msw": "^2.4.9", + "vite": "^5.0.0" + }, + "peerDependenciesMeta": { + "msw": { + "optional": true + }, + "vite": { + "optional": true + } + } + }, + "node_modules/vitest/node_modules/es-module-lexer": { + "version": "1.7.0", + "dev": true, + "license": "MIT" + }, + "node_modules/vitest/node_modules/esbuild": { + "version": "0.21.5", + "dev": true, + "hasInstallScript": true, + "license": "MIT", + "bin": { + "esbuild": "bin/esbuild" + }, + "engines": { + "node": ">=12" + }, + "optionalDependencies": { + "@esbuild/aix-ppc64": "0.21.5", + "@esbuild/android-arm": "0.21.5", + "@esbuild/android-arm64": "0.21.5", + "@esbuild/android-x64": "0.21.5", + "@esbuild/darwin-arm64": "0.21.5", + "@esbuild/darwin-x64": "0.21.5", + "@esbuild/freebsd-arm64": "0.21.5", + "@esbuild/freebsd-x64": "0.21.5", + "@esbuild/linux-arm": "0.21.5", + "@esbuild/linux-arm64": "0.21.5", + "@esbuild/linux-ia32": "0.21.5", + "@esbuild/linux-loong64": "0.21.5", + "@esbuild/linux-mips64el": "0.21.5", + "@esbuild/linux-ppc64": "0.21.5", + "@esbuild/linux-riscv64": "0.21.5", + "@esbuild/linux-s390x": "0.21.5", + "@esbuild/linux-x64": "0.21.5", + "@esbuild/netbsd-x64": "0.21.5", + "@esbuild/openbsd-x64": "0.21.5", + "@esbuild/sunos-x64": "0.21.5", + "@esbuild/win32-arm64": "0.21.5", + "@esbuild/win32-ia32": "0.21.5", + "@esbuild/win32-x64": "0.21.5" + } + }, + "node_modules/vitest/node_modules/estree-walker": { + "version": "3.0.3", + "dev": true, + "license": "MIT", + "dependencies": { + "@types/estree": "^1.0.0" + } + }, + "node_modules/vitest/node_modules/pathe": { + "version": "1.1.2", + "dev": true, + "license": "MIT" + }, + "node_modules/vitest/node_modules/std-env": { + "version": "3.10.0", + "dev": true, + "license": "MIT" + }, + "node_modules/vitest/node_modules/tinyexec": { + "version": "0.3.2", + "dev": true, + "license": "MIT" + }, + "node_modules/vitest/node_modules/vite": { + "version": "5.4.21", + "dev": true, + "license": "MIT", + "dependencies": { + "esbuild": 
"^0.21.3", + "postcss": "^8.4.43", + "rollup": "^4.20.0" + }, + "bin": { + "vite": "bin/vite.js" + }, + "engines": { + "node": "^18.0.0 || >=20.0.0" + }, + "funding": { + "url": "https://github.com/vitejs/vite?sponsor=1" + }, + "optionalDependencies": { + "fsevents": "~2.3.3" + }, + "peerDependencies": { + "@types/node": "^18.0.0 || >=20.0.0", + "less": "*", + "lightningcss": "^1.21.0", + "sass": "*", + "sass-embedded": "*", + "stylus": "*", + "sugarss": "*", + "terser": "^5.4.0" + }, + "peerDependenciesMeta": { + "@types/node": { + "optional": true + }, + "less": { + "optional": true + }, + "lightningcss": { + "optional": true + }, + "sass": { + "optional": true + }, + "sass-embedded": { + "optional": true + }, + "stylus": { + "optional": true + }, + "sugarss": { + "optional": true + }, + "terser": { + "optional": true + } + } + }, + "node_modules/vitest/node_modules/vite-node": { + "version": "2.1.9", + "dev": true, + "license": "MIT", + "dependencies": { + "cac": "^6.7.14", + "debug": "^4.3.7", + "es-module-lexer": "^1.5.4", + "pathe": "^1.1.2", + "vite": "^5.0.0" + }, + "bin": { + "vite-node": "vite-node.mjs" + }, + "engines": { + "node": "^18.0.0 || >=20.0.0" + }, + "funding": { + "url": "https://opencollective.com/vitest" + } + }, + "node_modules/vscode-uri": { + "version": "3.1.0", + "license": "MIT" + }, + "node_modules/vue": { + "version": "3.5.34", + "license": "MIT", + "dependencies": { + "@vue/compiler-dom": "3.5.34", + "@vue/compiler-sfc": "3.5.34", + "@vue/runtime-dom": "3.5.34", + "@vue/server-renderer": "3.5.34", + "@vue/shared": "3.5.34" + }, + "peerDependencies": { + "typescript": "*" + }, + "peerDependenciesMeta": { + "typescript": { + "optional": true + } + } + }, + "node_modules/vue-bundle-renderer": { + "version": "2.2.0", + "license": "MIT", + "dependencies": { + "ufo": "^1.6.1" + } + }, + "node_modules/vue-component-type-helpers": { + "version": "3.3.1", + "dev": true, + "license": "MIT" + }, + "node_modules/vue-devtools-stub": { + 
"version": "0.1.0", + "license": "MIT" + }, + "node_modules/vue-router": { + "version": "4.6.4", + "license": "MIT", + "dependencies": { + "@vue/devtools-api": "^6.6.4" + }, + "funding": { + "url": "https://github.com/sponsors/posva" + }, + "peerDependencies": { + "vue": "^3.5.0" + } + }, + "node_modules/vue-sonner": { + "version": "2.0.9", + "license": "MIT", + "peerDependencies": { + "@nuxt/kit": "^4.0.3", + "@nuxt/schema": "^4.0.3", + "nuxt": "^4.0.3" + }, + "peerDependenciesMeta": { + "@nuxt/kit": { + "optional": true + }, + "@nuxt/schema": { + "optional": true + }, + "nuxt": { + "optional": true + } + } + }, + "node_modules/vue-tsc": { + "version": "2.2.12", + "devOptional": true, + "license": "MIT", + "dependencies": { + "@volar/typescript": "2.4.15", + "@vue/language-core": "2.2.12" + }, + "bin": { + "vue-tsc": "bin/vue-tsc.js" + }, + "peerDependencies": { + "typescript": ">=5.0.0" + } + }, + "node_modules/w3c-keyname": { + "version": "2.2.8", + "license": "MIT" + }, + "node_modules/w3c-xmlserializer": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/w3c-xmlserializer/-/w3c-xmlserializer-5.0.0.tgz", + "integrity": "sha512-o8qghlI8NZHU1lLPrpi2+Uq7abh4GGPpYANlalzWxyWteJOCsr/P+oPBA49TOLu5FTZO4d3F9MnWJfiMo4BkmA==", + "license": "MIT", + "dependencies": { + "xml-name-validator": "^5.0.0" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/webidl-conversions": { + "version": "7.0.0", + "dev": true, + "license": "BSD-2-Clause", + "engines": { + "node": ">=12" + } + }, + "node_modules/webpack-virtual-modules": { + "version": "0.6.2", + "license": "MIT" + }, + "node_modules/whatwg-mimetype": { + "version": "3.0.0", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=12" + } + }, + "node_modules/whatwg-url": { + "version": "5.0.0", + "license": "MIT", + "dependencies": { + "tr46": "~0.0.3", + "webidl-conversions": "^3.0.0" + } + }, + "node_modules/whatwg-url/node_modules/webidl-conversions": { + "version": "3.0.1", + "license": 
"BSD-2-Clause" + }, + "node_modules/which": { + "version": "2.0.2", + "license": "ISC", + "dependencies": { + "isexe": "^2.0.0" + }, + "bin": { + "node-which": "bin/node-which" + }, + "engines": { + "node": ">= 8" + } + }, + "node_modules/why-is-node-running": { + "version": "2.3.0", + "dev": true, + "license": "MIT", + "dependencies": { + "siginfo": "^2.0.0", + "stackback": "0.0.2" + }, + "bin": { + "why-is-node-running": "cli.js" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/wordwrapjs": { + "version": "5.1.1", + "license": "MIT", + "engines": { + "node": ">=12.17" + } + }, + "node_modules/wrap-ansi": { + "version": "8.1.0", + "license": "MIT", + "dependencies": { + "ansi-styles": "^6.1.0", + "string-width": "^5.0.1", + "strip-ansi": "^7.0.1" + }, + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/chalk/wrap-ansi?sponsor=1" + } + }, + "node_modules/wrap-ansi-cjs": { + "name": "wrap-ansi", + "version": "7.0.0", + "license": "MIT", + "dependencies": { + "ansi-styles": "^4.0.0", + "string-width": "^4.1.0", + "strip-ansi": "^6.0.0" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/chalk/wrap-ansi?sponsor=1" + } + }, + "node_modules/wrap-ansi-cjs/node_modules/ansi-regex": { + "version": "5.0.1", + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/wrap-ansi-cjs/node_modules/emoji-regex": { + "version": "8.0.0", + "license": "MIT" + }, + "node_modules/wrap-ansi-cjs/node_modules/string-width": { + "version": "4.2.3", + "license": "MIT", + "dependencies": { + "emoji-regex": "^8.0.0", + "is-fullwidth-code-point": "^3.0.0", + "strip-ansi": "^6.0.1" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/wrap-ansi-cjs/node_modules/strip-ansi": { + "version": "6.0.1", + "license": "MIT", + "dependencies": { + "ansi-regex": "^5.0.1" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/wrap-ansi/node_modules/ansi-styles": { + "version": "6.2.3", + "license": 
"MIT", + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/chalk/ansi-styles?sponsor=1" + } + }, + "node_modules/ws": { + "version": "8.20.1", + "license": "MIT", + "engines": { + "node": ">=10.0.0" + }, + "peerDependencies": { + "bufferutil": "^4.0.1", + "utf-8-validate": ">=5.0.2" + }, + "peerDependenciesMeta": { + "bufferutil": { + "optional": true + }, + "utf-8-validate": { + "optional": true + } + } + }, + "node_modules/wsl-utils": { + "version": "0.3.1", + "license": "MIT", + "dependencies": { + "is-wsl": "^3.1.0", + "powershell-utils": "^0.1.0" + }, + "engines": { + "node": ">=20" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/xml-name-validator": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/xml-name-validator/-/xml-name-validator-5.0.0.tgz", + "integrity": "sha512-EvGK8EJ3DhaHfbRlETOWAS5pO9MZITeauHKJyb8wyajUfQUenkIg2MvLDTZ4T/TgIcm3HU0TFBgWWboAZ30UHg==", + "license": "Apache-2.0", + "engines": { + "node": ">=18" + } + }, + "node_modules/xmlchars": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/xmlchars/-/xmlchars-2.2.0.tgz", + "integrity": "sha512-JZnDKK8B0RCDw84FNdDAIpZK+JuJw+s7Lz8nksI7SIuU3UXJJslUthsi+uWBUYOwPFwW7W7PRLRfUKpxjtjFCw==", + "license": "MIT" + }, + "node_modules/y18n": { + "version": "5.0.8", + "license": "ISC", + "engines": { + "node": ">=10" + } + }, + "node_modules/yallist": { + "version": "3.1.1", + "license": "ISC" + }, + "node_modules/yaml": { + "version": "2.9.0", + "license": "ISC", + "bin": { + "yaml": "bin.mjs" + }, + "engines": { + "node": ">= 14.6" + }, + "funding": { + "url": "https://github.com/sponsors/eemeli" + } + }, + "node_modules/yargs": { + "version": "18.0.0", + "license": "MIT", + "dependencies": { + "cliui": "^9.0.1", + "escalade": "^3.1.1", + "get-caller-file": "^2.0.5", + "string-width": "^7.2.0", + "y18n": "^5.0.5", + "yargs-parser": "^22.0.0" + }, + "engines": { + "node": "^20.19.0 || ^22.12.0 || 
>=23" + } + }, + "node_modules/yargs-parser": { + "version": "22.0.0", + "license": "ISC", + "engines": { + "node": "^20.19.0 || ^22.12.0 || >=23" + } + }, + "node_modules/yargs/node_modules/emoji-regex": { + "version": "10.6.0", + "license": "MIT" + }, + "node_modules/yargs/node_modules/string-width": { + "version": "7.2.0", + "license": "MIT", + "dependencies": { + "emoji-regex": "^10.3.0", + "get-east-asian-width": "^1.0.0", + "strip-ansi": "^7.1.0" + }, + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/youch": { + "version": "4.1.1", + "license": "MIT", + "dependencies": { + "@poppinss/colors": "^4.1.6", + "@poppinss/dumper": "^0.7.0", + "@speed-highlight/core": "^1.2.14", + "cookie-es": "^3.0.1", + "youch-core": "^0.3.3" + } + }, + "node_modules/youch-core": { + "version": "0.3.3", + "license": "MIT", + "dependencies": { + "@poppinss/exception": "^1.2.2", + "error-stack-parser-es": "^1.0.5" + } + }, + "node_modules/zip-stream": { + "version": "6.0.1", + "license": "MIT", + "dependencies": { + "archiver-utils": "^5.0.0", + "compress-commons": "^6.0.2", + "readable-stream": "^4.0.0" + }, + "engines": { + "node": ">= 14" + } + } + } +} diff --git a/src/dev-ui/package.json b/src/dev-ui/package.json index a13b33447..f6dd86772 100644 --- a/src/dev-ui/package.json +++ b/src/dev-ui/package.json @@ -12,7 +12,6 @@ "test:watch": "vitest" }, "dependencies": { - "@cosmograph/cosmograph": "2.0.1", "@codemirror/autocomplete": "^6.20.0", "@codemirror/commands": "^6.10.2", "@codemirror/lang-json": "^6.0.2", @@ -21,6 +20,7 @@ "@codemirror/search": "^6.6.0", "@codemirror/state": "^6.5.4", "@codemirror/view": "^6.39.14", + "@cosmograph/cosmograph": "2.0.1", "@lezer/common": "^1.5.1", "@lezer/highlight": "^1.2.3", "@tailwindcss/vite": "^4.1.18", @@ -33,7 +33,9 @@ "cytoscape": "^3.33.1", "cytoscape-cise": "^2.0.1", "cytoscape-fcose": "^2.2.0", + "isomorphic-dompurify": "^2.36.0", "lucide-vue-next": 
"^0.563.0", + "marked": "^15.0.12", "nuxt": "^4.3.1", "oidc-client-ts": "^3.4.1", "reka-ui": "^2.8.0", From 10584044317806777add05cc942862d2a340beac Mon Sep 17 00:00:00 2001 From: aredenba-rh <aredenba@redhat.com> Date: Wed, 3 Jun 2026 15:02:25 -0400 Subject: [PATCH 093/153] fix(dev-ui): sync pnpm lockfile for markdown chat dependencies Docker dev installs from pnpm-lock.yaml; add marked and isomorphic-dompurify there and drop the npm package-lock.json added by mistake. Co-authored-by: Cursor <cursoragent@cursor.com> --- src/dev-ui/package-lock.json | 9739 ---------------------------------- src/dev-ui/pnpm-lock.yaml | 361 +- 2 files changed, 359 insertions(+), 9741 deletions(-) delete mode 100644 src/dev-ui/package-lock.json diff --git a/src/dev-ui/package-lock.json b/src/dev-ui/package-lock.json deleted file mode 100644 index 0f4dffae0..000000000 --- a/src/dev-ui/package-lock.json +++ /dev/null @@ -1,9739 +0,0 @@ -{ - "name": "ui", - "lockfileVersion": 3, - "requires": true, - "packages": { - "": { - "name": "ui", - "hasInstallScript": true, - "dependencies": { - "@codemirror/autocomplete": "^6.20.0", - "@codemirror/commands": "^6.10.2", - "@codemirror/lang-json": "^6.0.2", - "@codemirror/language": "^6.12.1", - "@codemirror/lint": "^6.9.4", - "@codemirror/search": "^6.6.0", - "@codemirror/state": "^6.5.4", - "@codemirror/view": "^6.39.14", - "@cosmograph/cosmograph": "2.0.1", - "@lezer/common": "^1.5.1", - "@lezer/highlight": "^1.2.3", - "@tailwindcss/vite": "^4.1.18", - "@tanstack/vue-table": "^8.21.3", - "@tanstack/vue-virtual": "^3.13.18", - "@vueuse/core": "^14.2.1", - "class-variance-authority": "^0.7.1", - "clsx": "^2.1.1", - "codemirror": "^6.0.2", - "cytoscape": "^3.33.1", - "cytoscape-cise": "^2.0.1", - "cytoscape-fcose": "^2.2.0", - "isomorphic-dompurify": "^2.36.0", - "lucide-vue-next": "^0.563.0", - "marked": "^15.0.12", - "nuxt": "^4.3.1", - "oidc-client-ts": "^3.4.1", - "reka-ui": "^2.8.0", - "tailwind-merge": "^3.4.0", - "tailwindcss": "^4.1.18", - 
"vue": "^3.5.28", - "vue-router": "^4.6.4", - "vue-sonner": "^2.0.9" - }, - "devDependencies": { - "@types/cytoscape": "^3.31.0", - "@vitejs/plugin-vue": "^5.2.1", - "@vue/test-utils": "^2.4.6", - "happy-dom": "^15.11.7", - "tw-animate-css": "^1.4.0", - "typescript": "^5.8.3", - "vitest": "^2.1.9", - "vue-tsc": "^2.2.10" - } - }, - "node_modules/@acemir/cssom": { - "version": "0.9.31", - "resolved": "https://registry.npmjs.org/@acemir/cssom/-/cssom-0.9.31.tgz", - "integrity": "sha512-ZnR3GSaH+/vJ0YlHau21FjfLYjMpYVIzTD8M8vIEQvIGxeOXyXdzCI140rrCY862p/C/BbzWsjc1dgnM9mkoTA==", - "license": "MIT" - }, - "node_modules/@asamuzakjp/css-color": { - "version": "5.1.11", - "resolved": "https://registry.npmjs.org/@asamuzakjp/css-color/-/css-color-5.1.11.tgz", - "integrity": "sha512-KVw6qIiCTUQhByfTd78h2yD1/00waTmm9uy/R7Ck/ctUyAPj+AEDLkQIdJW0T8+qGgj3j5bpNKK7Q3G+LedJWg==", - "license": "MIT", - "dependencies": { - "@asamuzakjp/generational-cache": "^1.0.1", - "@csstools/css-calc": "^3.2.0", - "@csstools/css-color-parser": "^4.1.0", - "@csstools/css-parser-algorithms": "^4.0.0", - "@csstools/css-tokenizer": "^4.0.0" - }, - "engines": { - "node": "^20.19.0 || ^22.12.0 || >=24.0.0" - } - }, - "node_modules/@asamuzakjp/dom-selector": { - "version": "6.8.1", - "resolved": "https://registry.npmjs.org/@asamuzakjp/dom-selector/-/dom-selector-6.8.1.tgz", - "integrity": "sha512-MvRz1nCqW0fsy8Qz4dnLIvhOlMzqDVBabZx6lH+YywFDdjXhMY37SmpV1XFX3JzG5GWHn63j6HX6QPr3lZXHvQ==", - "license": "MIT", - "dependencies": { - "@asamuzakjp/nwsapi": "^2.3.9", - "bidi-js": "^1.0.3", - "css-tree": "^3.1.0", - "is-potential-custom-element-name": "^1.0.1", - "lru-cache": "^11.2.6" - } - }, - "node_modules/@asamuzakjp/dom-selector/node_modules/lru-cache": { - "version": "11.5.1", - "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-11.5.1.tgz", - "integrity": "sha512-RPimw/7aMdv2oqRrxKwvZXcPfwBrn/JZ2xYcY9Hus/6LaS3VOAKVWKWgNLCFSiOm1ESXinjsDlidVU7JlnCN2A==", - "license": "BlueOak-1.0.0", - "engines": { - 
"node": "20 || >=22" - } - }, - "node_modules/@asamuzakjp/generational-cache": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/@asamuzakjp/generational-cache/-/generational-cache-1.0.1.tgz", - "integrity": "sha512-wajfB8KqzMCN2KGNFdLkReeHncd0AslUSrvHVvvYWuU8ghncRJoA50kT3zP9MVL0+9g4/67H+cdvBskj9THPzg==", - "license": "MIT", - "engines": { - "node": "^20.19.0 || ^22.12.0 || >=24.0.0" - } - }, - "node_modules/@asamuzakjp/nwsapi": { - "version": "2.3.9", - "resolved": "https://registry.npmjs.org/@asamuzakjp/nwsapi/-/nwsapi-2.3.9.tgz", - "integrity": "sha512-n8GuYSrI9bF7FFZ/SjhwevlHc8xaVlb/7HmHelnc/PZXBD2ZR49NnN9sMMuDdEGPeeRQ5d0hqlSlEpgCX3Wl0Q==", - "license": "MIT" - }, - "node_modules/@babel/code-frame": { - "version": "7.29.0", - "license": "MIT", - "dependencies": { - "@babel/helper-validator-identifier": "^7.28.5", - "js-tokens": "^4.0.0", - "picocolors": "^1.1.1" - }, - "engines": { - "node": ">=6.9.0" - } - }, - "node_modules/@babel/compat-data": { - "version": "7.29.3", - "license": "MIT", - "engines": { - "node": ">=6.9.0" - } - }, - "node_modules/@babel/core": { - "version": "7.29.0", - "license": "MIT", - "dependencies": { - "@babel/code-frame": "^7.29.0", - "@babel/generator": "^7.29.0", - "@babel/helper-compilation-targets": "^7.28.6", - "@babel/helper-module-transforms": "^7.28.6", - "@babel/helpers": "^7.28.6", - "@babel/parser": "^7.29.0", - "@babel/template": "^7.28.6", - "@babel/traverse": "^7.29.0", - "@babel/types": "^7.29.0", - "@jridgewell/remapping": "^2.3.5", - "convert-source-map": "^2.0.0", - "debug": "^4.1.0", - "gensync": "^1.0.0-beta.2", - "json5": "^2.2.3", - "semver": "^6.3.1" - }, - "engines": { - "node": ">=6.9.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/babel" - } - }, - "node_modules/@babel/core/node_modules/semver": { - "version": "6.3.1", - "license": "ISC", - "bin": { - "semver": "bin/semver.js" - } - }, - "node_modules/@babel/generator": { - "version": "7.29.1", - 
"license": "MIT", - "dependencies": { - "@babel/parser": "^7.29.0", - "@babel/types": "^7.29.0", - "@jridgewell/gen-mapping": "^0.3.12", - "@jridgewell/trace-mapping": "^0.3.28", - "jsesc": "^3.0.2" - }, - "engines": { - "node": ">=6.9.0" - } - }, - "node_modules/@babel/helper-annotate-as-pure": { - "version": "7.27.3", - "license": "MIT", - "dependencies": { - "@babel/types": "^7.27.3" - }, - "engines": { - "node": ">=6.9.0" - } - }, - "node_modules/@babel/helper-compilation-targets": { - "version": "7.28.6", - "license": "MIT", - "dependencies": { - "@babel/compat-data": "^7.28.6", - "@babel/helper-validator-option": "^7.27.1", - "browserslist": "^4.24.0", - "lru-cache": "^5.1.1", - "semver": "^6.3.1" - }, - "engines": { - "node": ">=6.9.0" - } - }, - "node_modules/@babel/helper-compilation-targets/node_modules/semver": { - "version": "6.3.1", - "license": "ISC", - "bin": { - "semver": "bin/semver.js" - } - }, - "node_modules/@babel/helper-create-class-features-plugin": { - "version": "7.29.3", - "license": "MIT", - "dependencies": { - "@babel/helper-annotate-as-pure": "^7.27.3", - "@babel/helper-member-expression-to-functions": "^7.28.5", - "@babel/helper-optimise-call-expression": "^7.27.1", - "@babel/helper-replace-supers": "^7.28.6", - "@babel/helper-skip-transparent-expression-wrappers": "^7.27.1", - "@babel/traverse": "^7.29.0", - "semver": "^6.3.1" - }, - "engines": { - "node": ">=6.9.0" - }, - "peerDependencies": { - "@babel/core": "^7.0.0" - } - }, - "node_modules/@babel/helper-create-class-features-plugin/node_modules/semver": { - "version": "6.3.1", - "license": "ISC", - "bin": { - "semver": "bin/semver.js" - } - }, - "node_modules/@babel/helper-globals": { - "version": "7.28.0", - "license": "MIT", - "engines": { - "node": ">=6.9.0" - } - }, - "node_modules/@babel/helper-member-expression-to-functions": { - "version": "7.28.5", - "license": "MIT", - "dependencies": { - "@babel/traverse": "^7.28.5", - "@babel/types": "^7.28.5" - }, - "engines": { - 
"node": ">=6.9.0" - } - }, - "node_modules/@babel/helper-module-imports": { - "version": "7.28.6", - "license": "MIT", - "dependencies": { - "@babel/traverse": "^7.28.6", - "@babel/types": "^7.28.6" - }, - "engines": { - "node": ">=6.9.0" - } - }, - "node_modules/@babel/helper-module-transforms": { - "version": "7.28.6", - "license": "MIT", - "dependencies": { - "@babel/helper-module-imports": "^7.28.6", - "@babel/helper-validator-identifier": "^7.28.5", - "@babel/traverse": "^7.28.6" - }, - "engines": { - "node": ">=6.9.0" - }, - "peerDependencies": { - "@babel/core": "^7.0.0" - } - }, - "node_modules/@babel/helper-optimise-call-expression": { - "version": "7.27.1", - "license": "MIT", - "dependencies": { - "@babel/types": "^7.27.1" - }, - "engines": { - "node": ">=6.9.0" - } - }, - "node_modules/@babel/helper-plugin-utils": { - "version": "7.28.6", - "license": "MIT", - "engines": { - "node": ">=6.9.0" - } - }, - "node_modules/@babel/helper-replace-supers": { - "version": "7.28.6", - "license": "MIT", - "dependencies": { - "@babel/helper-member-expression-to-functions": "^7.28.5", - "@babel/helper-optimise-call-expression": "^7.27.1", - "@babel/traverse": "^7.28.6" - }, - "engines": { - "node": ">=6.9.0" - }, - "peerDependencies": { - "@babel/core": "^7.0.0" - } - }, - "node_modules/@babel/helper-skip-transparent-expression-wrappers": { - "version": "7.27.1", - "license": "MIT", - "dependencies": { - "@babel/traverse": "^7.27.1", - "@babel/types": "^7.27.1" - }, - "engines": { - "node": ">=6.9.0" - } - }, - "node_modules/@babel/helper-string-parser": { - "version": "7.27.1", - "license": "MIT", - "engines": { - "node": ">=6.9.0" - } - }, - "node_modules/@babel/helper-validator-identifier": { - "version": "7.28.5", - "license": "MIT", - "engines": { - "node": ">=6.9.0" - } - }, - "node_modules/@babel/helper-validator-option": { - "version": "7.27.1", - "license": "MIT", - "engines": { - "node": ">=6.9.0" - } - }, - "node_modules/@babel/helpers": { - "version": 
"7.29.2", - "license": "MIT", - "dependencies": { - "@babel/template": "^7.28.6", - "@babel/types": "^7.29.0" - }, - "engines": { - "node": ">=6.9.0" - } - }, - "node_modules/@babel/parser": { - "version": "7.29.3", - "license": "MIT", - "dependencies": { - "@babel/types": "^7.29.0" - }, - "bin": { - "parser": "bin/babel-parser.js" - }, - "engines": { - "node": ">=6.0.0" - } - }, - "node_modules/@babel/plugin-syntax-jsx": { - "version": "7.28.6", - "license": "MIT", - "dependencies": { - "@babel/helper-plugin-utils": "^7.28.6" - }, - "engines": { - "node": ">=6.9.0" - }, - "peerDependencies": { - "@babel/core": "^7.0.0-0" - } - }, - "node_modules/@babel/plugin-syntax-typescript": { - "version": "7.28.6", - "license": "MIT", - "dependencies": { - "@babel/helper-plugin-utils": "^7.28.6" - }, - "engines": { - "node": ">=6.9.0" - }, - "peerDependencies": { - "@babel/core": "^7.0.0-0" - } - }, - "node_modules/@babel/plugin-transform-typescript": { - "version": "7.28.6", - "license": "MIT", - "dependencies": { - "@babel/helper-annotate-as-pure": "^7.27.3", - "@babel/helper-create-class-features-plugin": "^7.28.6", - "@babel/helper-plugin-utils": "^7.28.6", - "@babel/helper-skip-transparent-expression-wrappers": "^7.27.1", - "@babel/plugin-syntax-typescript": "^7.28.6" - }, - "engines": { - "node": ">=6.9.0" - }, - "peerDependencies": { - "@babel/core": "^7.0.0-0" - } - }, - "node_modules/@babel/template": { - "version": "7.28.6", - "license": "MIT", - "dependencies": { - "@babel/code-frame": "^7.28.6", - "@babel/parser": "^7.28.6", - "@babel/types": "^7.28.6" - }, - "engines": { - "node": ">=6.9.0" - } - }, - "node_modules/@babel/traverse": { - "version": "7.29.0", - "license": "MIT", - "dependencies": { - "@babel/code-frame": "^7.29.0", - "@babel/generator": "^7.29.0", - "@babel/helper-globals": "^7.28.0", - "@babel/parser": "^7.29.0", - "@babel/template": "^7.28.6", - "@babel/types": "^7.29.0", - "debug": "^4.3.1" - }, - "engines": { - "node": ">=6.9.0" - } - }, - 
"node_modules/@babel/types": { - "version": "7.29.0", - "license": "MIT", - "dependencies": { - "@babel/helper-string-parser": "^7.27.1", - "@babel/helper-validator-identifier": "^7.28.5" - }, - "engines": { - "node": ">=6.9.0" - } - }, - "node_modules/@bramus/specificity": { - "version": "2.4.2", - "resolved": "https://registry.npmjs.org/@bramus/specificity/-/specificity-2.4.2.tgz", - "integrity": "sha512-ctxtJ/eA+t+6q2++vj5j7FYX3nRu311q1wfYH3xjlLOsczhlhxAg2FWNUXhpGvAw3BWo1xBcvOV6/YLc2r5FJw==", - "license": "MIT", - "dependencies": { - "css-tree": "^3.0.0" - }, - "bin": { - "specificity": "bin/cli.js" - } - }, - "node_modules/@clack/core": { - "version": "1.3.1", - "license": "MIT", - "dependencies": { - "fast-wrap-ansi": "^0.2.0", - "sisteransi": "^1.0.5" - }, - "engines": { - "node": ">= 20.12.0" - } - }, - "node_modules/@clack/prompts": { - "version": "1.4.0", - "license": "MIT", - "dependencies": { - "@clack/core": "1.3.1", - "fast-string-width": "^3.0.2", - "fast-wrap-ansi": "^0.2.0", - "sisteransi": "^1.0.5" - }, - "engines": { - "node": ">= 20.12.0" - } - }, - "node_modules/@cloudflare/kv-asset-handler": { - "version": "0.4.2", - "license": "MIT OR Apache-2.0", - "engines": { - "node": ">=18.0.0" - } - }, - "node_modules/@codemirror/autocomplete": { - "version": "6.20.2", - "license": "MIT", - "dependencies": { - "@codemirror/language": "^6.0.0", - "@codemirror/state": "^6.0.0", - "@codemirror/view": "^6.17.0", - "@lezer/common": "^1.0.0" - } - }, - "node_modules/@codemirror/commands": { - "version": "6.10.3", - "license": "MIT", - "dependencies": { - "@codemirror/language": "^6.0.0", - "@codemirror/state": "^6.6.0", - "@codemirror/view": "^6.27.0", - "@lezer/common": "^1.1.0" - } - }, - "node_modules/@codemirror/lang-json": { - "version": "6.0.2", - "license": "MIT", - "dependencies": { - "@codemirror/language": "^6.0.0", - "@lezer/json": "^1.0.0" - } - }, - "node_modules/@codemirror/language": { - "version": "6.12.3", - "license": "MIT", - "dependencies": 
{ - "@codemirror/state": "^6.0.0", - "@codemirror/view": "^6.23.0", - "@lezer/common": "^1.5.0", - "@lezer/highlight": "^1.0.0", - "@lezer/lr": "^1.0.0", - "style-mod": "^4.0.0" - } - }, - "node_modules/@codemirror/lint": { - "version": "6.9.6", - "license": "MIT", - "dependencies": { - "@codemirror/state": "^6.0.0", - "@codemirror/view": "^6.42.0", - "crelt": "^1.0.5" - } - }, - "node_modules/@codemirror/search": { - "version": "6.7.0", - "license": "MIT", - "dependencies": { - "@codemirror/state": "^6.0.0", - "@codemirror/view": "^6.37.0", - "crelt": "^1.0.5" - } - }, - "node_modules/@codemirror/state": { - "version": "6.6.0", - "license": "MIT", - "dependencies": { - "@marijn/find-cluster-break": "^1.0.0" - } - }, - "node_modules/@codemirror/view": { - "version": "6.43.0", - "license": "MIT", - "dependencies": { - "@codemirror/state": "^6.6.0", - "crelt": "^1.0.6", - "style-mod": "^4.1.0", - "w3c-keyname": "^2.2.4" - } - }, - "node_modules/@colordx/core": { - "version": "5.4.3", - "license": "MIT" - }, - "node_modules/@cosmograph/cosmograph": { - "version": "2.0.1", - "license": "CC-BY-NC-4.0", - "dependencies": { - "@cosmograph/ui": "2.0.1", - "@cosmos.gl/graph": "^2.6.2-rc.0", - "@duckdb/duckdb-wasm": "1.29.1-dev260.0", - "@interacta/css-labels": "^0.1.3-beta.1", - "@uwdata/mosaic-core": "^0.21.1", - "@uwdata/mosaic-plot": "^0.21.1", - "@uwdata/mosaic-sql": "^0.21.1", - "@uwdata/vgplot": "^0.21.1", - "apache-arrow": "17.0.0", - "d3-array": "^3.2.4", - "d3-brush": "^3.0.0", - "d3-color": "^3.1.0", - "d3-interpolate": "^3.0.1", - "d3-scale": "^4.0.2", - "d3-selection": "^3.0.0", - "dompurify": "^3.2.6" - } - }, - "node_modules/@cosmograph/ui": { - "version": "2.0.1", - "license": "CC-BY-NC-4.0", - "dependencies": { - "@juggle/resize-observer": "^3.4.0", - "d3-array": "^3.2.4", - "d3-axis": "^3.0.0", - "d3-brush": "^3.0.0", - "d3-format": "^3.1.0", - "d3-scale": "^4.0.2", - "d3-selection": "^3.0.0", - "d3-time-format": "^4.1.0", - "d3-transition": "^3.0.1" - } - 
}, - "node_modules/@cosmos.gl/graph": { - "version": "2.6.4", - "license": "MIT", - "dependencies": { - "d3-array": "^3.2.0", - "d3-color": "^3.1.0", - "d3-drag": "^3.0.0", - "d3-ease": "^3.0.1", - "d3-scale": "^4.0.2", - "d3-selection": "^3.0.0", - "d3-transition": "^3.0.1", - "d3-zoom": "^3.0.0", - "dompurify": "^3.2.6", - "gl-bench": "^1.0.42", - "gl-matrix": "^3.4.3", - "random": "^4.1.0", - "regl": "^2.1.0" - }, - "engines": { - "node": ">=12.2.0", - "npm": ">=7.0.0" - } - }, - "node_modules/@csstools/color-helpers": { - "version": "6.0.2", - "resolved": "https://registry.npmjs.org/@csstools/color-helpers/-/color-helpers-6.0.2.tgz", - "integrity": "sha512-LMGQLS9EuADloEFkcTBR3BwV/CGHV7zyDxVRtVDTwdI2Ca4it0CCVTT9wCkxSgokjE5Ho41hEPgb8OEUwoXr6Q==", - "funding": [ - { - "type": "github", - "url": "https://github.com/sponsors/csstools" - }, - { - "type": "opencollective", - "url": "https://opencollective.com/csstools" - } - ], - "license": "MIT-0", - "engines": { - "node": ">=20.19.0" - } - }, - "node_modules/@csstools/css-calc": { - "version": "3.2.1", - "resolved": "https://registry.npmjs.org/@csstools/css-calc/-/css-calc-3.2.1.tgz", - "integrity": "sha512-DtdHlgXh5ZkA43cwBcAm+huzgJiwx3ZTWVjBs94kwz2xKqSimDA3lBgCjphYgwgVUMWatSM0pDd8TILB1yrVVg==", - "funding": [ - { - "type": "github", - "url": "https://github.com/sponsors/csstools" - }, - { - "type": "opencollective", - "url": "https://opencollective.com/csstools" - } - ], - "license": "MIT", - "engines": { - "node": ">=20.19.0" - }, - "peerDependencies": { - "@csstools/css-parser-algorithms": "^4.0.0", - "@csstools/css-tokenizer": "^4.0.0" - } - }, - "node_modules/@csstools/css-color-parser": { - "version": "4.1.1", - "resolved": "https://registry.npmjs.org/@csstools/css-color-parser/-/css-color-parser-4.1.1.tgz", - "integrity": "sha512-eZ5XOtyhK+mggRafYUWzA0tvaYOFgdY8AkgQiCJF9qNAePnUo/zmsqqYubBBb3sQ8uNUaSKTY9s9klfRaAXL0g==", - "funding": [ - { - "type": "github", - "url": "https://github.com/sponsors/csstools" - 
}, - { - "type": "opencollective", - "url": "https://opencollective.com/csstools" - } - ], - "license": "MIT", - "dependencies": { - "@csstools/color-helpers": "^6.0.2", - "@csstools/css-calc": "^3.2.1" - }, - "engines": { - "node": ">=20.19.0" - }, - "peerDependencies": { - "@csstools/css-parser-algorithms": "^4.0.0", - "@csstools/css-tokenizer": "^4.0.0" - } - }, - "node_modules/@csstools/css-parser-algorithms": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/@csstools/css-parser-algorithms/-/css-parser-algorithms-4.0.0.tgz", - "integrity": "sha512-+B87qS7fIG3L5h3qwJ/IFbjoVoOe/bpOdh9hAjXbvx0o8ImEmUsGXN0inFOnk2ChCFgqkkGFQ+TpM5rbhkKe4w==", - "funding": [ - { - "type": "github", - "url": "https://github.com/sponsors/csstools" - }, - { - "type": "opencollective", - "url": "https://opencollective.com/csstools" - } - ], - "license": "MIT", - "engines": { - "node": ">=20.19.0" - }, - "peerDependencies": { - "@csstools/css-tokenizer": "^4.0.0" - } - }, - "node_modules/@csstools/css-syntax-patches-for-csstree": { - "version": "1.1.4", - "resolved": "https://registry.npmjs.org/@csstools/css-syntax-patches-for-csstree/-/css-syntax-patches-for-csstree-1.1.4.tgz", - "integrity": "sha512-wgsqt92b7C7tQhIdPNxj0n9zuUbQlvAuI1exyzeNrOKOi62SD7ren8zqszmpVREjAOqg8cD2FqYhQfAuKjk4sw==", - "funding": [ - { - "type": "github", - "url": "https://github.com/sponsors/csstools" - }, - { - "type": "opencollective", - "url": "https://opencollective.com/csstools" - } - ], - "license": "MIT-0", - "peerDependencies": { - "css-tree": "^3.2.1" - }, - "peerDependenciesMeta": { - "css-tree": { - "optional": true - } - } - }, - "node_modules/@csstools/css-tokenizer": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/@csstools/css-tokenizer/-/css-tokenizer-4.0.0.tgz", - "integrity": "sha512-QxULHAm7cNu72w97JUNCBFODFaXpbDg+dP8b/oWFAZ2MTRppA3U00Y2L1HqaS4J6yBqxwa/Y3nMBaxVKbB/NsA==", - "funding": [ - { - "type": "github", - "url": "https://github.com/sponsors/csstools" - }, 
- { - "type": "opencollective", - "url": "https://opencollective.com/csstools" - } - ], - "license": "MIT", - "engines": { - "node": ">=20.19.0" - } - }, - "node_modules/@duckdb/duckdb-wasm": { - "version": "1.29.1-dev260.0", - "license": "MIT", - "dependencies": { - "apache-arrow": "^17.0.0" - } - }, - "node_modules/@dxup/nuxt": { - "version": "0.4.1", - "license": "MIT", - "dependencies": { - "@dxup/unimport": "^0.1.2", - "@nuxt/kit": "^4.4.2", - "chokidar": "^5.0.0", - "pathe": "^2.0.3", - "tinyglobby": "^0.2.16" - }, - "peerDependencies": { - "typescript": "*" - }, - "peerDependenciesMeta": { - "typescript": { - "optional": true - } - } - }, - "node_modules/@dxup/unimport": { - "version": "0.1.2", - "license": "MIT" - }, - "node_modules/@esbuild/linux-x64": { - "version": "0.28.0", - "cpu": [ - "x64" - ], - "license": "MIT", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@exodus/bytes": { - "version": "1.15.1", - "resolved": "https://registry.npmjs.org/@exodus/bytes/-/bytes-1.15.1.tgz", - "integrity": "sha512-S6mL0yNB/Abt9Ei4tq8gDhcczc4S3+vQ4ra7vxnAf+YHC02srtqxKKZghx2Dq6p0e66THKwR6r8N6P95wEty7Q==", - "license": "MIT", - "engines": { - "node": "^20.19.0 || ^22.12.0 || >=24.0.0" - }, - "peerDependencies": { - "@noble/hashes": "^1.8.0 || ^2.0.0" - }, - "peerDependenciesMeta": { - "@noble/hashes": { - "optional": true - } - } - }, - "node_modules/@floating-ui/core": { - "version": "1.7.5", - "license": "MIT", - "dependencies": { - "@floating-ui/utils": "^0.2.11" - } - }, - "node_modules/@floating-ui/dom": { - "version": "1.7.6", - "license": "MIT", - "dependencies": { - "@floating-ui/core": "^1.7.5", - "@floating-ui/utils": "^0.2.11" - } - }, - "node_modules/@floating-ui/utils": { - "version": "0.2.11", - "license": "MIT" - }, - "node_modules/@floating-ui/vue": { - "version": "1.1.11", - "license": "MIT", - "dependencies": { - "@floating-ui/dom": "^1.7.6", - "@floating-ui/utils": "^0.2.11", - "vue-demi": 
">=0.13.0" - } - }, - "node_modules/@floating-ui/vue/node_modules/vue-demi": { - "version": "0.14.10", - "hasInstallScript": true, - "license": "MIT", - "bin": { - "vue-demi-fix": "bin/vue-demi-fix.js", - "vue-demi-switch": "bin/vue-demi-switch.js" - }, - "engines": { - "node": ">=12" - }, - "funding": { - "url": "https://github.com/sponsors/antfu" - }, - "peerDependencies": { - "@vue/composition-api": "^1.0.0-rc.1", - "vue": "^3.0.0-0 || ^2.6.0" - }, - "peerDependenciesMeta": { - "@vue/composition-api": { - "optional": true - } - } - }, - "node_modules/@interacta/css-labels": { - "version": "0.1.3-beta.2", - "license": "MIT" - }, - "node_modules/@internationalized/date": { - "version": "3.12.1", - "license": "Apache-2.0", - "dependencies": { - "@swc/helpers": "^0.5.0" - } - }, - "node_modules/@internationalized/number": { - "version": "3.6.6", - "license": "Apache-2.0", - "dependencies": { - "@swc/helpers": "^0.5.0" - } - }, - "node_modules/@ioredis/commands": { - "version": "1.5.1", - "license": "MIT" - }, - "node_modules/@isaacs/cliui": { - "version": "8.0.2", - "license": "ISC", - "dependencies": { - "string-width": "^5.1.2", - "string-width-cjs": "npm:string-width@^4.2.0", - "strip-ansi": "^7.0.1", - "strip-ansi-cjs": "npm:strip-ansi@^6.0.1", - "wrap-ansi": "^8.1.0", - "wrap-ansi-cjs": "npm:wrap-ansi@^7.0.0" - }, - "engines": { - "node": ">=12" - } - }, - "node_modules/@isaacs/fs-minipass": { - "version": "4.0.1", - "license": "ISC", - "dependencies": { - "minipass": "^7.0.4" - }, - "engines": { - "node": ">=18.0.0" - } - }, - "node_modules/@jridgewell/gen-mapping": { - "version": "0.3.13", - "license": "MIT", - "dependencies": { - "@jridgewell/sourcemap-codec": "^1.5.0", - "@jridgewell/trace-mapping": "^0.3.24" - } - }, - "node_modules/@jridgewell/remapping": { - "version": "2.3.5", - "license": "MIT", - "dependencies": { - "@jridgewell/gen-mapping": "^0.3.5", - "@jridgewell/trace-mapping": "^0.3.24" - } - }, - "node_modules/@jridgewell/resolve-uri": { - 
"version": "3.1.2", - "license": "MIT", - "engines": { - "node": ">=6.0.0" - } - }, - "node_modules/@jridgewell/source-map": { - "version": "0.3.11", - "license": "MIT", - "dependencies": { - "@jridgewell/gen-mapping": "^0.3.5", - "@jridgewell/trace-mapping": "^0.3.25" - } - }, - "node_modules/@jridgewell/sourcemap-codec": { - "version": "1.5.5", - "license": "MIT" - }, - "node_modules/@jridgewell/trace-mapping": { - "version": "0.3.31", - "license": "MIT", - "dependencies": { - "@jridgewell/resolve-uri": "^3.1.0", - "@jridgewell/sourcemap-codec": "^1.4.14" - } - }, - "node_modules/@juggle/resize-observer": { - "version": "3.4.0", - "license": "Apache-2.0" - }, - "node_modules/@kwsites/file-exists": { - "version": "1.1.1", - "license": "MIT", - "dependencies": { - "debug": "^4.1.1" - } - }, - "node_modules/@kwsites/promise-deferred": { - "version": "1.1.1", - "license": "MIT" - }, - "node_modules/@lezer/common": { - "version": "1.5.2", - "license": "MIT" - }, - "node_modules/@lezer/highlight": { - "version": "1.2.3", - "license": "MIT", - "dependencies": { - "@lezer/common": "^1.3.0" - } - }, - "node_modules/@lezer/json": { - "version": "1.0.3", - "license": "MIT", - "dependencies": { - "@lezer/common": "^1.2.0", - "@lezer/highlight": "^1.0.0", - "@lezer/lr": "^1.0.0" - } - }, - "node_modules/@lezer/lr": { - "version": "1.4.10", - "license": "MIT", - "dependencies": { - "@lezer/common": "^1.0.0" - } - }, - "node_modules/@mapbox/node-pre-gyp": { - "version": "2.0.3", - "license": "BSD-3-Clause", - "dependencies": { - "consola": "^3.2.3", - "detect-libc": "^2.0.0", - "https-proxy-agent": "^7.0.5", - "node-fetch": "^2.6.7", - "nopt": "^8.0.0", - "semver": "^7.5.3", - "tar": "^7.4.0" - }, - "bin": { - "node-pre-gyp": "bin/node-pre-gyp" - }, - "engines": { - "node": ">=18" - } - }, - "node_modules/@mapbox/node-pre-gyp/node_modules/abbrev": { - "version": "3.0.1", - "license": "ISC", - "engines": { - "node": "^18.17.0 || >=20.5.0" - } - }, - 
"node_modules/@mapbox/node-pre-gyp/node_modules/nopt": { - "version": "8.1.0", - "license": "ISC", - "dependencies": { - "abbrev": "^3.0.0" - }, - "bin": { - "nopt": "bin/nopt.js" - }, - "engines": { - "node": "^18.17.0 || >=20.5.0" - } - }, - "node_modules/@marijn/find-cluster-break": { - "version": "1.0.2", - "license": "MIT" - }, - "node_modules/@nodelib/fs.scandir": { - "version": "2.1.5", - "license": "MIT", - "dependencies": { - "@nodelib/fs.stat": "2.0.5", - "run-parallel": "^1.1.9" - }, - "engines": { - "node": ">= 8" - } - }, - "node_modules/@nodelib/fs.stat": { - "version": "2.0.5", - "license": "MIT", - "engines": { - "node": ">= 8" - } - }, - "node_modules/@nodelib/fs.walk": { - "version": "1.2.8", - "license": "MIT", - "dependencies": { - "@nodelib/fs.scandir": "2.1.5", - "fastq": "^1.6.0" - }, - "engines": { - "node": ">= 8" - } - }, - "node_modules/@nuxt/cli": { - "version": "3.35.2", - "license": "MIT", - "dependencies": { - "@bomb.sh/tab": "^0.0.15", - "@clack/prompts": "^1.3.0", - "c12": "^3.3.4", - "citty": "^0.2.2", - "confbox": "^0.2.4", - "consola": "^3.4.2", - "debug": "^4.4.3", - "defu": "^6.1.7", - "exsolve": "^1.0.8", - "fuse.js": "^7.3.0", - "fzf": "^0.5.2", - "giget": "^3.2.0", - "jiti": "^2.7.0", - "listhen": "^1.10.0", - "nypm": "^0.6.6", - "ofetch": "^1.5.1", - "ohash": "^2.0.11", - "pathe": "^2.0.3", - "perfect-debounce": "^2.1.0", - "pkg-types": "^2.3.1", - "scule": "^1.3.0", - "semver": "^7.8.0", - "srvx": "^0.11.15", - "std-env": "^4.1.0", - "tinyclip": "^0.1.12", - "tinyexec": "^1.1.2", - "ufo": "^1.6.4", - "youch": "^4.1.1" - }, - "bin": { - "nuxi": "bin/nuxi.mjs", - "nuxi-ng": "bin/nuxi.mjs", - "nuxt": "bin/nuxi.mjs", - "nuxt-cli": "bin/nuxi.mjs" - }, - "engines": { - "node": "^16.14.0 || >=18.0.0" - }, - "peerDependencies": { - "@nuxt/schema": "^4.4.5" - }, - "peerDependenciesMeta": { - "@nuxt/schema": { - "optional": true - } - } - }, - "node_modules/@nuxt/cli/node_modules/@bomb.sh/tab": { - "version": "0.0.15", - "license": 
"MIT", - "bin": { - "tab": "dist/bin/cli.mjs" - }, - "peerDependencies": { - "cac": "^6.7.14", - "citty": "^0.1.6 || ^0.2.0", - "commander": "^13.1.0" - }, - "peerDependenciesMeta": { - "cac": { - "optional": true - }, - "citty": { - "optional": true - }, - "commander": { - "optional": true - } - } - }, - "node_modules/@nuxt/cli/node_modules/commander": { - "version": "13.1.0", - "license": "MIT", - "optional": true, - "peer": true, - "engines": { - "node": ">=18" - } - }, - "node_modules/@nuxt/devalue": { - "version": "2.0.2", - "license": "MIT" - }, - "node_modules/@nuxt/devtools": { - "version": "3.2.4", - "license": "MIT", - "dependencies": { - "@nuxt/devtools-kit": "3.2.4", - "@nuxt/devtools-wizard": "3.2.4", - "@nuxt/kit": "^4.4.2", - "@vue/devtools-core": "^8.1.0", - "@vue/devtools-kit": "^8.1.0", - "birpc": "^4.0.0", - "consola": "^3.4.2", - "destr": "^2.0.5", - "error-stack-parser-es": "^1.0.5", - "execa": "^8.0.1", - "fast-npm-meta": "^1.4.2", - "get-port-please": "^3.2.0", - "hookable": "^6.1.0", - "image-meta": "^0.2.2", - "is-installed-globally": "^1.0.0", - "launch-editor": "^2.13.1", - "local-pkg": "^1.1.2", - "magicast": "^0.5.2", - "nypm": "^0.6.5", - "ohash": "^2.0.11", - "pathe": "^2.0.3", - "perfect-debounce": "^2.1.0", - "pkg-types": "^2.3.0", - "semver": "^7.7.4", - "simple-git": "^3.33.0", - "sirv": "^3.0.2", - "structured-clone-es": "^2.0.0", - "tinyglobby": "^0.2.15", - "vite-plugin-inspect": "^11.3.3", - "vite-plugin-vue-tracer": "^1.3.0", - "which": "^6.0.1", - "ws": "^8.19.0" - }, - "bin": { - "devtools": "cli.mjs" - }, - "peerDependencies": { - "@vitejs/devtools": "*", - "vite": ">=6.0" - }, - "peerDependenciesMeta": { - "@vitejs/devtools": { - "optional": true - } - } - }, - "node_modules/@nuxt/devtools-kit": { - "version": "3.2.4", - "license": "MIT", - "dependencies": { - "@nuxt/kit": "^4.4.2", - "execa": "^8.0.1" - }, - "peerDependencies": { - "vite": ">=6.0" - } - }, - "node_modules/@nuxt/devtools-wizard": { - "version": "3.2.4", - 
"license": "MIT", - "dependencies": { - "@clack/prompts": "^1.1.0", - "consola": "^3.4.2", - "diff": "^8.0.3", - "execa": "^8.0.1", - "magicast": "^0.5.2", - "pathe": "^2.0.3", - "pkg-types": "^2.3.0", - "semver": "^7.7.4" - }, - "bin": { - "devtools-wizard": "cli.mjs" - } - }, - "node_modules/@nuxt/devtools/node_modules/isexe": { - "version": "4.0.0", - "license": "BlueOak-1.0.0", - "engines": { - "node": ">=20" - } - }, - "node_modules/@nuxt/devtools/node_modules/which": { - "version": "6.0.1", - "license": "ISC", - "dependencies": { - "isexe": "^4.0.0" - }, - "bin": { - "node-which": "bin/which.js" - }, - "engines": { - "node": "^20.17.0 || >=22.9.0" - } - }, - "node_modules/@nuxt/kit": { - "version": "4.4.6", - "license": "MIT", - "dependencies": { - "c12": "^3.3.4", - "consola": "^3.4.2", - "defu": "^6.1.7", - "destr": "^2.0.5", - "errx": "^0.1.0", - "exsolve": "^1.0.8", - "ignore": "^7.0.5", - "jiti": "^2.7.0", - "klona": "^2.0.6", - "mlly": "^1.8.2", - "ohash": "^2.0.11", - "pathe": "^2.0.3", - "pkg-types": "^2.3.1", - "rc9": "^3.0.1", - "scule": "^1.3.0", - "semver": "^7.8.0", - "tinyglobby": "^0.2.16", - "ufo": "^1.6.4", - "unctx": "^2.5.0", - "untyped": "^2.0.0" - }, - "engines": { - "node": ">=18.12.0" - } - }, - "node_modules/@nuxt/nitro-server": { - "version": "4.4.6", - "license": "MIT", - "dependencies": { - "@nuxt/devalue": "^2.0.2", - "@nuxt/kit": "4.4.6", - "@unhead/vue": "^2.1.15", - "@vue/shared": "^3.5.34", - "consola": "^3.4.2", - "defu": "^6.1.7", - "destr": "^2.0.5", - "devalue": "^5.8.1", - "errx": "^0.1.0", - "escape-string-regexp": "^5.0.0", - "exsolve": "^1.0.8", - "h3": "^1.15.11", - "impound": "^1.1.5", - "klona": "^2.0.6", - "mocked-exports": "^0.1.1", - "nitropack": "^2.13.4", - "nypm": "^0.6.6", - "ohash": "^2.0.11", - "pathe": "^2.0.3", - "rou3": "^0.8.1", - "std-env": "^4.1.0", - "ufo": "^1.6.4", - "unctx": "^2.5.0", - "unstorage": "^1.17.5", - "vue": "^3.5.34", - "vue-bundle-renderer": "^2.2.0", - "vue-devtools-stub": "^0.1.0" - 
}, - "engines": { - "node": "^22.12.0 || ^24.11.0 || >=26.0.0" - }, - "peerDependencies": { - "@babel/plugin-proposal-decorators": "^7.25.0", - "@babel/plugin-syntax-typescript": "^7.25.0", - "@rollup/plugin-babel": "^6.0.0 || ^7.0.0", - "nuxt": "^4.4.6" - }, - "peerDependenciesMeta": { - "@babel/plugin-proposal-decorators": { - "optional": true - }, - "@babel/plugin-syntax-typescript": { - "optional": true - }, - "@rollup/plugin-babel": { - "optional": true - } - } - }, - "node_modules/@nuxt/schema": { - "version": "4.4.6", - "license": "MIT", - "dependencies": { - "@vue/shared": "^3.5.34", - "defu": "^6.1.7", - "pathe": "^2.0.3", - "pkg-types": "^2.3.1", - "std-env": "^4.1.0" - }, - "engines": { - "node": "^14.18.0 || >=16.10.0" - } - }, - "node_modules/@nuxt/telemetry": { - "version": "2.8.0", - "license": "MIT", - "dependencies": { - "citty": "^0.2.1", - "consola": "^3.4.2", - "ofetch": "^2.0.0-alpha.3", - "rc9": "^3.0.0", - "std-env": "^4.0.0" - }, - "bin": { - "nuxt-telemetry": "bin/nuxt-telemetry.mjs" - }, - "engines": { - "node": ">=18.12.0" - }, - "peerDependencies": { - "@nuxt/kit": ">=3.0.0" - } - }, - "node_modules/@nuxt/telemetry/node_modules/ofetch": { - "version": "2.0.0-alpha.3", - "license": "MIT" - }, - "node_modules/@nuxt/vite-builder": { - "version": "4.4.6", - "license": "MIT", - "dependencies": { - "@nuxt/kit": "4.4.6", - "@rollup/plugin-replace": "^6.0.3", - "@vitejs/plugin-vue": "^6.0.7", - "@vitejs/plugin-vue-jsx": "^5.1.5", - "autoprefixer": "^10.5.0", - "consola": "^3.4.2", - "cssnano": "^8.0.1", - "defu": "^6.1.7", - "escape-string-regexp": "^5.0.0", - "exsolve": "^1.0.8", - "get-port-please": "^3.2.0", - "jiti": "^2.7.0", - "knitwork": "^1.3.0", - "magic-string": "^0.30.21", - "mlly": "^1.8.2", - "mocked-exports": "^0.1.1", - "nypm": "^0.6.6", - "pathe": "^2.0.3", - "pkg-types": "^2.3.1", - "postcss": "^8.5.14", - "seroval": "^1.5.4", - "std-env": "^4.1.0", - "ufo": "^1.6.4", - "unenv": "^2.0.0-rc.24", - "vite": "^7.3.3", - "vite-node": 
"^5.3.0", - "vite-plugin-checker": "^0.13.0", - "vue-bundle-renderer": "^2.2.0" - }, - "engines": { - "node": "^22.12.0 || ^24.11.0 || >=26.0.0" - }, - "peerDependencies": { - "@babel/plugin-proposal-decorators": "^7.25.0", - "@babel/plugin-syntax-jsx": "^7.25.0", - "nuxt": "4.4.6", - "rolldown": "^1.0.0-beta.38", - "rollup-plugin-visualizer": "^6.0.0 || ^7.0.1", - "vue": "^3.3.4" - }, - "peerDependenciesMeta": { - "@babel/plugin-proposal-decorators": { - "optional": true - }, - "@babel/plugin-syntax-jsx": { - "optional": true - }, - "rolldown": { - "optional": true - }, - "rollup-plugin-visualizer": { - "optional": true - } - } - }, - "node_modules/@nuxt/vite-builder/node_modules/@esbuild/linux-x64": { - "version": "0.27.7", - "cpu": [ - "x64" - ], - "license": "MIT", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@nuxt/vite-builder/node_modules/@vitejs/plugin-vue": { - "version": "6.0.7", - "license": "MIT", - "dependencies": { - "@rolldown/pluginutils": "^1.0.1" - }, - "engines": { - "node": "^20.19.0 || >=22.12.0" - }, - "peerDependencies": { - "vite": "^5.0.0 || ^6.0.0 || ^7.0.0 || ^8.0.0", - "vue": "^3.2.25" - } - }, - "node_modules/@nuxt/vite-builder/node_modules/esbuild": { - "version": "0.27.7", - "hasInstallScript": true, - "license": "MIT", - "bin": { - "esbuild": "bin/esbuild" - }, - "engines": { - "node": ">=18" - }, - "optionalDependencies": { - "@esbuild/aix-ppc64": "0.27.7", - "@esbuild/android-arm": "0.27.7", - "@esbuild/android-arm64": "0.27.7", - "@esbuild/android-x64": "0.27.7", - "@esbuild/darwin-arm64": "0.27.7", - "@esbuild/darwin-x64": "0.27.7", - "@esbuild/freebsd-arm64": "0.27.7", - "@esbuild/freebsd-x64": "0.27.7", - "@esbuild/linux-arm": "0.27.7", - "@esbuild/linux-arm64": "0.27.7", - "@esbuild/linux-ia32": "0.27.7", - "@esbuild/linux-loong64": "0.27.7", - "@esbuild/linux-mips64el": "0.27.7", - "@esbuild/linux-ppc64": "0.27.7", - "@esbuild/linux-riscv64": "0.27.7", - 
"@esbuild/linux-s390x": "0.27.7", - "@esbuild/linux-x64": "0.27.7", - "@esbuild/netbsd-arm64": "0.27.7", - "@esbuild/netbsd-x64": "0.27.7", - "@esbuild/openbsd-arm64": "0.27.7", - "@esbuild/openbsd-x64": "0.27.7", - "@esbuild/openharmony-arm64": "0.27.7", - "@esbuild/sunos-x64": "0.27.7", - "@esbuild/win32-arm64": "0.27.7", - "@esbuild/win32-ia32": "0.27.7", - "@esbuild/win32-x64": "0.27.7" - } - }, - "node_modules/@nuxt/vite-builder/node_modules/vite": { - "version": "7.3.3", - "license": "MIT", - "dependencies": { - "esbuild": "^0.27.0", - "fdir": "^6.5.0", - "picomatch": "^4.0.3", - "postcss": "^8.5.6", - "rollup": "^4.43.0", - "tinyglobby": "^0.2.15" - }, - "bin": { - "vite": "bin/vite.js" - }, - "engines": { - "node": "^20.19.0 || >=22.12.0" - }, - "funding": { - "url": "https://github.com/vitejs/vite?sponsor=1" - }, - "optionalDependencies": { - "fsevents": "~2.3.3" - }, - "peerDependencies": { - "@types/node": "^20.19.0 || >=22.12.0", - "jiti": ">=1.21.0", - "less": "^4.0.0", - "lightningcss": "^1.21.0", - "sass": "^1.70.0", - "sass-embedded": "^1.70.0", - "stylus": ">=0.54.8", - "sugarss": "^5.0.0", - "terser": "^5.16.0", - "tsx": "^4.8.1", - "yaml": "^2.4.2" - }, - "peerDependenciesMeta": { - "@types/node": { - "optional": true - }, - "jiti": { - "optional": true - }, - "less": { - "optional": true - }, - "lightningcss": { - "optional": true - }, - "sass": { - "optional": true - }, - "sass-embedded": { - "optional": true - }, - "stylus": { - "optional": true - }, - "sugarss": { - "optional": true - }, - "terser": { - "optional": true - }, - "tsx": { - "optional": true - }, - "yaml": { - "optional": true - } - } - }, - "node_modules/@observablehq/plot": { - "version": "0.6.17", - "license": "ISC", - "dependencies": { - "d3": "^7.9.0", - "interval-tree-1d": "^1.0.0", - "isoformat": "^0.2.0" - }, - "engines": { - "node": ">=12" - } - }, - "node_modules/@one-ini/wasm": { - "version": "0.1.1", - "dev": true, - "license": "MIT" - }, - 
"node_modules/@oxc-minify/binding-linux-x64-gnu": { - "version": "0.131.0", - "cpu": [ - "x64" - ], - "license": "MIT", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": "^20.19.0 || >=22.12.0" - } - }, - "node_modules/@oxc-parser/binding-linux-x64-gnu": { - "version": "0.131.0", - "cpu": [ - "x64" - ], - "license": "MIT", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": "^20.19.0 || >=22.12.0" - } - }, - "node_modules/@oxc-project/types": { - "version": "0.131.0", - "license": "MIT", - "funding": { - "url": "https://github.com/sponsors/Boshen" - } - }, - "node_modules/@oxc-transform/binding-linux-x64-gnu": { - "version": "0.131.0", - "cpu": [ - "x64" - ], - "license": "MIT", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": "^20.19.0 || >=22.12.0" - } - }, - "node_modules/@parcel/watcher": { - "version": "2.5.6", - "hasInstallScript": true, - "license": "MIT", - "dependencies": { - "detect-libc": "^2.0.3", - "is-glob": "^4.0.3", - "node-addon-api": "^7.0.0", - "picomatch": "^4.0.3" - }, - "engines": { - "node": ">= 10.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/parcel" - }, - "optionalDependencies": { - "@parcel/watcher-android-arm64": "2.5.6", - "@parcel/watcher-darwin-arm64": "2.5.6", - "@parcel/watcher-darwin-x64": "2.5.6", - "@parcel/watcher-freebsd-x64": "2.5.6", - "@parcel/watcher-linux-arm-glibc": "2.5.6", - "@parcel/watcher-linux-arm-musl": "2.5.6", - "@parcel/watcher-linux-arm64-glibc": "2.5.6", - "@parcel/watcher-linux-arm64-musl": "2.5.6", - "@parcel/watcher-linux-x64-glibc": "2.5.6", - "@parcel/watcher-linux-x64-musl": "2.5.6", - "@parcel/watcher-win32-arm64": "2.5.6", - "@parcel/watcher-win32-ia32": "2.5.6", - "@parcel/watcher-win32-x64": "2.5.6" - } - }, - "node_modules/@parcel/watcher-linux-x64-glibc": { - "version": "2.5.6", - "cpu": [ - "x64" - ], - "license": "MIT", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": ">= 
10.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/parcel" - } - }, - "node_modules/@parcel/watcher-wasm": { - "version": "2.5.6", - "bundleDependencies": [ - "napi-wasm" - ], - "license": "MIT", - "dependencies": { - "is-glob": "^4.0.3", - "napi-wasm": "^1.1.0", - "picomatch": "^4.0.3" - }, - "engines": { - "node": ">= 10.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/parcel" - } - }, - "node_modules/@parcel/watcher-wasm/node_modules/napi-wasm": { - "version": "1.1.0", - "inBundle": true, - "license": "MIT" - }, - "node_modules/@pkgjs/parseargs": { - "version": "0.11.0", - "license": "MIT", - "optional": true, - "engines": { - "node": ">=14" - } - }, - "node_modules/@polka/url": { - "version": "1.0.0-next.29", - "license": "MIT" - }, - "node_modules/@poppinss/colors": { - "version": "4.1.6", - "license": "MIT", - "dependencies": { - "kleur": "^4.1.5" - } - }, - "node_modules/@poppinss/dumper": { - "version": "0.7.0", - "license": "MIT", - "dependencies": { - "@poppinss/colors": "^4.1.5", - "@sindresorhus/is": "^7.0.2", - "supports-color": "^10.0.0" - } - }, - "node_modules/@poppinss/dumper/node_modules/supports-color": { - "version": "10.2.2", - "license": "MIT", - "engines": { - "node": ">=18" - }, - "funding": { - "url": "https://github.com/chalk/supports-color?sponsor=1" - } - }, - "node_modules/@poppinss/exception": { - "version": "1.2.3", - "license": "MIT" - }, - "node_modules/@rolldown/pluginutils": { - "version": "1.0.1", - "license": "MIT" - }, - "node_modules/@rollup/plugin-alias": { - "version": "6.0.0", - "license": "MIT", - "engines": { - "node": ">=20.19.0" - }, - "peerDependencies": { - "rollup": ">=4.0.0" - }, - "peerDependenciesMeta": { - "rollup": { - "optional": true - } - } - }, - "node_modules/@rollup/plugin-commonjs": { - "version": "29.0.2", - "license": "MIT", - "dependencies": { - "@rollup/pluginutils": "^5.0.1", - "commondir": "^1.0.1", - "estree-walker": 
"^2.0.2", - "fdir": "^6.2.0", - "is-reference": "1.2.1", - "magic-string": "^0.30.3", - "picomatch": "^4.0.2" - }, - "engines": { - "node": ">=16.0.0 || 14 >= 14.17" - }, - "peerDependencies": { - "rollup": "^2.68.0||^3.0.0||^4.0.0" - }, - "peerDependenciesMeta": { - "rollup": { - "optional": true - } - } - }, - "node_modules/@rollup/plugin-inject": { - "version": "5.0.5", - "license": "MIT", - "dependencies": { - "@rollup/pluginutils": "^5.0.1", - "estree-walker": "^2.0.2", - "magic-string": "^0.30.3" - }, - "engines": { - "node": ">=14.0.0" - }, - "peerDependencies": { - "rollup": "^1.20.0||^2.0.0||^3.0.0||^4.0.0" - }, - "peerDependenciesMeta": { - "rollup": { - "optional": true - } - } - }, - "node_modules/@rollup/plugin-json": { - "version": "6.1.0", - "license": "MIT", - "dependencies": { - "@rollup/pluginutils": "^5.1.0" - }, - "engines": { - "node": ">=14.0.0" - }, - "peerDependencies": { - "rollup": "^1.20.0||^2.0.0||^3.0.0||^4.0.0" - }, - "peerDependenciesMeta": { - "rollup": { - "optional": true - } - } - }, - "node_modules/@rollup/plugin-node-resolve": { - "version": "16.0.3", - "license": "MIT", - "dependencies": { - "@rollup/pluginutils": "^5.0.1", - "@types/resolve": "1.20.2", - "deepmerge": "^4.2.2", - "is-module": "^1.0.0", - "resolve": "^1.22.1" - }, - "engines": { - "node": ">=14.0.0" - }, - "peerDependencies": { - "rollup": "^2.78.0||^3.0.0||^4.0.0" - }, - "peerDependenciesMeta": { - "rollup": { - "optional": true - } - } - }, - "node_modules/@rollup/plugin-replace": { - "version": "6.0.3", - "license": "MIT", - "dependencies": { - "@rollup/pluginutils": "^5.0.1", - "magic-string": "^0.30.3" - }, - "engines": { - "node": ">=14.0.0" - }, - "peerDependencies": { - "rollup": "^1.20.0||^2.0.0||^3.0.0||^4.0.0" - }, - "peerDependenciesMeta": { - "rollup": { - "optional": true - } - } - }, - "node_modules/@rollup/plugin-terser": { - "version": "1.0.0", - "license": "MIT", - "dependencies": { - "serialize-javascript": "^7.0.3", - "smob": "^1.0.0", - 
"terser": "^5.17.4" - }, - "engines": { - "node": ">=20.0.0" - }, - "peerDependencies": { - "rollup": "^2.0.0||^3.0.0||^4.0.0" - }, - "peerDependenciesMeta": { - "rollup": { - "optional": true - } - } - }, - "node_modules/@rollup/pluginutils": { - "version": "5.3.0", - "license": "MIT", - "dependencies": { - "@types/estree": "^1.0.0", - "estree-walker": "^2.0.2", - "picomatch": "^4.0.2" - }, - "engines": { - "node": ">=14.0.0" - }, - "peerDependencies": { - "rollup": "^1.20.0||^2.0.0||^3.0.0||^4.0.0" - }, - "peerDependenciesMeta": { - "rollup": { - "optional": true - } - } - }, - "node_modules/@rollup/rollup-linux-x64-gnu": { - "version": "4.60.4", - "cpu": [ - "x64" - ], - "license": "MIT", - "optional": true, - "os": [ - "linux" - ] - }, - "node_modules/@simple-git/args-pathspec": { - "version": "1.0.3", - "license": "MIT" - }, - "node_modules/@simple-git/argv-parser": { - "version": "1.1.1", - "license": "MIT", - "dependencies": { - "@simple-git/args-pathspec": "^1.0.3" - } - }, - "node_modules/@sindresorhus/is": { - "version": "7.2.0", - "license": "MIT", - "engines": { - "node": ">=18" - }, - "funding": { - "url": "https://github.com/sindresorhus/is?sponsor=1" - } - }, - "node_modules/@sindresorhus/merge-streams": { - "version": "4.0.0", - "license": "MIT", - "engines": { - "node": ">=18" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/@speed-highlight/core": { - "version": "1.2.15", - "license": "CC0-1.0" - }, - "node_modules/@swc/helpers": { - "version": "0.5.21", - "license": "Apache-2.0", - "dependencies": { - "tslib": "^2.8.0" - } - }, - "node_modules/@tailwindcss/node": { - "version": "4.3.0", - "license": "MIT", - "dependencies": { - "@jridgewell/remapping": "^2.3.5", - "enhanced-resolve": "^5.21.0", - "jiti": "^2.6.1", - "lightningcss": "1.32.0", - "magic-string": "^0.30.21", - "source-map-js": "^1.2.1", - "tailwindcss": "4.3.0" - } - }, - "node_modules/@tailwindcss/oxide": { - "version": "4.3.0", - 
"license": "MIT", - "engines": { - "node": ">= 20" - }, - "optionalDependencies": { - "@tailwindcss/oxide-android-arm64": "4.3.0", - "@tailwindcss/oxide-darwin-arm64": "4.3.0", - "@tailwindcss/oxide-darwin-x64": "4.3.0", - "@tailwindcss/oxide-freebsd-x64": "4.3.0", - "@tailwindcss/oxide-linux-arm-gnueabihf": "4.3.0", - "@tailwindcss/oxide-linux-arm64-gnu": "4.3.0", - "@tailwindcss/oxide-linux-arm64-musl": "4.3.0", - "@tailwindcss/oxide-linux-x64-gnu": "4.3.0", - "@tailwindcss/oxide-linux-x64-musl": "4.3.0", - "@tailwindcss/oxide-wasm32-wasi": "4.3.0", - "@tailwindcss/oxide-win32-arm64-msvc": "4.3.0", - "@tailwindcss/oxide-win32-x64-msvc": "4.3.0" - } - }, - "node_modules/@tailwindcss/oxide-linux-x64-gnu": { - "version": "4.3.0", - "cpu": [ - "x64" - ], - "license": "MIT", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": ">= 20" - } - }, - "node_modules/@tailwindcss/vite": { - "version": "4.3.0", - "license": "MIT", - "dependencies": { - "@tailwindcss/node": "4.3.0", - "@tailwindcss/oxide": "4.3.0", - "tailwindcss": "4.3.0" - }, - "peerDependencies": { - "vite": "^5.2.0 || ^6 || ^7 || ^8" - } - }, - "node_modules/@tanstack/table-core": { - "version": "8.21.3", - "license": "MIT", - "engines": { - "node": ">=12" - }, - "funding": { - "type": "github", - "url": "https://github.com/sponsors/tannerlinsley" - } - }, - "node_modules/@tanstack/virtual-core": { - "version": "3.15.0", - "license": "MIT", - "funding": { - "type": "github", - "url": "https://github.com/sponsors/tannerlinsley" - } - }, - "node_modules/@tanstack/vue-table": { - "version": "8.21.3", - "license": "MIT", - "dependencies": { - "@tanstack/table-core": "8.21.3" - }, - "engines": { - "node": ">=12" - }, - "funding": { - "type": "github", - "url": "https://github.com/sponsors/tannerlinsley" - }, - "peerDependencies": { - "vue": ">=3.2" - } - }, - "node_modules/@tanstack/vue-virtual": { - "version": "3.13.25", - "license": "MIT", - "dependencies": { - "@tanstack/virtual-core": 
"3.15.0" - }, - "funding": { - "type": "github", - "url": "https://github.com/sponsors/tannerlinsley" - }, - "peerDependencies": { - "vue": "^2.7.0 || ^3.0.0" - } - }, - "node_modules/@types/command-line-args": { - "version": "5.2.3", - "license": "MIT" - }, - "node_modules/@types/command-line-usage": { - "version": "5.0.4", - "license": "MIT" - }, - "node_modules/@types/cytoscape": { - "version": "3.31.0", - "deprecated": "This is a stub types definition. cytoscape provides its own type definitions, so you do not need this installed.", - "dev": true, - "license": "MIT", - "dependencies": { - "cytoscape": "*" - } - }, - "node_modules/@types/estree": { - "version": "1.0.9", - "license": "MIT" - }, - "node_modules/@types/jsesc": { - "version": "2.5.1", - "license": "MIT" - }, - "node_modules/@types/node": { - "version": "20.19.41", - "license": "MIT", - "dependencies": { - "undici-types": "~6.21.0" - } - }, - "node_modules/@types/resolve": { - "version": "1.20.2", - "license": "MIT" - }, - "node_modules/@types/trusted-types": { - "version": "2.0.7", - "license": "MIT", - "optional": true - }, - "node_modules/@types/web-bluetooth": { - "version": "0.0.21", - "license": "MIT" - }, - "node_modules/@unhead/vue": { - "version": "2.1.15", - "license": "MIT", - "dependencies": { - "hookable": "^6.0.1", - "unhead": "2.1.15" - }, - "funding": { - "url": "https://github.com/sponsors/harlan-zw" - }, - "peerDependencies": { - "vue": ">=3.5.18" - } - }, - "node_modules/@uwdata/flechette": { - "version": "2.5.0", - "license": "BSD-3-Clause" - }, - "node_modules/@uwdata/mosaic-core": { - "version": "0.21.1", - "license": "BSD-3-Clause", - "dependencies": { - "@duckdb/duckdb-wasm": "1.30.0", - "@uwdata/flechette": "^2.2.5", - "@uwdata/mosaic-sql": "^0.21.1" - } - }, - "node_modules/@uwdata/mosaic-core/node_modules/@duckdb/duckdb-wasm": { - "version": "1.30.0", - "license": "MIT", - "dependencies": { - "apache-arrow": "^17.0.0" - } - }, - "node_modules/@uwdata/mosaic-inputs": { - 
"version": "0.21.1", - "license": "BSD-3-Clause", - "dependencies": { - "@uwdata/mosaic-core": "^0.21.1", - "@uwdata/mosaic-sql": "^0.21.1" - } - }, - "node_modules/@uwdata/mosaic-plot": { - "version": "0.21.1", - "license": "BSD-3-Clause", - "dependencies": { - "@observablehq/plot": "^0.6.17", - "@uwdata/mosaic-core": "^0.21.1", - "@uwdata/mosaic-sql": "^0.21.1", - "d3": "^7.9.0" - } - }, - "node_modules/@uwdata/mosaic-sql": { - "version": "0.21.1", - "license": "BSD-3-Clause" - }, - "node_modules/@uwdata/vgplot": { - "version": "0.21.1", - "license": "BSD-3-Clause", - "dependencies": { - "@uwdata/mosaic-core": "^0.21.1", - "@uwdata/mosaic-inputs": "^0.21.1", - "@uwdata/mosaic-plot": "^0.21.1", - "@uwdata/mosaic-sql": "^0.21.1" - } - }, - "node_modules/@vercel/nft": { - "version": "1.5.0", - "license": "MIT", - "dependencies": { - "@mapbox/node-pre-gyp": "^2.0.0", - "@rollup/pluginutils": "^5.1.3", - "acorn": "^8.6.0", - "acorn-import-attributes": "^1.9.5", - "async-sema": "^3.1.1", - "bindings": "^1.4.0", - "estree-walker": "2.0.2", - "glob": "^13.0.0", - "graceful-fs": "^4.2.9", - "node-gyp-build": "^4.2.2", - "picomatch": "^4.0.2", - "resolve-from": "^5.0.0" - }, - "bin": { - "nft": "out/cli.js" - }, - "engines": { - "node": ">=20" - } - }, - "node_modules/@vercel/nft/node_modules/balanced-match": { - "version": "4.0.4", - "license": "MIT", - "engines": { - "node": "18 || 20 || >=22" - } - }, - "node_modules/@vercel/nft/node_modules/brace-expansion": { - "version": "5.0.6", - "license": "MIT", - "dependencies": { - "balanced-match": "^4.0.2" - }, - "engines": { - "node": "18 || 20 || >=22" - } - }, - "node_modules/@vercel/nft/node_modules/glob": { - "version": "13.0.6", - "license": "BlueOak-1.0.0", - "dependencies": { - "minimatch": "^10.2.2", - "minipass": "^7.1.3", - "path-scurry": "^2.0.2" - }, - "engines": { - "node": "18 || 20 || >=22" - }, - "funding": { - "url": "https://github.com/sponsors/isaacs" - } - }, - 
"node_modules/@vercel/nft/node_modules/lru-cache": { - "version": "11.5.0", - "license": "BlueOak-1.0.0", - "engines": { - "node": "20 || >=22" - } - }, - "node_modules/@vercel/nft/node_modules/minimatch": { - "version": "10.2.5", - "license": "BlueOak-1.0.0", - "dependencies": { - "brace-expansion": "^5.0.5" - }, - "engines": { - "node": "18 || 20 || >=22" - }, - "funding": { - "url": "https://github.com/sponsors/isaacs" - } - }, - "node_modules/@vercel/nft/node_modules/path-scurry": { - "version": "2.0.2", - "license": "BlueOak-1.0.0", - "dependencies": { - "lru-cache": "^11.0.0", - "minipass": "^7.1.2" - }, - "engines": { - "node": "18 || 20 || >=22" - }, - "funding": { - "url": "https://github.com/sponsors/isaacs" - } - }, - "node_modules/@vitejs/plugin-vue": { - "version": "5.2.4", - "dev": true, - "license": "MIT", - "engines": { - "node": "^18.0.0 || >=20.0.0" - }, - "peerDependencies": { - "vite": "^5.0.0 || ^6.0.0", - "vue": "^3.2.25" - } - }, - "node_modules/@vitejs/plugin-vue-jsx": { - "version": "5.1.5", - "license": "MIT", - "dependencies": { - "@babel/core": "^7.29.0", - "@babel/plugin-syntax-typescript": "^7.28.6", - "@babel/plugin-transform-typescript": "^7.28.6", - "@rolldown/pluginutils": "^1.0.0-rc.2", - "@vue/babel-plugin-jsx": "^2.0.1" - }, - "engines": { - "node": "^20.19.0 || >=22.12.0" - }, - "peerDependencies": { - "vite": "^5.0.0 || ^6.0.0 || ^7.0.0 || ^8.0.0", - "vue": "^3.0.0" - } - }, - "node_modules/@vitest/expect": { - "version": "2.1.9", - "dev": true, - "license": "MIT", - "dependencies": { - "@vitest/spy": "2.1.9", - "@vitest/utils": "2.1.9", - "chai": "^5.1.2", - "tinyrainbow": "^1.2.0" - }, - "funding": { - "url": "https://opencollective.com/vitest" - } - }, - "node_modules/@vitest/pretty-format": { - "version": "2.1.9", - "dev": true, - "license": "MIT", - "dependencies": { - "tinyrainbow": "^1.2.0" - }, - "funding": { - "url": "https://opencollective.com/vitest" - } - }, - "node_modules/@vitest/runner": { - "version": "2.1.9", 
- "dev": true, - "license": "MIT", - "dependencies": { - "@vitest/utils": "2.1.9", - "pathe": "^1.1.2" - }, - "funding": { - "url": "https://opencollective.com/vitest" - } - }, - "node_modules/@vitest/runner/node_modules/pathe": { - "version": "1.1.2", - "dev": true, - "license": "MIT" - }, - "node_modules/@vitest/snapshot": { - "version": "2.1.9", - "dev": true, - "license": "MIT", - "dependencies": { - "@vitest/pretty-format": "2.1.9", - "magic-string": "^0.30.12", - "pathe": "^1.1.2" - }, - "funding": { - "url": "https://opencollective.com/vitest" - } - }, - "node_modules/@vitest/snapshot/node_modules/pathe": { - "version": "1.1.2", - "dev": true, - "license": "MIT" - }, - "node_modules/@vitest/spy": { - "version": "2.1.9", - "dev": true, - "license": "MIT", - "dependencies": { - "tinyspy": "^3.0.2" - }, - "funding": { - "url": "https://opencollective.com/vitest" - } - }, - "node_modules/@vitest/utils": { - "version": "2.1.9", - "dev": true, - "license": "MIT", - "dependencies": { - "@vitest/pretty-format": "2.1.9", - "loupe": "^3.1.2", - "tinyrainbow": "^1.2.0" - }, - "funding": { - "url": "https://opencollective.com/vitest" - } - }, - "node_modules/@volar/language-core": { - "version": "2.4.15", - "devOptional": true, - "license": "MIT", - "dependencies": { - "@volar/source-map": "2.4.15" - } - }, - "node_modules/@volar/source-map": { - "version": "2.4.15", - "devOptional": true, - "license": "MIT" - }, - "node_modules/@volar/typescript": { - "version": "2.4.15", - "devOptional": true, - "license": "MIT", - "dependencies": { - "@volar/language-core": "2.4.15", - "path-browserify": "^1.0.1", - "vscode-uri": "^3.0.8" - } - }, - "node_modules/@vue-macros/common": { - "version": "3.1.2", - "license": "MIT", - "dependencies": { - "@vue/compiler-sfc": "^3.5.22", - "ast-kit": "^2.1.2", - "local-pkg": "^1.1.2", - "magic-string-ast": "^1.0.2", - "unplugin-utils": "^0.3.0" - }, - "engines": { - "node": ">=20.19.0" - }, - "funding": { - "url": 
"https://github.com/sponsors/vue-macros" - }, - "peerDependencies": { - "vue": "^2.7.0 || ^3.2.25" - }, - "peerDependenciesMeta": { - "vue": { - "optional": true - } - } - }, - "node_modules/@vue/babel-helper-vue-transform-on": { - "version": "2.0.1", - "license": "MIT" - }, - "node_modules/@vue/babel-plugin-jsx": { - "version": "2.0.1", - "license": "MIT", - "dependencies": { - "@babel/helper-module-imports": "^7.27.1", - "@babel/helper-plugin-utils": "^7.27.1", - "@babel/plugin-syntax-jsx": "^7.27.1", - "@babel/template": "^7.27.2", - "@babel/traverse": "^7.28.4", - "@babel/types": "^7.28.4", - "@vue/babel-helper-vue-transform-on": "2.0.1", - "@vue/babel-plugin-resolve-type": "2.0.1", - "@vue/shared": "^3.5.22" - }, - "peerDependencies": { - "@babel/core": "^7.0.0-0" - }, - "peerDependenciesMeta": { - "@babel/core": { - "optional": true - } - } - }, - "node_modules/@vue/babel-plugin-resolve-type": { - "version": "2.0.1", - "license": "MIT", - "dependencies": { - "@babel/code-frame": "^7.27.1", - "@babel/helper-module-imports": "^7.27.1", - "@babel/helper-plugin-utils": "^7.27.1", - "@babel/parser": "^7.28.4", - "@vue/compiler-sfc": "^3.5.22" - }, - "funding": { - "url": "https://github.com/sponsors/sxzz" - }, - "peerDependencies": { - "@babel/core": "^7.0.0-0" - } - }, - "node_modules/@vue/compiler-core": { - "version": "3.5.34", - "license": "MIT", - "dependencies": { - "@babel/parser": "^7.29.3", - "@vue/shared": "3.5.34", - "entities": "^7.0.1", - "estree-walker": "^2.0.2", - "source-map-js": "^1.2.1" - } - }, - "node_modules/@vue/compiler-dom": { - "version": "3.5.34", - "license": "MIT", - "dependencies": { - "@vue/compiler-core": "3.5.34", - "@vue/shared": "3.5.34" - } - }, - "node_modules/@vue/compiler-sfc": { - "version": "3.5.34", - "license": "MIT", - "dependencies": { - "@babel/parser": "^7.29.3", - "@vue/compiler-core": "3.5.34", - "@vue/compiler-dom": "3.5.34", - "@vue/compiler-ssr": "3.5.34", - "@vue/shared": "3.5.34", - "estree-walker": "^2.0.2", - 
"magic-string": "^0.30.21", - "postcss": "^8.5.14", - "source-map-js": "^1.2.1" - } - }, - "node_modules/@vue/compiler-ssr": { - "version": "3.5.34", - "license": "MIT", - "dependencies": { - "@vue/compiler-dom": "3.5.34", - "@vue/shared": "3.5.34" - } - }, - "node_modules/@vue/compiler-vue2": { - "version": "2.7.16", - "devOptional": true, - "license": "MIT", - "dependencies": { - "de-indent": "^1.0.2", - "he": "^1.2.0" - } - }, - "node_modules/@vue/devtools-api": { - "version": "6.6.4", - "license": "MIT" - }, - "node_modules/@vue/devtools-core": { - "version": "8.1.2", - "license": "MIT", - "dependencies": { - "@vue/devtools-kit": "^8.1.2", - "@vue/devtools-shared": "^8.1.2" - }, - "peerDependencies": { - "vue": "^3.0.0" - } - }, - "node_modules/@vue/devtools-kit": { - "version": "8.1.2", - "license": "MIT", - "dependencies": { - "@vue/devtools-shared": "^8.1.2", - "birpc": "^2.6.1", - "hookable": "^5.5.3", - "perfect-debounce": "^2.0.0" - } - }, - "node_modules/@vue/devtools-kit/node_modules/birpc": { - "version": "2.9.0", - "license": "MIT", - "funding": { - "url": "https://github.com/sponsors/antfu" - } - }, - "node_modules/@vue/devtools-kit/node_modules/hookable": { - "version": "5.5.3", - "license": "MIT" - }, - "node_modules/@vue/devtools-shared": { - "version": "8.1.2", - "license": "MIT" - }, - "node_modules/@vue/language-core": { - "version": "2.2.12", - "devOptional": true, - "license": "MIT", - "dependencies": { - "@volar/language-core": "2.4.15", - "@vue/compiler-dom": "^3.5.0", - "@vue/compiler-vue2": "^2.7.16", - "@vue/shared": "^3.5.0", - "alien-signals": "^1.0.3", - "minimatch": "^9.0.3", - "muggle-string": "^0.4.1", - "path-browserify": "^1.0.1" - }, - "peerDependencies": { - "typescript": "*" - }, - "peerDependenciesMeta": { - "typescript": { - "optional": true - } - } - }, - "node_modules/@vue/reactivity": { - "version": "3.5.34", - "license": "MIT", - "dependencies": { - "@vue/shared": "3.5.34" - } - }, - "node_modules/@vue/runtime-core": { - 
"version": "3.5.34", - "license": "MIT", - "dependencies": { - "@vue/reactivity": "3.5.34", - "@vue/shared": "3.5.34" - } - }, - "node_modules/@vue/runtime-dom": { - "version": "3.5.34", - "license": "MIT", - "dependencies": { - "@vue/reactivity": "3.5.34", - "@vue/runtime-core": "3.5.34", - "@vue/shared": "3.5.34", - "csstype": "^3.2.3" - } - }, - "node_modules/@vue/server-renderer": { - "version": "3.5.34", - "license": "MIT", - "dependencies": { - "@vue/compiler-ssr": "3.5.34", - "@vue/shared": "3.5.34" - }, - "peerDependencies": { - "vue": "3.5.34" - } - }, - "node_modules/@vue/shared": { - "version": "3.5.34", - "license": "MIT" - }, - "node_modules/@vue/test-utils": { - "version": "2.4.10", - "dev": true, - "license": "MIT", - "dependencies": { - "js-beautify": "^1.14.9", - "vue-component-type-helpers": "^3.0.0" - }, - "peerDependencies": { - "@vue/compiler-dom": "3.x", - "@vue/server-renderer": "3.x", - "vue": "3.x" - }, - "peerDependenciesMeta": { - "@vue/server-renderer": { - "optional": true - } - } - }, - "node_modules/@vueuse/core": { - "version": "14.3.0", - "license": "MIT", - "dependencies": { - "@types/web-bluetooth": "^0.0.21", - "@vueuse/metadata": "14.3.0", - "@vueuse/shared": "14.3.0" - }, - "funding": { - "url": "https://github.com/sponsors/antfu" - }, - "peerDependencies": { - "vue": "^3.5.0" - } - }, - "node_modules/@vueuse/metadata": { - "version": "14.3.0", - "license": "MIT", - "funding": { - "url": "https://github.com/sponsors/antfu" - } - }, - "node_modules/@vueuse/shared": { - "version": "14.3.0", - "license": "MIT", - "funding": { - "url": "https://github.com/sponsors/antfu" - }, - "peerDependencies": { - "vue": "^3.5.0" - } - }, - "node_modules/abbrev": { - "version": "2.0.0", - "dev": true, - "license": "ISC", - "engines": { - "node": "^14.17.0 || ^16.13.0 || >=18.0.0" - } - }, - "node_modules/abort-controller": { - "version": "3.0.0", - "license": "MIT", - "dependencies": { - "event-target-shim": "^5.0.0" - }, - "engines": { - 
"node": ">=6.5" - } - }, - "node_modules/acorn": { - "version": "8.16.0", - "license": "MIT", - "bin": { - "acorn": "bin/acorn" - }, - "engines": { - "node": ">=0.4.0" - } - }, - "node_modules/acorn-import-attributes": { - "version": "1.9.5", - "license": "MIT", - "peerDependencies": { - "acorn": "^8" - } - }, - "node_modules/agent-base": { - "version": "7.1.4", - "license": "MIT", - "engines": { - "node": ">= 14" - } - }, - "node_modules/alien-signals": { - "version": "1.0.13", - "devOptional": true, - "license": "MIT" - }, - "node_modules/ansi-regex": { - "version": "6.2.2", - "license": "MIT", - "engines": { - "node": ">=12" - }, - "funding": { - "url": "https://github.com/chalk/ansi-regex?sponsor=1" - } - }, - "node_modules/ansi-styles": { - "version": "4.3.0", - "license": "MIT", - "dependencies": { - "color-convert": "^2.0.1" - }, - "engines": { - "node": ">=8" - }, - "funding": { - "url": "https://github.com/chalk/ansi-styles?sponsor=1" - } - }, - "node_modules/ansis": { - "version": "4.3.0", - "license": "ISC", - "engines": { - "node": ">=14" - } - }, - "node_modules/anymatch": { - "version": "3.1.3", - "license": "ISC", - "dependencies": { - "normalize-path": "^3.0.0", - "picomatch": "^2.0.4" - }, - "engines": { - "node": ">= 8" - } - }, - "node_modules/anymatch/node_modules/picomatch": { - "version": "2.3.2", - "license": "MIT", - "engines": { - "node": ">=8.6" - }, - "funding": { - "url": "https://github.com/sponsors/jonschlinkert" - } - }, - "node_modules/apache-arrow": { - "version": "17.0.0", - "license": "Apache-2.0", - "dependencies": { - "@swc/helpers": "^0.5.11", - "@types/command-line-args": "^5.2.3", - "@types/command-line-usage": "^5.0.4", - "@types/node": "^20.13.0", - "command-line-args": "^5.2.1", - "command-line-usage": "^7.0.1", - "flatbuffers": "^24.3.25", - "json-bignum": "^0.0.3", - "tslib": "^2.6.2" - }, - "bin": { - "arrow2csv": "bin/arrow2csv.cjs" - } - }, - "node_modules/archiver": { - "version": "7.0.1", - "license": "MIT", - 
"dependencies": { - "archiver-utils": "^5.0.2", - "async": "^3.2.4", - "buffer-crc32": "^1.0.0", - "readable-stream": "^4.0.0", - "readdir-glob": "^1.1.2", - "tar-stream": "^3.0.0", - "zip-stream": "^6.0.1" - }, - "engines": { - "node": ">= 14" - } - }, - "node_modules/archiver-utils": { - "version": "5.0.2", - "license": "MIT", - "dependencies": { - "glob": "^10.0.0", - "graceful-fs": "^4.2.0", - "is-stream": "^2.0.1", - "lazystream": "^1.0.0", - "lodash": "^4.17.15", - "normalize-path": "^3.0.0", - "readable-stream": "^4.0.0" - }, - "engines": { - "node": ">= 14" - } - }, - "node_modules/archiver-utils/node_modules/is-stream": { - "version": "2.0.1", - "license": "MIT", - "engines": { - "node": ">=8" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/aria-hidden": { - "version": "1.2.6", - "license": "MIT", - "dependencies": { - "tslib": "^2.0.0" - }, - "engines": { - "node": ">=10" - } - }, - "node_modules/array-back": { - "version": "3.1.0", - "license": "MIT", - "engines": { - "node": ">=6" - } - }, - "node_modules/assertion-error": { - "version": "2.0.1", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=12" - } - }, - "node_modules/ast-kit": { - "version": "2.2.0", - "license": "MIT", - "dependencies": { - "@babel/parser": "^7.28.5", - "pathe": "^2.0.3" - }, - "engines": { - "node": ">=20.19.0" - }, - "funding": { - "url": "https://github.com/sponsors/sxzz" - } - }, - "node_modules/ast-walker-scope": { - "version": "0.8.3", - "license": "MIT", - "dependencies": { - "@babel/parser": "^7.28.4", - "ast-kit": "^2.1.3" - }, - "engines": { - "node": ">=20.19.0" - }, - "funding": { - "url": "https://github.com/sponsors/sxzz" - } - }, - "node_modules/async": { - "version": "3.2.6", - "license": "MIT" - }, - "node_modules/async-sema": { - "version": "3.1.1", - "license": "MIT" - }, - "node_modules/autoprefixer": { - "version": "10.5.0", - "funding": [ - { - "type": "opencollective", - "url": 
"https://opencollective.com/postcss/" - }, - { - "type": "tidelift", - "url": "https://tidelift.com/funding/github/npm/autoprefixer" - }, - { - "type": "github", - "url": "https://github.com/sponsors/ai" - } - ], - "license": "MIT", - "dependencies": { - "browserslist": "^4.28.2", - "caniuse-lite": "^1.0.30001787", - "fraction.js": "^5.3.4", - "picocolors": "^1.1.1", - "postcss-value-parser": "^4.2.0" - }, - "bin": { - "autoprefixer": "bin/autoprefixer" - }, - "engines": { - "node": "^10 || ^12 || >=14" - }, - "peerDependencies": { - "postcss": "^8.1.0" - } - }, - "node_modules/avsdf-base": { - "version": "1.0.0", - "license": "MIT", - "dependencies": { - "layout-base": "^1.0.0" - } - }, - "node_modules/b4a": { - "version": "1.8.1", - "license": "Apache-2.0", - "peerDependencies": { - "react-native-b4a": "*" - }, - "peerDependenciesMeta": { - "react-native-b4a": { - "optional": true - } - } - }, - "node_modules/balanced-match": { - "version": "1.0.2", - "license": "MIT" - }, - "node_modules/bare-events": { - "version": "2.8.3", - "license": "Apache-2.0", - "peerDependencies": { - "bare-abort-controller": "*" - }, - "peerDependenciesMeta": { - "bare-abort-controller": { - "optional": true - } - } - }, - "node_modules/bare-fs": { - "version": "4.7.1", - "license": "Apache-2.0", - "dependencies": { - "bare-events": "^2.5.4", - "bare-path": "^3.0.0", - "bare-stream": "^2.6.4", - "bare-url": "^2.2.2", - "fast-fifo": "^1.3.2" - }, - "engines": { - "bare": ">=1.16.0" - }, - "peerDependencies": { - "bare-buffer": "*" - }, - "peerDependenciesMeta": { - "bare-buffer": { - "optional": true - } - } - }, - "node_modules/bare-os": { - "version": "3.9.1", - "license": "Apache-2.0", - "engines": { - "bare": ">=1.14.0" - } - }, - "node_modules/bare-path": { - "version": "3.0.0", - "license": "Apache-2.0", - "dependencies": { - "bare-os": "^3.0.1" - } - }, - "node_modules/bare-stream": { - "version": "2.13.1", - "license": "Apache-2.0", - "dependencies": { - "streamx": "^2.25.0", - 
"teex": "^1.0.1" - }, - "peerDependencies": { - "bare-abort-controller": "*", - "bare-buffer": "*", - "bare-events": "*" - }, - "peerDependenciesMeta": { - "bare-abort-controller": { - "optional": true - }, - "bare-buffer": { - "optional": true - }, - "bare-events": { - "optional": true - } - } - }, - "node_modules/bare-url": { - "version": "2.4.3", - "license": "Apache-2.0", - "dependencies": { - "bare-path": "^3.0.0" - } - }, - "node_modules/base64-js": { - "version": "1.5.1", - "funding": [ - { - "type": "github", - "url": "https://github.com/sponsors/feross" - }, - { - "type": "patreon", - "url": "https://www.patreon.com/feross" - }, - { - "type": "consulting", - "url": "https://feross.org/support" - } - ], - "license": "MIT" - }, - "node_modules/baseline-browser-mapping": { - "version": "2.10.31", - "license": "Apache-2.0", - "bin": { - "baseline-browser-mapping": "dist/cli.cjs" - }, - "engines": { - "node": ">=6.0.0" - } - }, - "node_modules/bidi-js": { - "version": "1.0.3", - "resolved": "https://registry.npmjs.org/bidi-js/-/bidi-js-1.0.3.tgz", - "integrity": "sha512-RKshQI1R3YQ+n9YJz2QQ147P66ELpa1FQEg20Dk8oW9t2KgLbpDLLp9aGZ7y8WHSshDknG0bknqGw5/tyCs5tw==", - "license": "MIT", - "dependencies": { - "require-from-string": "^2.0.2" - } - }, - "node_modules/binary-search-bounds": { - "version": "2.0.5", - "license": "MIT" - }, - "node_modules/bindings": { - "version": "1.5.0", - "license": "MIT", - "dependencies": { - "file-uri-to-path": "1.0.0" - } - }, - "node_modules/birpc": { - "version": "4.0.0", - "license": "MIT", - "funding": { - "url": "https://github.com/sponsors/antfu" - } - }, - "node_modules/boolbase": { - "version": "1.0.0", - "license": "ISC" - }, - "node_modules/brace-expansion": { - "version": "2.1.0", - "license": "MIT", - "dependencies": { - "balanced-match": "^1.0.0" - } - }, - "node_modules/braces": { - "version": "3.0.3", - "license": "MIT", - "dependencies": { - "fill-range": "^7.1.1" - }, - "engines": { - "node": ">=8" - } - }, - 
"node_modules/browserslist": { - "version": "4.28.2", - "funding": [ - { - "type": "opencollective", - "url": "https://opencollective.com/browserslist" - }, - { - "type": "tidelift", - "url": "https://tidelift.com/funding/github/npm/browserslist" - }, - { - "type": "github", - "url": "https://github.com/sponsors/ai" - } - ], - "license": "MIT", - "dependencies": { - "baseline-browser-mapping": "^2.10.12", - "caniuse-lite": "^1.0.30001782", - "electron-to-chromium": "^1.5.328", - "node-releases": "^2.0.36", - "update-browserslist-db": "^1.2.3" - }, - "bin": { - "browserslist": "cli.js" - }, - "engines": { - "node": "^6 || ^7 || ^8 || ^9 || ^10 || ^11 || ^12 || >=13.7" - } - }, - "node_modules/buffer": { - "version": "6.0.3", - "funding": [ - { - "type": "github", - "url": "https://github.com/sponsors/feross" - }, - { - "type": "patreon", - "url": "https://www.patreon.com/feross" - }, - { - "type": "consulting", - "url": "https://feross.org/support" - } - ], - "license": "MIT", - "dependencies": { - "base64-js": "^1.3.1", - "ieee754": "^1.2.1" - } - }, - "node_modules/buffer-crc32": { - "version": "1.0.0", - "license": "MIT", - "engines": { - "node": ">=8.0.0" - } - }, - "node_modules/buffer-from": { - "version": "1.1.2", - "license": "MIT" - }, - "node_modules/bundle-name": { - "version": "4.1.0", - "license": "MIT", - "dependencies": { - "run-applescript": "^7.0.0" - }, - "engines": { - "node": ">=18" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/c12": { - "version": "3.3.4", - "license": "MIT", - "dependencies": { - "chokidar": "^5.0.0", - "confbox": "^0.2.4", - "defu": "^6.1.6", - "dotenv": "^17.3.1", - "exsolve": "^1.0.8", - "giget": "^3.2.0", - "jiti": "^2.6.1", - "ohash": "^2.0.11", - "pathe": "^2.0.3", - "perfect-debounce": "^2.1.0", - "pkg-types": "^2.3.0", - "rc9": "^3.0.1" - }, - "peerDependencies": { - "magicast": "*" - }, - "peerDependenciesMeta": { - "magicast": { - "optional": true - } - } - }, - 
"node_modules/cac": { - "version": "6.7.14", - "license": "MIT", - "engines": { - "node": ">=8" - } - }, - "node_modules/caniuse-api": { - "version": "3.0.0", - "license": "MIT", - "dependencies": { - "browserslist": "^4.0.0", - "caniuse-lite": "^1.0.0", - "lodash.memoize": "^4.1.2", - "lodash.uniq": "^4.5.0" - } - }, - "node_modules/caniuse-lite": { - "version": "1.0.30001793", - "funding": [ - { - "type": "opencollective", - "url": "https://opencollective.com/browserslist" - }, - { - "type": "tidelift", - "url": "https://tidelift.com/funding/github/npm/caniuse-lite" - }, - { - "type": "github", - "url": "https://github.com/sponsors/ai" - } - ], - "license": "CC-BY-4.0" - }, - "node_modules/chai": { - "version": "5.3.3", - "dev": true, - "license": "MIT", - "dependencies": { - "assertion-error": "^2.0.1", - "check-error": "^2.1.1", - "deep-eql": "^5.0.1", - "loupe": "^3.1.0", - "pathval": "^2.0.0" - }, - "engines": { - "node": ">=18" - } - }, - "node_modules/chalk": { - "version": "4.1.2", - "license": "MIT", - "dependencies": { - "ansi-styles": "^4.1.0", - "supports-color": "^7.1.0" - }, - "engines": { - "node": ">=10" - }, - "funding": { - "url": "https://github.com/chalk/chalk?sponsor=1" - } - }, - "node_modules/chalk-template": { - "version": "0.4.0", - "license": "MIT", - "dependencies": { - "chalk": "^4.1.2" - }, - "engines": { - "node": ">=12" - }, - "funding": { - "url": "https://github.com/chalk/chalk-template?sponsor=1" - } - }, - "node_modules/check-error": { - "version": "2.1.3", - "dev": true, - "license": "MIT", - "engines": { - "node": ">= 16" - } - }, - "node_modules/chokidar": { - "version": "5.0.0", - "license": "MIT", - "dependencies": { - "readdirp": "^5.0.0" - }, - "engines": { - "node": ">= 20.19.0" - }, - "funding": { - "url": "https://paulmillr.com/funding/" - } - }, - "node_modules/chownr": { - "version": "3.0.0", - "license": "BlueOak-1.0.0", - "engines": { - "node": ">=18" - } - }, - "node_modules/citty": { - "version": "0.2.2", - 
"license": "MIT" - }, - "node_modules/class-variance-authority": { - "version": "0.7.1", - "license": "Apache-2.0", - "dependencies": { - "clsx": "^2.1.1" - }, - "funding": { - "url": "https://polar.sh/cva" - } - }, - "node_modules/cliui": { - "version": "9.0.1", - "license": "ISC", - "dependencies": { - "string-width": "^7.2.0", - "strip-ansi": "^7.1.0", - "wrap-ansi": "^9.0.0" - }, - "engines": { - "node": ">=20" - } - }, - "node_modules/cliui/node_modules/ansi-styles": { - "version": "6.2.3", - "license": "MIT", - "engines": { - "node": ">=12" - }, - "funding": { - "url": "https://github.com/chalk/ansi-styles?sponsor=1" - } - }, - "node_modules/cliui/node_modules/emoji-regex": { - "version": "10.6.0", - "license": "MIT" - }, - "node_modules/cliui/node_modules/string-width": { - "version": "7.2.0", - "license": "MIT", - "dependencies": { - "emoji-regex": "^10.3.0", - "get-east-asian-width": "^1.0.0", - "strip-ansi": "^7.1.0" - }, - "engines": { - "node": ">=18" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/cliui/node_modules/wrap-ansi": { - "version": "9.0.2", - "license": "MIT", - "dependencies": { - "ansi-styles": "^6.2.1", - "string-width": "^7.0.0", - "strip-ansi": "^7.1.0" - }, - "engines": { - "node": ">=18" - }, - "funding": { - "url": "https://github.com/chalk/wrap-ansi?sponsor=1" - } - }, - "node_modules/clsx": { - "version": "2.1.1", - "license": "MIT", - "engines": { - "node": ">=6" - } - }, - "node_modules/cluster-key-slot": { - "version": "1.1.2", - "license": "Apache-2.0", - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/codemirror": { - "version": "6.0.2", - "license": "MIT", - "dependencies": { - "@codemirror/autocomplete": "^6.0.0", - "@codemirror/commands": "^6.0.0", - "@codemirror/language": "^6.0.0", - "@codemirror/lint": "^6.0.0", - "@codemirror/search": "^6.0.0", - "@codemirror/state": "^6.0.0", - "@codemirror/view": "^6.0.0" - } - }, - "node_modules/color-convert": { - 
"version": "2.0.1", - "license": "MIT", - "dependencies": { - "color-name": "~1.1.4" - }, - "engines": { - "node": ">=7.0.0" - } - }, - "node_modules/color-name": { - "version": "1.1.4", - "license": "MIT" - }, - "node_modules/command-line-args": { - "version": "5.2.1", - "license": "MIT", - "dependencies": { - "array-back": "^3.1.0", - "find-replace": "^3.0.0", - "lodash.camelcase": "^4.3.0", - "typical": "^4.0.0" - }, - "engines": { - "node": ">=4.0.0" - } - }, - "node_modules/command-line-usage": { - "version": "7.0.4", - "license": "MIT", - "dependencies": { - "array-back": "^6.2.2", - "chalk-template": "^0.4.0", - "table-layout": "^4.1.1", - "typical": "^7.3.0" - }, - "engines": { - "node": ">=12.20.0" - } - }, - "node_modules/command-line-usage/node_modules/array-back": { - "version": "6.2.3", - "license": "MIT", - "engines": { - "node": ">=12.17" - } - }, - "node_modules/command-line-usage/node_modules/typical": { - "version": "7.3.0", - "license": "MIT", - "engines": { - "node": ">=12.17" - } - }, - "node_modules/commander": { - "version": "7.2.0", - "license": "MIT", - "engines": { - "node": ">= 10" - } - }, - "node_modules/commondir": { - "version": "1.0.1", - "license": "MIT" - }, - "node_modules/compatx": { - "version": "0.2.0", - "license": "MIT" - }, - "node_modules/compress-commons": { - "version": "6.0.2", - "license": "MIT", - "dependencies": { - "crc-32": "^1.2.0", - "crc32-stream": "^6.0.0", - "is-stream": "^2.0.1", - "normalize-path": "^3.0.0", - "readable-stream": "^4.0.0" - }, - "engines": { - "node": ">= 14" - } - }, - "node_modules/compress-commons/node_modules/is-stream": { - "version": "2.0.1", - "license": "MIT", - "engines": { - "node": ">=8" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/confbox": { - "version": "0.2.4", - "license": "MIT" - }, - "node_modules/config-chain": { - "version": "1.1.13", - "dev": true, - "license": "MIT", - "dependencies": { - "ini": "^1.3.4", - "proto-list": 
"~1.2.1" - } - }, - "node_modules/consola": { - "version": "3.4.2", - "license": "MIT", - "engines": { - "node": "^14.18.0 || >=16.10.0" - } - }, - "node_modules/convert-source-map": { - "version": "2.0.0", - "license": "MIT" - }, - "node_modules/cookie-es": { - "version": "3.1.1", - "license": "MIT" - }, - "node_modules/core-util-is": { - "version": "1.0.3", - "license": "MIT" - }, - "node_modules/cose-base": { - "version": "2.2.0", - "license": "MIT", - "dependencies": { - "layout-base": "^2.0.0" - } - }, - "node_modules/cose-base/node_modules/layout-base": { - "version": "2.0.1", - "license": "MIT" - }, - "node_modules/crc-32": { - "version": "1.2.2", - "license": "Apache-2.0", - "bin": { - "crc32": "bin/crc32.njs" - }, - "engines": { - "node": ">=0.8" - } - }, - "node_modules/crc32-stream": { - "version": "6.0.0", - "license": "MIT", - "dependencies": { - "crc-32": "^1.2.0", - "readable-stream": "^4.0.0" - }, - "engines": { - "node": ">= 14" - } - }, - "node_modules/crelt": { - "version": "1.0.6", - "license": "MIT" - }, - "node_modules/croner": { - "version": "10.0.1", - "funding": [ - { - "type": "other", - "url": "https://paypal.me/hexagonpp" - }, - { - "type": "github", - "url": "https://github.com/sponsors/hexagon" - } - ], - "license": "MIT", - "engines": { - "node": ">=18.0" - } - }, - "node_modules/cross-spawn": { - "version": "7.0.6", - "license": "MIT", - "dependencies": { - "path-key": "^3.1.0", - "shebang-command": "^2.0.0", - "which": "^2.0.1" - }, - "engines": { - "node": ">= 8" - } - }, - "node_modules/crossws": { - "version": "0.3.5", - "license": "MIT", - "dependencies": { - "uncrypto": "^0.1.3" - } - }, - "node_modules/css-select": { - "version": "5.2.2", - "license": "BSD-2-Clause", - "dependencies": { - "boolbase": "^1.0.0", - "css-what": "^6.1.0", - "domhandler": "^5.0.2", - "domutils": "^3.0.1", - "nth-check": "^2.0.1" - }, - "funding": { - "url": "https://github.com/sponsors/fb55" - } - }, - "node_modules/css-tree": { - "version": 
"3.2.1", - "license": "MIT", - "dependencies": { - "mdn-data": "2.27.1", - "source-map-js": "^1.2.1" - }, - "engines": { - "node": "^10 || ^12.20.0 || ^14.13.0 || >=15.0.0" - } - }, - "node_modules/css-what": { - "version": "6.2.2", - "license": "BSD-2-Clause", - "engines": { - "node": ">= 6" - }, - "funding": { - "url": "https://github.com/sponsors/fb55" - } - }, - "node_modules/cssesc": { - "version": "3.0.0", - "license": "MIT", - "bin": { - "cssesc": "bin/cssesc" - }, - "engines": { - "node": ">=4" - } - }, - "node_modules/cssnano": { - "version": "8.0.1", - "license": "MIT", - "dependencies": { - "cssnano-preset-default": "^8.0.1", - "lilconfig": "^3.1.3" - }, - "engines": { - "node": "^22.11.0 || ^24.11.0 || >=26.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/cssnano" - }, - "peerDependencies": { - "postcss": "^8.5.14" - } - }, - "node_modules/cssnano-preset-default": { - "version": "8.0.1", - "license": "MIT", - "dependencies": { - "browserslist": "^4.28.2", - "cssnano-utils": "^6.0.0", - "postcss-calc": "^10.1.1", - "postcss-colormin": "^8.0.0", - "postcss-convert-values": "^8.0.0", - "postcss-discard-comments": "^8.0.0", - "postcss-discard-duplicates": "^8.0.0", - "postcss-discard-empty": "^8.0.0", - "postcss-discard-overridden": "^8.0.0", - "postcss-merge-longhand": "^8.0.0", - "postcss-merge-rules": "^8.0.0", - "postcss-minify-font-values": "^8.0.0", - "postcss-minify-gradients": "^8.0.0", - "postcss-minify-params": "^8.0.0", - "postcss-minify-selectors": "^8.0.1", - "postcss-normalize-charset": "^8.0.0", - "postcss-normalize-display-values": "^8.0.0", - "postcss-normalize-positions": "^8.0.0", - "postcss-normalize-repeat-style": "^8.0.0", - "postcss-normalize-string": "^8.0.0", - "postcss-normalize-timing-functions": "^8.0.0", - "postcss-normalize-unicode": "^8.0.0", - "postcss-normalize-url": "^8.0.0", - "postcss-normalize-whitespace": "^8.0.0", - "postcss-ordered-values": "^8.0.0", - "postcss-reduce-initial": 
"^8.0.0", - "postcss-reduce-transforms": "^8.0.0", - "postcss-svgo": "^8.0.0", - "postcss-unique-selectors": "^8.0.0" - }, - "engines": { - "node": "^22.11.0 || ^24.11.0 || >=26.0" - }, - "peerDependencies": { - "postcss": "^8.5.14" - } - }, - "node_modules/cssnano-utils": { - "version": "6.0.0", - "license": "MIT", - "engines": { - "node": "^22.11.0 || ^24.11.0 || >=26.0" - }, - "peerDependencies": { - "postcss": "^8.5.14" - } - }, - "node_modules/csso": { - "version": "5.0.5", - "license": "MIT", - "dependencies": { - "css-tree": "~2.2.0" - }, - "engines": { - "node": "^10 || ^12.20.0 || ^14.13.0 || >=15.0.0", - "npm": ">=7.0.0" - } - }, - "node_modules/csso/node_modules/css-tree": { - "version": "2.2.1", - "license": "MIT", - "dependencies": { - "mdn-data": "2.0.28", - "source-map-js": "^1.0.1" - }, - "engines": { - "node": "^10 || ^12.20.0 || ^14.13.0 || >=15.0.0", - "npm": ">=7.0.0" - } - }, - "node_modules/csso/node_modules/mdn-data": { - "version": "2.0.28", - "license": "CC0-1.0" - }, - "node_modules/cssstyle": { - "version": "6.2.0", - "resolved": "https://registry.npmjs.org/cssstyle/-/cssstyle-6.2.0.tgz", - "integrity": "sha512-Fm5NvhYathRnXNVndkUsCCuR63DCLVVwGOOwQw782coXFi5HhkXdu289l59HlXZBawsyNccXfWRYvLzcDCdDig==", - "license": "MIT", - "dependencies": { - "@asamuzakjp/css-color": "^5.0.1", - "@csstools/css-syntax-patches-for-csstree": "^1.0.28", - "css-tree": "^3.1.0", - "lru-cache": "^11.2.6" - }, - "engines": { - "node": ">=20" - } - }, - "node_modules/cssstyle/node_modules/lru-cache": { - "version": "11.5.1", - "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-11.5.1.tgz", - "integrity": "sha512-RPimw/7aMdv2oqRrxKwvZXcPfwBrn/JZ2xYcY9Hus/6LaS3VOAKVWKWgNLCFSiOm1ESXinjsDlidVU7JlnCN2A==", - "license": "BlueOak-1.0.0", - "engines": { - "node": "20 || >=22" - } - }, - "node_modules/csstype": { - "version": "3.2.3", - "license": "MIT" - }, - "node_modules/cytoscape": { - "version": "3.33.4", - "license": "MIT", - "engines": { - "node": ">=0.10" 
- } - }, - "node_modules/cytoscape-cise": { - "version": "2.0.1", - "license": "MIT", - "dependencies": { - "avsdf-base": "^1.0.0", - "cose-base": "^2.2.0" - }, - "peerDependencies": { - "cytoscape": "^3.2.0" - } - }, - "node_modules/cytoscape-fcose": { - "version": "2.2.0", - "license": "MIT", - "dependencies": { - "cose-base": "^2.2.0" - }, - "peerDependencies": { - "cytoscape": "^3.2.0" - } - }, - "node_modules/d3": { - "version": "7.9.0", - "license": "ISC", - "dependencies": { - "d3-array": "3", - "d3-axis": "3", - "d3-brush": "3", - "d3-chord": "3", - "d3-color": "3", - "d3-contour": "4", - "d3-delaunay": "6", - "d3-dispatch": "3", - "d3-drag": "3", - "d3-dsv": "3", - "d3-ease": "3", - "d3-fetch": "3", - "d3-force": "3", - "d3-format": "3", - "d3-geo": "3", - "d3-hierarchy": "3", - "d3-interpolate": "3", - "d3-path": "3", - "d3-polygon": "3", - "d3-quadtree": "3", - "d3-random": "3", - "d3-scale": "4", - "d3-scale-chromatic": "3", - "d3-selection": "3", - "d3-shape": "3", - "d3-time": "3", - "d3-time-format": "4", - "d3-timer": "3", - "d3-transition": "3", - "d3-zoom": "3" - }, - "engines": { - "node": ">=12" - } - }, - "node_modules/d3-array": { - "version": "3.2.4", - "license": "ISC", - "dependencies": { - "internmap": "1 - 2" - }, - "engines": { - "node": ">=12" - } - }, - "node_modules/d3-axis": { - "version": "3.0.0", - "license": "ISC", - "engines": { - "node": ">=12" - } - }, - "node_modules/d3-brush": { - "version": "3.0.0", - "license": "ISC", - "dependencies": { - "d3-dispatch": "1 - 3", - "d3-drag": "2 - 3", - "d3-interpolate": "1 - 3", - "d3-selection": "3", - "d3-transition": "3" - }, - "engines": { - "node": ">=12" - } - }, - "node_modules/d3-chord": { - "version": "3.0.1", - "license": "ISC", - "dependencies": { - "d3-path": "1 - 3" - }, - "engines": { - "node": ">=12" - } - }, - "node_modules/d3-color": { - "version": "3.1.0", - "license": "ISC", - "engines": { - "node": ">=12" - } - }, - "node_modules/d3-contour": { - "version": "4.0.2", - 
"license": "ISC", - "dependencies": { - "d3-array": "^3.2.0" - }, - "engines": { - "node": ">=12" - } - }, - "node_modules/d3-delaunay": { - "version": "6.0.4", - "license": "ISC", - "dependencies": { - "delaunator": "5" - }, - "engines": { - "node": ">=12" - } - }, - "node_modules/d3-dispatch": { - "version": "3.0.1", - "license": "ISC", - "engines": { - "node": ">=12" - } - }, - "node_modules/d3-drag": { - "version": "3.0.0", - "license": "ISC", - "dependencies": { - "d3-dispatch": "1 - 3", - "d3-selection": "3" - }, - "engines": { - "node": ">=12" - } - }, - "node_modules/d3-dsv": { - "version": "3.0.1", - "license": "ISC", - "dependencies": { - "commander": "7", - "iconv-lite": "0.6", - "rw": "1" - }, - "bin": { - "csv2json": "bin/dsv2json.js", - "csv2tsv": "bin/dsv2dsv.js", - "dsv2dsv": "bin/dsv2dsv.js", - "dsv2json": "bin/dsv2json.js", - "json2csv": "bin/json2dsv.js", - "json2dsv": "bin/json2dsv.js", - "json2tsv": "bin/json2dsv.js", - "tsv2csv": "bin/dsv2dsv.js", - "tsv2json": "bin/dsv2json.js" - }, - "engines": { - "node": ">=12" - } - }, - "node_modules/d3-ease": { - "version": "3.0.1", - "license": "BSD-3-Clause", - "engines": { - "node": ">=12" - } - }, - "node_modules/d3-fetch": { - "version": "3.0.1", - "license": "ISC", - "dependencies": { - "d3-dsv": "1 - 3" - }, - "engines": { - "node": ">=12" - } - }, - "node_modules/d3-force": { - "version": "3.0.0", - "license": "ISC", - "dependencies": { - "d3-dispatch": "1 - 3", - "d3-quadtree": "1 - 3", - "d3-timer": "1 - 3" - }, - "engines": { - "node": ">=12" - } - }, - "node_modules/d3-format": { - "version": "3.1.2", - "license": "ISC", - "engines": { - "node": ">=12" - } - }, - "node_modules/d3-geo": { - "version": "3.1.1", - "license": "ISC", - "dependencies": { - "d3-array": "2.5.0 - 3" - }, - "engines": { - "node": ">=12" - } - }, - "node_modules/d3-hierarchy": { - "version": "3.1.2", - "license": "ISC", - "engines": { - "node": ">=12" - } - }, - "node_modules/d3-interpolate": { - "version": "3.0.1", - 
"license": "ISC", - "dependencies": { - "d3-color": "1 - 3" - }, - "engines": { - "node": ">=12" - } - }, - "node_modules/d3-path": { - "version": "3.1.0", - "license": "ISC", - "engines": { - "node": ">=12" - } - }, - "node_modules/d3-polygon": { - "version": "3.0.1", - "license": "ISC", - "engines": { - "node": ">=12" - } - }, - "node_modules/d3-quadtree": { - "version": "3.0.1", - "license": "ISC", - "engines": { - "node": ">=12" - } - }, - "node_modules/d3-random": { - "version": "3.0.1", - "license": "ISC", - "engines": { - "node": ">=12" - } - }, - "node_modules/d3-scale": { - "version": "4.0.2", - "license": "ISC", - "dependencies": { - "d3-array": "2.10.0 - 3", - "d3-format": "1 - 3", - "d3-interpolate": "1.2.0 - 3", - "d3-time": "2.1.1 - 3", - "d3-time-format": "2 - 4" - }, - "engines": { - "node": ">=12" - } - }, - "node_modules/d3-scale-chromatic": { - "version": "3.1.0", - "license": "ISC", - "dependencies": { - "d3-color": "1 - 3", - "d3-interpolate": "1 - 3" - }, - "engines": { - "node": ">=12" - } - }, - "node_modules/d3-selection": { - "version": "3.0.0", - "license": "ISC", - "engines": { - "node": ">=12" - } - }, - "node_modules/d3-shape": { - "version": "3.2.0", - "license": "ISC", - "dependencies": { - "d3-path": "^3.1.0" - }, - "engines": { - "node": ">=12" - } - }, - "node_modules/d3-time": { - "version": "3.1.0", - "license": "ISC", - "dependencies": { - "d3-array": "2 - 3" - }, - "engines": { - "node": ">=12" - } - }, - "node_modules/d3-time-format": { - "version": "4.1.0", - "license": "ISC", - "dependencies": { - "d3-time": "1 - 3" - }, - "engines": { - "node": ">=12" - } - }, - "node_modules/d3-timer": { - "version": "3.0.1", - "license": "ISC", - "engines": { - "node": ">=12" - } - }, - "node_modules/d3-transition": { - "version": "3.0.1", - "license": "ISC", - "dependencies": { - "d3-color": "1 - 3", - "d3-dispatch": "1 - 3", - "d3-ease": "1 - 3", - "d3-interpolate": "1 - 3", - "d3-timer": "1 - 3" - }, - "engines": { - "node": ">=12" - 
}, - "peerDependencies": { - "d3-selection": "2 - 3" - } - }, - "node_modules/d3-zoom": { - "version": "3.0.0", - "license": "ISC", - "dependencies": { - "d3-dispatch": "1 - 3", - "d3-drag": "2 - 3", - "d3-interpolate": "1 - 3", - "d3-selection": "2 - 3", - "d3-transition": "2 - 3" - }, - "engines": { - "node": ">=12" - } - }, - "node_modules/data-urls": { - "version": "7.0.0", - "resolved": "https://registry.npmjs.org/data-urls/-/data-urls-7.0.0.tgz", - "integrity": "sha512-23XHcCF+coGYevirZceTVD7NdJOqVn+49IHyxgszm+JIiHLoB2TkmPtsYkNWT1pvRSGkc35L6NHs0yHkN2SumA==", - "license": "MIT", - "dependencies": { - "whatwg-mimetype": "^5.0.0", - "whatwg-url": "^16.0.0" - }, - "engines": { - "node": "^20.19.0 || ^22.12.0 || >=24.0.0" - } - }, - "node_modules/data-urls/node_modules/tr46": { - "version": "6.0.0", - "resolved": "https://registry.npmjs.org/tr46/-/tr46-6.0.0.tgz", - "integrity": "sha512-bLVMLPtstlZ4iMQHpFHTR7GAGj2jxi8Dg0s2h2MafAE4uSWF98FC/3MomU51iQAMf8/qDUbKWf5GxuvvVcXEhw==", - "license": "MIT", - "dependencies": { - "punycode": "^2.3.1" - }, - "engines": { - "node": ">=20" - } - }, - "node_modules/data-urls/node_modules/webidl-conversions": { - "version": "8.0.1", - "resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-8.0.1.tgz", - "integrity": "sha512-BMhLD/Sw+GbJC21C/UgyaZX41nPt8bUTg+jWyDeg7e7YN4xOM05YPSIXceACnXVtqyEw/LMClUQMtMZ+PGGpqQ==", - "license": "BSD-2-Clause", - "engines": { - "node": ">=20" - } - }, - "node_modules/data-urls/node_modules/whatwg-mimetype": { - "version": "5.0.0", - "resolved": "https://registry.npmjs.org/whatwg-mimetype/-/whatwg-mimetype-5.0.0.tgz", - "integrity": "sha512-sXcNcHOC51uPGF0P/D4NVtrkjSU2fNsm9iog4ZvZJsL3rjoDAzXZhkm2MWt1y+PUdggKAYVoMAIYcs78wJ51Cw==", - "license": "MIT", - "engines": { - "node": ">=20" - } - }, - "node_modules/data-urls/node_modules/whatwg-url": { - "version": "16.0.1", - "resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-16.0.1.tgz", - "integrity": 
"sha512-1to4zXBxmXHV3IiSSEInrreIlu02vUOvrhxJJH5vcxYTBDAx51cqZiKdyTxlecdKNSjj8EcxGBxNf6Vg+945gw==", - "license": "MIT", - "dependencies": { - "@exodus/bytes": "^1.11.0", - "tr46": "^6.0.0", - "webidl-conversions": "^8.0.1" - }, - "engines": { - "node": "^20.19.0 || ^22.12.0 || >=24.0.0" - } - }, - "node_modules/db0": { - "version": "0.3.4", - "license": "MIT", - "peerDependencies": { - "@electric-sql/pglite": "*", - "@libsql/client": "*", - "better-sqlite3": "*", - "drizzle-orm": "*", - "mysql2": "*", - "sqlite3": "*" - }, - "peerDependenciesMeta": { - "@electric-sql/pglite": { - "optional": true - }, - "@libsql/client": { - "optional": true - }, - "better-sqlite3": { - "optional": true - }, - "drizzle-orm": { - "optional": true - }, - "mysql2": { - "optional": true - }, - "sqlite3": { - "optional": true - } - } - }, - "node_modules/de-indent": { - "version": "1.0.2", - "devOptional": true, - "license": "MIT" - }, - "node_modules/debug": { - "version": "4.4.3", - "license": "MIT", - "dependencies": { - "ms": "^2.1.3" - }, - "engines": { - "node": ">=6.0" - }, - "peerDependenciesMeta": { - "supports-color": { - "optional": true - } - } - }, - "node_modules/decimal.js": { - "version": "10.6.0", - "resolved": "https://registry.npmjs.org/decimal.js/-/decimal.js-10.6.0.tgz", - "integrity": "sha512-YpgQiITW3JXGntzdUmyUR1V812Hn8T1YVXhCu+wO3OpS4eU9l4YdD3qjyiKdV6mvV29zapkMeD390UVEf2lkUg==", - "license": "MIT" - }, - "node_modules/deep-eql": { - "version": "5.0.2", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=6" - } - }, - "node_modules/deepmerge": { - "version": "4.3.1", - "license": "MIT", - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/default-browser": { - "version": "5.5.0", - "license": "MIT", - "dependencies": { - "bundle-name": "^4.1.0", - "default-browser-id": "^5.0.0" - }, - "engines": { - "node": ">=18" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/default-browser-id": { - "version": 
"5.0.1", - "license": "MIT", - "engines": { - "node": ">=18" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/define-lazy-prop": { - "version": "3.0.0", - "license": "MIT", - "engines": { - "node": ">=12" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/defu": { - "version": "6.1.7", - "license": "MIT" - }, - "node_modules/delaunator": { - "version": "5.1.0", - "license": "ISC", - "dependencies": { - "robust-predicates": "^3.0.2" - } - }, - "node_modules/denque": { - "version": "2.1.0", - "license": "Apache-2.0", - "engines": { - "node": ">=0.10" - } - }, - "node_modules/depd": { - "version": "2.0.0", - "license": "MIT", - "engines": { - "node": ">= 0.8" - } - }, - "node_modules/destr": { - "version": "2.0.5", - "license": "MIT" - }, - "node_modules/detect-libc": { - "version": "2.1.2", - "license": "Apache-2.0", - "engines": { - "node": ">=8" - } - }, - "node_modules/devalue": { - "version": "5.8.1", - "license": "MIT" - }, - "node_modules/diff": { - "version": "8.0.4", - "license": "BSD-3-Clause", - "engines": { - "node": ">=0.3.1" - } - }, - "node_modules/dom-serializer": { - "version": "2.0.0", - "license": "MIT", - "dependencies": { - "domelementtype": "^2.3.0", - "domhandler": "^5.0.2", - "entities": "^4.2.0" - }, - "funding": { - "url": "https://github.com/cheeriojs/dom-serializer?sponsor=1" - } - }, - "node_modules/dom-serializer/node_modules/entities": { - "version": "4.5.0", - "license": "BSD-2-Clause", - "engines": { - "node": ">=0.12" - }, - "funding": { - "url": "https://github.com/fb55/entities?sponsor=1" - } - }, - "node_modules/domelementtype": { - "version": "2.3.0", - "funding": [ - { - "type": "github", - "url": "https://github.com/sponsors/fb55" - } - ], - "license": "BSD-2-Clause" - }, - "node_modules/domhandler": { - "version": "5.0.3", - "license": "BSD-2-Clause", - "dependencies": { - "domelementtype": "^2.3.0" - }, - "engines": { - "node": ">= 4" 
- }, - "funding": { - "url": "https://github.com/fb55/domhandler?sponsor=1" - } - }, - "node_modules/dompurify": { - "version": "3.4.5", - "license": "(MPL-2.0 OR Apache-2.0)", - "optionalDependencies": { - "@types/trusted-types": "^2.0.7" - } - }, - "node_modules/domutils": { - "version": "3.2.2", - "license": "BSD-2-Clause", - "dependencies": { - "dom-serializer": "^2.0.0", - "domelementtype": "^2.3.0", - "domhandler": "^5.0.3" - }, - "funding": { - "url": "https://github.com/fb55/domutils?sponsor=1" - } - }, - "node_modules/dot-prop": { - "version": "10.1.0", - "license": "MIT", - "dependencies": { - "type-fest": "^5.0.0" - }, - "engines": { - "node": ">=20" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/dotenv": { - "version": "17.4.2", - "license": "BSD-2-Clause", - "engines": { - "node": ">=12" - }, - "funding": { - "url": "https://dotenvx.com" - } - }, - "node_modules/duplexer": { - "version": "0.1.2", - "license": "MIT" - }, - "node_modules/eastasianwidth": { - "version": "0.2.0", - "license": "MIT" - }, - "node_modules/editorconfig": { - "version": "1.0.7", - "dev": true, - "license": "MIT", - "dependencies": { - "@one-ini/wasm": "0.1.1", - "commander": "^10.0.0", - "minimatch": "^9.0.1", - "semver": "^7.5.3" - }, - "bin": { - "editorconfig": "bin/editorconfig" - }, - "engines": { - "node": ">=14" - } - }, - "node_modules/editorconfig/node_modules/commander": { - "version": "10.0.1", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=14" - } - }, - "node_modules/ee-first": { - "version": "1.1.1", - "license": "MIT" - }, - "node_modules/electron-to-chromium": { - "version": "1.5.360", - "license": "ISC" - }, - "node_modules/emoji-regex": { - "version": "9.2.2", - "license": "MIT" - }, - "node_modules/encodeurl": { - "version": "2.0.0", - "license": "MIT", - "engines": { - "node": ">= 0.8" - } - }, - "node_modules/enhanced-resolve": { - "version": "5.21.6", - "license": "MIT", - "dependencies": { 
- "graceful-fs": "^4.2.4", - "tapable": "^2.3.3" - }, - "engines": { - "node": ">=10.13.0" - } - }, - "node_modules/entities": { - "version": "7.0.1", - "license": "BSD-2-Clause", - "engines": { - "node": ">=0.12" - }, - "funding": { - "url": "https://github.com/fb55/entities?sponsor=1" - } - }, - "node_modules/error-stack-parser-es": { - "version": "1.0.5", - "license": "MIT", - "funding": { - "url": "https://github.com/sponsors/antfu" - } - }, - "node_modules/errx": { - "version": "0.1.0", - "license": "MIT" - }, - "node_modules/es-errors": { - "version": "1.3.0", - "license": "MIT", - "engines": { - "node": ">= 0.4" - } - }, - "node_modules/es-module-lexer": { - "version": "2.1.0", - "license": "MIT" - }, - "node_modules/esbuild": { - "version": "0.28.0", - "hasInstallScript": true, - "license": "MIT", - "bin": { - "esbuild": "bin/esbuild" - }, - "engines": { - "node": ">=18" - }, - "optionalDependencies": { - "@esbuild/aix-ppc64": "0.28.0", - "@esbuild/android-arm": "0.28.0", - "@esbuild/android-arm64": "0.28.0", - "@esbuild/android-x64": "0.28.0", - "@esbuild/darwin-arm64": "0.28.0", - "@esbuild/darwin-x64": "0.28.0", - "@esbuild/freebsd-arm64": "0.28.0", - "@esbuild/freebsd-x64": "0.28.0", - "@esbuild/linux-arm": "0.28.0", - "@esbuild/linux-arm64": "0.28.0", - "@esbuild/linux-ia32": "0.28.0", - "@esbuild/linux-loong64": "0.28.0", - "@esbuild/linux-mips64el": "0.28.0", - "@esbuild/linux-ppc64": "0.28.0", - "@esbuild/linux-riscv64": "0.28.0", - "@esbuild/linux-s390x": "0.28.0", - "@esbuild/linux-x64": "0.28.0", - "@esbuild/netbsd-arm64": "0.28.0", - "@esbuild/netbsd-x64": "0.28.0", - "@esbuild/openbsd-arm64": "0.28.0", - "@esbuild/openbsd-x64": "0.28.0", - "@esbuild/openharmony-arm64": "0.28.0", - "@esbuild/sunos-x64": "0.28.0", - "@esbuild/win32-arm64": "0.28.0", - "@esbuild/win32-ia32": "0.28.0", - "@esbuild/win32-x64": "0.28.0" - } - }, - "node_modules/escalade": { - "version": "3.2.0", - "license": "MIT", - "engines": { - "node": ">=6" - } - }, - 
"node_modules/escape-html": { - "version": "1.0.3", - "license": "MIT" - }, - "node_modules/escape-string-regexp": { - "version": "5.0.0", - "license": "MIT", - "engines": { - "node": ">=12" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/estree-walker": { - "version": "2.0.2", - "license": "MIT" - }, - "node_modules/etag": { - "version": "1.8.1", - "license": "MIT", - "engines": { - "node": ">= 0.6" - } - }, - "node_modules/event-target-shim": { - "version": "5.0.1", - "license": "MIT", - "engines": { - "node": ">=6" - } - }, - "node_modules/events": { - "version": "3.3.0", - "license": "MIT", - "engines": { - "node": ">=0.8.x" - } - }, - "node_modules/events-universal": { - "version": "1.0.1", - "license": "Apache-2.0", - "dependencies": { - "bare-events": "^2.7.0" - } - }, - "node_modules/execa": { - "version": "8.0.1", - "license": "MIT", - "dependencies": { - "cross-spawn": "^7.0.3", - "get-stream": "^8.0.1", - "human-signals": "^5.0.0", - "is-stream": "^3.0.0", - "merge-stream": "^2.0.0", - "npm-run-path": "^5.1.0", - "onetime": "^6.0.0", - "signal-exit": "^4.1.0", - "strip-final-newline": "^3.0.0" - }, - "engines": { - "node": ">=16.17" - }, - "funding": { - "url": "https://github.com/sindresorhus/execa?sponsor=1" - } - }, - "node_modules/expect-type": { - "version": "1.3.0", - "dev": true, - "license": "Apache-2.0", - "engines": { - "node": ">=12.0.0" - } - }, - "node_modules/exsolve": { - "version": "1.0.8", - "license": "MIT" - }, - "node_modules/fast-fifo": { - "version": "1.3.2", - "license": "MIT" - }, - "node_modules/fast-glob": { - "version": "3.3.3", - "license": "MIT", - "dependencies": { - "@nodelib/fs.stat": "^2.0.2", - "@nodelib/fs.walk": "^1.2.3", - "glob-parent": "^5.1.2", - "merge2": "^1.3.0", - "micromatch": "^4.0.8" - }, - "engines": { - "node": ">=8.6.0" - } - }, - "node_modules/fast-npm-meta": { - "version": "1.5.1", - "license": "MIT", - "bin": { - "fast-npm-meta": "dist/cli.mjs" - }, - 
"funding": { - "url": "https://github.com/sponsors/antfu" - } - }, - "node_modules/fast-string-truncated-width": { - "version": "3.0.3", - "license": "MIT" - }, - "node_modules/fast-string-width": { - "version": "3.0.2", - "license": "MIT", - "dependencies": { - "fast-string-truncated-width": "^3.0.2" - } - }, - "node_modules/fast-wrap-ansi": { - "version": "0.2.2", - "license": "MIT", - "dependencies": { - "fast-string-width": "^3.0.2" - } - }, - "node_modules/fastq": { - "version": "1.20.1", - "license": "ISC", - "dependencies": { - "reusify": "^1.0.4" - } - }, - "node_modules/fdir": { - "version": "6.5.0", - "license": "MIT", - "engines": { - "node": ">=12.0.0" - }, - "peerDependencies": { - "picomatch": "^3 || ^4" - }, - "peerDependenciesMeta": { - "picomatch": { - "optional": true - } - } - }, - "node_modules/file-uri-to-path": { - "version": "1.0.0", - "license": "MIT" - }, - "node_modules/fill-range": { - "version": "7.1.1", - "license": "MIT", - "dependencies": { - "to-regex-range": "^5.0.1" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/find-replace": { - "version": "3.0.0", - "license": "MIT", - "dependencies": { - "array-back": "^3.0.1" - }, - "engines": { - "node": ">=4.0.0" - } - }, - "node_modules/flatbuffers": { - "version": "24.12.23", - "license": "Apache-2.0" - }, - "node_modules/foreground-child": { - "version": "3.3.1", - "license": "ISC", - "dependencies": { - "cross-spawn": "^7.0.6", - "signal-exit": "^4.0.1" - }, - "engines": { - "node": ">=14" - }, - "funding": { - "url": "https://github.com/sponsors/isaacs" - } - }, - "node_modules/fraction.js": { - "version": "5.3.4", - "license": "MIT", - "engines": { - "node": "*" - }, - "funding": { - "type": "github", - "url": "https://github.com/sponsors/rawify" - } - }, - "node_modules/fresh": { - "version": "2.0.0", - "license": "MIT", - "engines": { - "node": ">= 0.8" - } - }, - "node_modules/function-bind": { - "version": "1.1.2", - "license": "MIT", - "funding": { - "url": 
"https://github.com/sponsors/ljharb" - } - }, - "node_modules/fuse.js": { - "version": "7.3.0", - "license": "Apache-2.0", - "engines": { - "node": ">=10" - }, - "funding": { - "type": "github", - "url": "https://github.com/sponsors/krisk" - } - }, - "node_modules/fzf": { - "version": "0.5.2", - "license": "BSD-3-Clause" - }, - "node_modules/gensync": { - "version": "1.0.0-beta.2", - "license": "MIT", - "engines": { - "node": ">=6.9.0" - } - }, - "node_modules/get-caller-file": { - "version": "2.0.5", - "license": "ISC", - "engines": { - "node": "6.* || 8.* || >= 10.*" - } - }, - "node_modules/get-east-asian-width": { - "version": "1.6.0", - "license": "MIT", - "engines": { - "node": ">=18" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/get-port-please": { - "version": "3.2.0", - "license": "MIT" - }, - "node_modules/get-stream": { - "version": "8.0.1", - "license": "MIT", - "engines": { - "node": ">=16" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/giget": { - "version": "3.2.0", - "license": "MIT", - "bin": { - "giget": "dist/cli.mjs" - } - }, - "node_modules/gl-bench": { - "version": "1.0.42", - "license": "MIT" - }, - "node_modules/gl-matrix": { - "version": "3.4.4", - "license": "MIT" - }, - "node_modules/glob": { - "version": "10.5.0", - "license": "ISC", - "dependencies": { - "foreground-child": "^3.1.0", - "jackspeak": "^3.1.2", - "minimatch": "^9.0.4", - "minipass": "^7.1.2", - "package-json-from-dist": "^1.0.0", - "path-scurry": "^1.11.1" - }, - "bin": { - "glob": "dist/esm/bin.mjs" - }, - "funding": { - "url": "https://github.com/sponsors/isaacs" - } - }, - "node_modules/glob-parent": { - "version": "5.1.2", - "license": "ISC", - "dependencies": { - "is-glob": "^4.0.1" - }, - "engines": { - "node": ">= 6" - } - }, - "node_modules/global-directory": { - "version": "4.0.1", - "license": "MIT", - "dependencies": { - "ini": "4.1.1" - }, - "engines": { - 
"node": ">=18" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/global-directory/node_modules/ini": { - "version": "4.1.1", - "license": "ISC", - "engines": { - "node": "^14.17.0 || ^16.13.0 || >=18.0.0" - } - }, - "node_modules/globby": { - "version": "16.2.0", - "license": "MIT", - "dependencies": { - "@sindresorhus/merge-streams": "^4.0.0", - "fast-glob": "^3.3.3", - "ignore": "^7.0.5", - "is-path-inside": "^4.0.0", - "slash": "^5.1.0", - "unicorn-magic": "^0.4.0" - }, - "engines": { - "node": ">=20" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/graceful-fs": { - "version": "4.2.11", - "license": "ISC" - }, - "node_modules/gzip-size": { - "version": "7.0.0", - "license": "MIT", - "dependencies": { - "duplexer": "^0.1.2" - }, - "engines": { - "node": "^12.20.0 || ^14.13.1 || >=16.0.0" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/h3": { - "version": "1.15.11", - "license": "MIT", - "dependencies": { - "cookie-es": "^1.2.3", - "crossws": "^0.3.5", - "defu": "^6.1.6", - "destr": "^2.0.5", - "iron-webcrypto": "^1.2.1", - "node-mock-http": "^1.0.4", - "radix3": "^1.1.2", - "ufo": "^1.6.3", - "uncrypto": "^0.1.3" - } - }, - "node_modules/h3/node_modules/cookie-es": { - "version": "1.2.3", - "license": "MIT" - }, - "node_modules/happy-dom": { - "version": "15.11.7", - "dev": true, - "license": "MIT", - "dependencies": { - "entities": "^4.5.0", - "webidl-conversions": "^7.0.0", - "whatwg-mimetype": "^3.0.0" - }, - "engines": { - "node": ">=18.0.0" - } - }, - "node_modules/happy-dom/node_modules/entities": { - "version": "4.5.0", - "dev": true, - "license": "BSD-2-Clause", - "engines": { - "node": ">=0.12" - }, - "funding": { - "url": "https://github.com/fb55/entities?sponsor=1" - } - }, - "node_modules/has-flag": { - "version": "4.0.0", - "license": "MIT", - "engines": { - "node": ">=8" - } - }, - "node_modules/hasown": { 
- "version": "2.0.3", - "license": "MIT", - "dependencies": { - "function-bind": "^1.1.2" - }, - "engines": { - "node": ">= 0.4" - } - }, - "node_modules/he": { - "version": "1.2.0", - "devOptional": true, - "license": "MIT", - "bin": { - "he": "bin/he" - } - }, - "node_modules/hookable": { - "version": "6.1.1", - "license": "MIT" - }, - "node_modules/html-encoding-sniffer": { - "version": "6.0.0", - "resolved": "https://registry.npmjs.org/html-encoding-sniffer/-/html-encoding-sniffer-6.0.0.tgz", - "integrity": "sha512-CV9TW3Y3f8/wT0BRFc1/KAVQ3TUHiXmaAb6VW9vtiMFf7SLoMd1PdAc4W3KFOFETBJUb90KatHqlsZMWV+R9Gg==", - "license": "MIT", - "dependencies": { - "@exodus/bytes": "^1.6.0" - }, - "engines": { - "node": "^20.19.0 || ^22.12.0 || >=24.0.0" - } - }, - "node_modules/http-errors": { - "version": "2.0.1", - "license": "MIT", - "dependencies": { - "depd": "~2.0.0", - "inherits": "~2.0.4", - "setprototypeof": "~1.2.0", - "statuses": "~2.0.2", - "toidentifier": "~1.0.1" - }, - "engines": { - "node": ">= 0.8" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/express" - } - }, - "node_modules/http-proxy-agent": { - "version": "7.0.2", - "resolved": "https://registry.npmjs.org/http-proxy-agent/-/http-proxy-agent-7.0.2.tgz", - "integrity": "sha512-T1gkAiYYDWYx3V5Bmyu7HcfcvL7mUrTWiM6yOfa3PIphViJ/gFPbvidQ+veqSOHci/PxBcDabeUNCzpOODJZig==", - "license": "MIT", - "dependencies": { - "agent-base": "^7.1.0", - "debug": "^4.3.4" - }, - "engines": { - "node": ">= 14" - } - }, - "node_modules/http-shutdown": { - "version": "1.2.2", - "license": "MIT", - "engines": { - "iojs": ">= 1.0.0", - "node": ">= 0.12.0" - } - }, - "node_modules/https-proxy-agent": { - "version": "7.0.6", - "license": "MIT", - "dependencies": { - "agent-base": "^7.1.2", - "debug": "4" - }, - "engines": { - "node": ">= 14" - } - }, - "node_modules/httpxy": { - "version": "0.5.3", - "license": "MIT" - }, - "node_modules/human-signals": { - "version": "5.0.0", - "license": 
"Apache-2.0", - "engines": { - "node": ">=16.17.0" - } - }, - "node_modules/iconv-lite": { - "version": "0.6.3", - "license": "MIT", - "dependencies": { - "safer-buffer": ">= 2.1.2 < 3.0.0" - }, - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/ieee754": { - "version": "1.2.1", - "funding": [ - { - "type": "github", - "url": "https://github.com/sponsors/feross" - }, - { - "type": "patreon", - "url": "https://www.patreon.com/feross" - }, - { - "type": "consulting", - "url": "https://feross.org/support" - } - ], - "license": "BSD-3-Clause" - }, - "node_modules/ignore": { - "version": "7.0.5", - "license": "MIT", - "engines": { - "node": ">= 4" - } - }, - "node_modules/image-meta": { - "version": "0.2.2", - "license": "MIT" - }, - "node_modules/impound": { - "version": "1.1.5", - "license": "MIT", - "dependencies": { - "@jridgewell/trace-mapping": "^0.3.31", - "es-module-lexer": "^2.0.0", - "pathe": "^2.0.3", - "unplugin": "^3.0.0", - "unplugin-utils": "^0.3.1" - } - }, - "node_modules/inherits": { - "version": "2.0.4", - "license": "ISC" - }, - "node_modules/ini": { - "version": "1.3.8", - "dev": true, - "license": "ISC" - }, - "node_modules/internmap": { - "version": "2.0.3", - "license": "ISC", - "engines": { - "node": ">=12" - } - }, - "node_modules/interval-tree-1d": { - "version": "1.0.4", - "license": "MIT", - "dependencies": { - "binary-search-bounds": "^2.0.0" - } - }, - "node_modules/ioredis": { - "version": "5.10.1", - "license": "MIT", - "dependencies": { - "@ioredis/commands": "1.5.1", - "cluster-key-slot": "^1.1.0", - "debug": "^4.3.4", - "denque": "^2.1.0", - "lodash.defaults": "^4.2.0", - "lodash.isarguments": "^3.1.0", - "redis-errors": "^1.2.0", - "redis-parser": "^3.0.0", - "standard-as-callback": "^2.1.0" - }, - "engines": { - "node": ">=12.22.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/ioredis" - } - }, - "node_modules/iron-webcrypto": { - "version": "1.2.1", - "license": "MIT", - "funding": { 
- "url": "https://github.com/sponsors/brc-dd" - } - }, - "node_modules/is-core-module": { - "version": "2.16.2", - "license": "MIT", - "dependencies": { - "hasown": "^2.0.3" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/is-docker": { - "version": "3.0.0", - "license": "MIT", - "bin": { - "is-docker": "cli.js" - }, - "engines": { - "node": "^12.20.0 || ^14.13.1 || >=16.0.0" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/is-extglob": { - "version": "2.1.1", - "license": "MIT", - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/is-fullwidth-code-point": { - "version": "3.0.0", - "license": "MIT", - "engines": { - "node": ">=8" - } - }, - "node_modules/is-glob": { - "version": "4.0.3", - "license": "MIT", - "dependencies": { - "is-extglob": "^2.1.1" - }, - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/is-in-ssh": { - "version": "1.0.0", - "license": "MIT", - "engines": { - "node": ">=20" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/is-inside-container": { - "version": "1.0.0", - "license": "MIT", - "dependencies": { - "is-docker": "^3.0.0" - }, - "bin": { - "is-inside-container": "cli.js" - }, - "engines": { - "node": ">=14.16" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/is-installed-globally": { - "version": "1.0.0", - "license": "MIT", - "dependencies": { - "global-directory": "^4.0.1", - "is-path-inside": "^4.0.0" - }, - "engines": { - "node": ">=18" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/is-module": { - "version": "1.0.0", - "license": "MIT" - }, - "node_modules/is-number": { - "version": "7.0.0", - "license": "MIT", - "engines": { - "node": ">=0.12.0" - } - }, - "node_modules/is-path-inside": { - "version": "4.0.0", - "license": "MIT", - "engines": { 
- "node": ">=12" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/is-potential-custom-element-name": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/is-potential-custom-element-name/-/is-potential-custom-element-name-1.0.1.tgz", - "integrity": "sha512-bCYeRA2rVibKZd+s2625gGnGF/t7DSqDs4dP7CrLA1m7jKWz6pps0LpYLJN8Q64HtmPKJ1hrN3nzPNKFEKOUiQ==", - "license": "MIT" - }, - "node_modules/is-reference": { - "version": "1.2.1", - "license": "MIT", - "dependencies": { - "@types/estree": "*" - } - }, - "node_modules/is-stream": { - "version": "3.0.0", - "license": "MIT", - "engines": { - "node": "^12.20.0 || ^14.13.1 || >=16.0.0" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/is-wsl": { - "version": "3.1.1", - "license": "MIT", - "dependencies": { - "is-inside-container": "^1.0.0" - }, - "engines": { - "node": ">=16" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/isarray": { - "version": "1.0.0", - "license": "MIT" - }, - "node_modules/isexe": { - "version": "2.0.0", - "license": "ISC" - }, - "node_modules/isoformat": { - "version": "0.2.1", - "license": "ISC" - }, - "node_modules/isomorphic-dompurify": { - "version": "2.36.0", - "resolved": "https://registry.npmjs.org/isomorphic-dompurify/-/isomorphic-dompurify-2.36.0.tgz", - "integrity": "sha512-E8YkGyPY3a/U5s0WOoc8Ok+3SWL/33yn2IHCoxCFLBUUPVy9WGa++akJZFxQCcJIhI+UvYhbrbnTIFQkHKZbgA==", - "license": "MIT", - "dependencies": { - "dompurify": "^3.3.1", - "jsdom": "^28.0.0" - }, - "engines": { - "node": ">=20.19.5" - } - }, - "node_modules/jackspeak": { - "version": "3.4.3", - "license": "BlueOak-1.0.0", - "dependencies": { - "@isaacs/cliui": "^8.0.2" - }, - "funding": { - "url": "https://github.com/sponsors/isaacs" - }, - "optionalDependencies": { - "@pkgjs/parseargs": "^0.11.0" - } - }, - "node_modules/jiti": { - "version": "2.7.0", - "license": "MIT", - "bin": { - 
"jiti": "lib/jiti-cli.mjs" - } - }, - "node_modules/js-beautify": { - "version": "1.15.4", - "dev": true, - "license": "MIT", - "dependencies": { - "config-chain": "^1.1.13", - "editorconfig": "^1.0.4", - "glob": "^10.4.2", - "js-cookie": "^3.0.5", - "nopt": "^7.2.1" - }, - "bin": { - "css-beautify": "js/bin/css-beautify.js", - "html-beautify": "js/bin/html-beautify.js", - "js-beautify": "js/bin/js-beautify.js" - }, - "engines": { - "node": ">=14" - } - }, - "node_modules/js-cookie": { - "version": "3.0.7", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=20" - } - }, - "node_modules/js-tokens": { - "version": "4.0.0", - "license": "MIT" - }, - "node_modules/jsdom": { - "version": "28.1.0", - "resolved": "https://registry.npmjs.org/jsdom/-/jsdom-28.1.0.tgz", - "integrity": "sha512-0+MoQNYyr2rBHqO1xilltfDjV9G7ymYGlAUazgcDLQaUf8JDHbuGwsxN6U9qWaElZ4w1B2r7yEGIL3GdeW3Rug==", - "license": "MIT", - "dependencies": { - "@acemir/cssom": "^0.9.31", - "@asamuzakjp/dom-selector": "^6.8.1", - "@bramus/specificity": "^2.4.2", - "@exodus/bytes": "^1.11.0", - "cssstyle": "^6.0.1", - "data-urls": "^7.0.0", - "decimal.js": "^10.6.0", - "html-encoding-sniffer": "^6.0.0", - "http-proxy-agent": "^7.0.2", - "https-proxy-agent": "^7.0.6", - "is-potential-custom-element-name": "^1.0.1", - "parse5": "^8.0.0", - "saxes": "^6.0.0", - "symbol-tree": "^3.2.4", - "tough-cookie": "^6.0.0", - "undici": "^7.21.0", - "w3c-xmlserializer": "^5.0.0", - "webidl-conversions": "^8.0.1", - "whatwg-mimetype": "^5.0.0", - "whatwg-url": "^16.0.0", - "xml-name-validator": "^5.0.0" - }, - "engines": { - "node": "^20.19.0 || ^22.12.0 || >=24.0.0" - }, - "peerDependencies": { - "canvas": "^3.0.0" - }, - "peerDependenciesMeta": { - "canvas": { - "optional": true - } - } - }, - "node_modules/jsdom/node_modules/tr46": { - "version": "6.0.0", - "resolved": "https://registry.npmjs.org/tr46/-/tr46-6.0.0.tgz", - "integrity": 
"sha512-bLVMLPtstlZ4iMQHpFHTR7GAGj2jxi8Dg0s2h2MafAE4uSWF98FC/3MomU51iQAMf8/qDUbKWf5GxuvvVcXEhw==", - "license": "MIT", - "dependencies": { - "punycode": "^2.3.1" - }, - "engines": { - "node": ">=20" - } - }, - "node_modules/jsdom/node_modules/webidl-conversions": { - "version": "8.0.1", - "resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-8.0.1.tgz", - "integrity": "sha512-BMhLD/Sw+GbJC21C/UgyaZX41nPt8bUTg+jWyDeg7e7YN4xOM05YPSIXceACnXVtqyEw/LMClUQMtMZ+PGGpqQ==", - "license": "BSD-2-Clause", - "engines": { - "node": ">=20" - } - }, - "node_modules/jsdom/node_modules/whatwg-mimetype": { - "version": "5.0.0", - "resolved": "https://registry.npmjs.org/whatwg-mimetype/-/whatwg-mimetype-5.0.0.tgz", - "integrity": "sha512-sXcNcHOC51uPGF0P/D4NVtrkjSU2fNsm9iog4ZvZJsL3rjoDAzXZhkm2MWt1y+PUdggKAYVoMAIYcs78wJ51Cw==", - "license": "MIT", - "engines": { - "node": ">=20" - } - }, - "node_modules/jsdom/node_modules/whatwg-url": { - "version": "16.0.1", - "resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-16.0.1.tgz", - "integrity": "sha512-1to4zXBxmXHV3IiSSEInrreIlu02vUOvrhxJJH5vcxYTBDAx51cqZiKdyTxlecdKNSjj8EcxGBxNf6Vg+945gw==", - "license": "MIT", - "dependencies": { - "@exodus/bytes": "^1.11.0", - "tr46": "^6.0.0", - "webidl-conversions": "^8.0.1" - }, - "engines": { - "node": "^20.19.0 || ^22.12.0 || >=24.0.0" - } - }, - "node_modules/jsesc": { - "version": "3.1.0", - "license": "MIT", - "bin": { - "jsesc": "bin/jsesc" - }, - "engines": { - "node": ">=6" - } - }, - "node_modules/json-bignum": { - "version": "0.0.3", - "engines": { - "node": ">=0.8" - } - }, - "node_modules/json5": { - "version": "2.2.3", - "license": "MIT", - "bin": { - "json5": "lib/cli.js" - }, - "engines": { - "node": ">=6" - } - }, - "node_modules/jwt-decode": { - "version": "4.0.0", - "license": "MIT", - "engines": { - "node": ">=18" - } - }, - "node_modules/kleur": { - "version": "4.1.5", - "license": "MIT", - "engines": { - "node": ">=6" - } - }, - 
"node_modules/klona": { - "version": "2.0.6", - "license": "MIT", - "engines": { - "node": ">= 8" - } - }, - "node_modules/knitwork": { - "version": "1.3.0", - "license": "MIT" - }, - "node_modules/launch-editor": { - "version": "2.13.2", - "license": "MIT", - "dependencies": { - "picocolors": "^1.1.1", - "shell-quote": "^1.8.3" - } - }, - "node_modules/layout-base": { - "version": "1.0.2", - "license": "MIT" - }, - "node_modules/lazystream": { - "version": "1.0.1", - "license": "MIT", - "dependencies": { - "readable-stream": "^2.0.5" - }, - "engines": { - "node": ">= 0.6.3" - } - }, - "node_modules/lazystream/node_modules/readable-stream": { - "version": "2.3.8", - "license": "MIT", - "dependencies": { - "core-util-is": "~1.0.0", - "inherits": "~2.0.3", - "isarray": "~1.0.0", - "process-nextick-args": "~2.0.0", - "safe-buffer": "~5.1.1", - "string_decoder": "~1.1.1", - "util-deprecate": "~1.0.1" - } - }, - "node_modules/lazystream/node_modules/safe-buffer": { - "version": "5.1.2", - "license": "MIT" - }, - "node_modules/lazystream/node_modules/string_decoder": { - "version": "1.1.1", - "license": "MIT", - "dependencies": { - "safe-buffer": "~5.1.0" - } - }, - "node_modules/lightningcss": { - "version": "1.32.0", - "license": "MPL-2.0", - "dependencies": { - "detect-libc": "^2.0.3" - }, - "engines": { - "node": ">= 12.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/parcel" - }, - "optionalDependencies": { - "lightningcss-android-arm64": "1.32.0", - "lightningcss-darwin-arm64": "1.32.0", - "lightningcss-darwin-x64": "1.32.0", - "lightningcss-freebsd-x64": "1.32.0", - "lightningcss-linux-arm-gnueabihf": "1.32.0", - "lightningcss-linux-arm64-gnu": "1.32.0", - "lightningcss-linux-arm64-musl": "1.32.0", - "lightningcss-linux-x64-gnu": "1.32.0", - "lightningcss-linux-x64-musl": "1.32.0", - "lightningcss-win32-arm64-msvc": "1.32.0", - "lightningcss-win32-x64-msvc": "1.32.0" - } - }, - "node_modules/lightningcss-linux-x64-gnu": { - 
"version": "1.32.0", - "cpu": [ - "x64" - ], - "license": "MPL-2.0", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": ">= 12.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/parcel" - } - }, - "node_modules/lilconfig": { - "version": "3.1.3", - "license": "MIT", - "engines": { - "node": ">=14" - }, - "funding": { - "url": "https://github.com/sponsors/antonk52" - } - }, - "node_modules/listhen": { - "version": "1.10.0", - "license": "MIT", - "dependencies": { - "@parcel/watcher": "^2.5.6", - "@parcel/watcher-wasm": "^2.5.6", - "citty": "^0.2.2", - "consola": "^3.4.2", - "crossws": ">=0.2.0 <0.5.0", - "defu": "^6.1.7", - "get-port-please": "^3.2.0", - "h3": "^1.15.11", - "http-shutdown": "^1.2.2", - "jiti": "^2.6.1", - "mlly": "^1.8.2", - "node-forge": "^1.4.0", - "pathe": "^2.0.3", - "std-env": "^4.1.0", - "tinyclip": "^0.1.12", - "ufo": "^1.6.4", - "untun": "^0.1.3", - "uqr": "^0.1.3" - }, - "bin": { - "listen": "bin/listhen.mjs", - "listhen": "bin/listhen.mjs" - } - }, - "node_modules/local-pkg": { - "version": "1.2.1", - "license": "MIT", - "dependencies": { - "mlly": "^1.7.4", - "pkg-types": "^2.3.0", - "quansync": "^0.2.11" - }, - "engines": { - "node": ">=14" - }, - "funding": { - "url": "https://github.com/sponsors/antfu" - } - }, - "node_modules/lodash": { - "version": "4.18.1", - "license": "MIT" - }, - "node_modules/lodash.camelcase": { - "version": "4.3.0", - "license": "MIT" - }, - "node_modules/lodash.defaults": { - "version": "4.2.0", - "license": "MIT" - }, - "node_modules/lodash.isarguments": { - "version": "3.1.0", - "license": "MIT" - }, - "node_modules/lodash.memoize": { - "version": "4.1.2", - "license": "MIT" - }, - "node_modules/lodash.uniq": { - "version": "4.5.0", - "license": "MIT" - }, - "node_modules/loupe": { - "version": "3.2.1", - "dev": true, - "license": "MIT" - }, - "node_modules/lru-cache": { - "version": "5.1.1", - "license": "ISC", - "dependencies": { - "yallist": 
"^3.0.2" - } - }, - "node_modules/lucide-vue-next": { - "version": "0.563.0", - "license": "ISC", - "peerDependencies": { - "vue": ">=3.0.1" - } - }, - "node_modules/magic-regexp": { - "version": "0.11.0", - "license": "MIT", - "dependencies": { - "magic-string": "^0.30.21", - "regexp-tree": "^0.1.27", - "type-level-regexp": "~0.1.17", - "unplugin": "^3.0.0" - } - }, - "node_modules/magic-string": { - "version": "0.30.21", - "license": "MIT", - "dependencies": { - "@jridgewell/sourcemap-codec": "^1.5.5" - } - }, - "node_modules/magic-string-ast": { - "version": "1.0.3", - "license": "MIT", - "dependencies": { - "magic-string": "^0.30.19" - }, - "engines": { - "node": ">=20.19.0" - }, - "funding": { - "url": "https://github.com/sponsors/sxzz" - } - }, - "node_modules/magicast": { - "version": "0.5.3", - "license": "MIT", - "dependencies": { - "@babel/parser": "^7.29.3", - "@babel/types": "^7.29.0", - "source-map-js": "^1.2.1" - } - }, - "node_modules/marked": { - "version": "15.0.12", - "resolved": "https://registry.npmjs.org/marked/-/marked-15.0.12.tgz", - "integrity": "sha512-8dD6FusOQSrpv9Z1rdNMdlSgQOIP880DHqnohobOmYLElGEqAL/JvxvuxZO16r4HtjTlfPRDC1hbvxC9dPN2nA==", - "license": "MIT", - "bin": { - "marked": "bin/marked.js" - }, - "engines": { - "node": ">= 18" - } - }, - "node_modules/mdn-data": { - "version": "2.27.1", - "license": "CC0-1.0" - }, - "node_modules/merge-stream": { - "version": "2.0.0", - "license": "MIT" - }, - "node_modules/merge2": { - "version": "1.4.1", - "license": "MIT", - "engines": { - "node": ">= 8" - } - }, - "node_modules/micromatch": { - "version": "4.0.8", - "license": "MIT", - "dependencies": { - "braces": "^3.0.3", - "picomatch": "^2.3.1" - }, - "engines": { - "node": ">=8.6" - } - }, - "node_modules/micromatch/node_modules/picomatch": { - "version": "2.3.2", - "license": "MIT", - "engines": { - "node": ">=8.6" - }, - "funding": { - "url": "https://github.com/sponsors/jonschlinkert" - } - }, - "node_modules/mime": { - "version": 
"4.1.0", - "funding": [ - "https://github.com/sponsors/broofa" - ], - "license": "MIT", - "bin": { - "mime": "bin/cli.js" - }, - "engines": { - "node": ">=16" - } - }, - "node_modules/mime-db": { - "version": "1.54.0", - "license": "MIT", - "engines": { - "node": ">= 0.6" - } - }, - "node_modules/mime-types": { - "version": "3.0.2", - "license": "MIT", - "dependencies": { - "mime-db": "^1.54.0" - }, - "engines": { - "node": ">=18" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/express" - } - }, - "node_modules/mimic-fn": { - "version": "4.0.0", - "license": "MIT", - "engines": { - "node": ">=12" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/minimatch": { - "version": "9.0.9", - "license": "ISC", - "dependencies": { - "brace-expansion": "^2.0.2" - }, - "engines": { - "node": ">=16 || 14 >=14.17" - }, - "funding": { - "url": "https://github.com/sponsors/isaacs" - } - }, - "node_modules/minipass": { - "version": "7.1.3", - "license": "BlueOak-1.0.0", - "engines": { - "node": ">=16 || 14 >=14.17" - } - }, - "node_modules/minizlib": { - "version": "3.1.0", - "license": "MIT", - "dependencies": { - "minipass": "^7.1.2" - }, - "engines": { - "node": ">= 18" - } - }, - "node_modules/mlly": { - "version": "1.8.2", - "license": "MIT", - "dependencies": { - "acorn": "^8.16.0", - "pathe": "^2.0.3", - "pkg-types": "^1.3.1", - "ufo": "^1.6.3" - } - }, - "node_modules/mlly/node_modules/confbox": { - "version": "0.1.8", - "license": "MIT" - }, - "node_modules/mlly/node_modules/pkg-types": { - "version": "1.3.1", - "license": "MIT", - "dependencies": { - "confbox": "^0.1.8", - "mlly": "^1.7.4", - "pathe": "^2.0.1" - } - }, - "node_modules/mocked-exports": { - "version": "0.1.1", - "license": "MIT" - }, - "node_modules/mrmime": { - "version": "2.0.1", - "license": "MIT", - "engines": { - "node": ">=10" - } - }, - "node_modules/ms": { - "version": "2.1.3", - "license": "MIT" - }, - 
"node_modules/muggle-string": { - "version": "0.4.1", - "license": "MIT" - }, - "node_modules/nanoid": { - "version": "3.3.12", - "funding": [ - { - "type": "github", - "url": "https://github.com/sponsors/ai" - } - ], - "license": "MIT", - "bin": { - "nanoid": "bin/nanoid.cjs" - }, - "engines": { - "node": "^10 || ^12 || ^13.7 || ^14 || >=15.0.1" - } - }, - "node_modules/nanotar": { - "version": "0.3.0", - "license": "MIT" - }, - "node_modules/nitropack": { - "version": "2.13.4", - "license": "MIT", - "dependencies": { - "@cloudflare/kv-asset-handler": "^0.4.2", - "@rollup/plugin-alias": "^6.0.0", - "@rollup/plugin-commonjs": "^29.0.2", - "@rollup/plugin-inject": "^5.0.5", - "@rollup/plugin-json": "^6.1.0", - "@rollup/plugin-node-resolve": "^16.0.3", - "@rollup/plugin-replace": "^6.0.3", - "@rollup/plugin-terser": "^1.0.0", - "@vercel/nft": "^1.5.0", - "archiver": "^7.0.1", - "c12": "^3.3.4", - "chokidar": "^5.0.0", - "citty": "^0.2.2", - "compatx": "^0.2.0", - "confbox": "^0.2.4", - "consola": "^3.4.2", - "cookie-es": "^2.0.1", - "croner": "^10.0.1", - "crossws": "^0.3.5", - "db0": "^0.3.4", - "defu": "^6.1.7", - "destr": "^2.0.5", - "dot-prop": "^10.1.0", - "esbuild": "^0.28.0", - "escape-string-regexp": "^5.0.0", - "etag": "^1.8.1", - "exsolve": "^1.0.8", - "globby": "^16.2.0", - "gzip-size": "^7.0.0", - "h3": "^1.15.11", - "hookable": "^5.5.3", - "httpxy": "^0.5.1", - "ioredis": "^5.10.1", - "jiti": "^2.6.1", - "klona": "^2.0.6", - "knitwork": "^1.3.0", - "listhen": "^1.9.1", - "magic-string": "^0.30.21", - "magicast": "^0.5.2", - "mime": "^4.1.0", - "mlly": "^1.8.2", - "node-fetch-native": "^1.6.7", - "node-mock-http": "^1.0.4", - "ofetch": "^1.5.1", - "ohash": "^2.0.11", - "pathe": "^2.0.3", - "perfect-debounce": "^2.1.0", - "pkg-types": "^2.3.1", - "pretty-bytes": "^7.1.0", - "radix3": "^1.1.2", - "rollup": "^4.60.2", - "rollup-plugin-visualizer": "^7.0.1", - "scule": "^1.3.0", - "semver": "^7.7.4", - "serve-placeholder": "^2.0.2", - "serve-static": 
"^2.2.1", - "source-map": "^0.7.6", - "std-env": "^4.1.0", - "ufo": "^1.6.4", - "ultrahtml": "^1.6.0", - "uncrypto": "^0.1.3", - "unctx": "^2.5.0", - "unenv": "2.0.0-rc.24", - "unimport": "^6.2.0", - "unplugin-utils": "^0.3.1", - "unstorage": "^1.17.5", - "untyped": "^2.0.0", - "unwasm": "^0.5.3", - "youch": "^4.1.1", - "youch-core": "^0.3.3" - }, - "bin": { - "nitro": "dist/cli/index.mjs", - "nitropack": "dist/cli/index.mjs" - }, - "engines": { - "node": "^20.19.0 || >=22.12.0" - }, - "peerDependencies": { - "xml2js": "^0.6.2" - }, - "peerDependenciesMeta": { - "xml2js": { - "optional": true - } - } - }, - "node_modules/nitropack/node_modules/cookie-es": { - "version": "2.0.1", - "license": "MIT" - }, - "node_modules/nitropack/node_modules/hookable": { - "version": "5.5.3", - "license": "MIT" - }, - "node_modules/node-addon-api": { - "version": "7.1.1", - "license": "MIT" - }, - "node_modules/node-fetch": { - "version": "2.7.0", - "license": "MIT", - "dependencies": { - "whatwg-url": "^5.0.0" - }, - "engines": { - "node": "4.x || >=6.0.0" - }, - "peerDependencies": { - "encoding": "^0.1.0" - }, - "peerDependenciesMeta": { - "encoding": { - "optional": true - } - } - }, - "node_modules/node-fetch-native": { - "version": "1.6.7", - "license": "MIT" - }, - "node_modules/node-forge": { - "version": "1.4.0", - "license": "(BSD-3-Clause OR GPL-2.0)", - "engines": { - "node": ">= 6.13.0" - } - }, - "node_modules/node-gyp-build": { - "version": "4.8.4", - "license": "MIT", - "bin": { - "node-gyp-build": "bin.js", - "node-gyp-build-optional": "optional.js", - "node-gyp-build-test": "build-test.js" - } - }, - "node_modules/node-mock-http": { - "version": "1.0.4", - "license": "MIT" - }, - "node_modules/node-releases": { - "version": "2.0.45", - "license": "MIT", - "engines": { - "node": ">=18" - } - }, - "node_modules/nopt": { - "version": "7.2.1", - "dev": true, - "license": "ISC", - "dependencies": { - "abbrev": "^2.0.0" - }, - "bin": { - "nopt": "bin/nopt.js" - }, - 
"engines": { - "node": "^14.17.0 || ^16.13.0 || >=18.0.0" - } - }, - "node_modules/normalize-path": { - "version": "3.0.0", - "license": "MIT", - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/npm-run-path": { - "version": "5.3.0", - "license": "MIT", - "dependencies": { - "path-key": "^4.0.0" - }, - "engines": { - "node": "^12.20.0 || ^14.13.1 || >=16.0.0" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/npm-run-path/node_modules/path-key": { - "version": "4.0.0", - "license": "MIT", - "engines": { - "node": ">=12" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/nth-check": { - "version": "2.1.1", - "license": "BSD-2-Clause", - "dependencies": { - "boolbase": "^1.0.0" - }, - "funding": { - "url": "https://github.com/fb55/nth-check?sponsor=1" - } - }, - "node_modules/nuxt": { - "version": "4.4.6", - "license": "MIT", - "dependencies": { - "@dxup/nuxt": "^0.4.1", - "@nuxt/cli": "^3.35.2", - "@nuxt/devtools": "^3.2.4", - "@nuxt/kit": "4.4.6", - "@nuxt/nitro-server": "4.4.6", - "@nuxt/schema": "4.4.6", - "@nuxt/telemetry": "^2.8.0", - "@nuxt/vite-builder": "4.4.6", - "@unhead/vue": "^2.1.15", - "@vue/shared": "^3.5.34", - "chokidar": "^5.0.0", - "compatx": "^0.2.0", - "consola": "^3.4.2", - "cookie-es": "^3.1.1", - "defu": "^6.1.7", - "devalue": "^5.8.1", - "errx": "^0.1.0", - "escape-string-regexp": "^5.0.0", - "exsolve": "^1.0.8", - "hookable": "^6.1.1", - "ignore": "^7.0.5", - "impound": "^1.1.5", - "jiti": "^2.7.0", - "klona": "^2.0.6", - "knitwork": "^1.3.0", - "magic-string": "^0.30.21", - "mlly": "^1.8.2", - "nanotar": "^0.3.0", - "nypm": "^0.6.6", - "ofetch": "^1.5.1", - "ohash": "^2.0.11", - "on-change": "^6.0.2", - "oxc-minify": "^0.131.0", - "oxc-parser": "^0.131.0", - "oxc-transform": "^0.131.0", - "oxc-walker": "^1.0.0", - "pathe": "^2.0.3", - "perfect-debounce": "^2.1.0", - "picomatch": "^4.0.4", - "pkg-types": "^2.3.1", - "rou3": "^0.8.1", - 
"scule": "^1.3.0", - "semver": "^7.8.0", - "std-env": "^4.1.0", - "tinyglobby": "^0.2.16", - "ufo": "^1.6.4", - "ultrahtml": "^1.6.0", - "uncrypto": "^0.1.3", - "unctx": "^2.5.0", - "unimport": "^6.3.0", - "unplugin": "^3.0.0", - "unrouting": "^0.1.7", - "untyped": "^2.0.0", - "vue": "^3.5.34", - "vue-router": "^5.0.7" - }, - "bin": { - "nuxi": "bin/nuxt.mjs", - "nuxt": "bin/nuxt.mjs" - }, - "engines": { - "node": "^22.12.0 || ^24.11.0 || >=26.0.0" - }, - "peerDependencies": { - "@parcel/watcher": "^2.1.0", - "@types/node": ">=18.12.0" - }, - "peerDependenciesMeta": { - "@parcel/watcher": { - "optional": true - }, - "@types/node": { - "optional": true - } - } - }, - "node_modules/nuxt/node_modules/@babel/generator": { - "version": "8.0.0-rc.5", - "license": "MIT", - "dependencies": { - "@babel/parser": "^8.0.0-rc.5", - "@babel/types": "^8.0.0-rc.5", - "@jridgewell/gen-mapping": "^0.3.12", - "@jridgewell/trace-mapping": "^0.3.28", - "@types/jsesc": "^2.5.0", - "jsesc": "^3.0.2" - }, - "engines": { - "node": "^22.18.0 || >=24.11.0" - } - }, - "node_modules/nuxt/node_modules/@babel/helper-string-parser": { - "version": "8.0.0-rc.5", - "license": "MIT", - "engines": { - "node": "^22.18.0 || >=24.11.0" - } - }, - "node_modules/nuxt/node_modules/@babel/helper-validator-identifier": { - "version": "8.0.0-rc.5", - "license": "MIT", - "engines": { - "node": "^22.18.0 || >=24.11.0" - } - }, - "node_modules/nuxt/node_modules/@babel/parser": { - "version": "8.0.0-rc.5", - "license": "MIT", - "dependencies": { - "@babel/types": "^8.0.0-rc.5" - }, - "bin": { - "parser": "bin/babel-parser.js" - }, - "engines": { - "node": "^22.18.0 || >=24.11.0" - } - }, - "node_modules/nuxt/node_modules/@babel/types": { - "version": "8.0.0-rc.5", - "license": "MIT", - "dependencies": { - "@babel/helper-string-parser": "^8.0.0-rc.5", - "@babel/helper-validator-identifier": "^8.0.0-rc.5" - }, - "engines": { - "node": "^22.18.0 || >=24.11.0" - } - }, - 
"node_modules/nuxt/node_modules/@vue/devtools-api": { - "version": "8.1.2", - "license": "MIT", - "dependencies": { - "@vue/devtools-kit": "^8.1.2" - } - }, - "node_modules/nuxt/node_modules/vue-router": { - "version": "5.0.7", - "license": "MIT", - "dependencies": { - "@babel/generator": "^8.0.0-rc.4", - "@vue-macros/common": "^3.1.1", - "@vue/devtools-api": "^8.1.1", - "ast-walker-scope": "^0.8.3", - "chokidar": "^5.0.0", - "json5": "^2.2.3", - "local-pkg": "^1.1.2", - "magic-string": "^0.30.21", - "mlly": "^1.8.0", - "muggle-string": "^0.4.1", - "pathe": "^2.0.3", - "picomatch": "^4.0.3", - "scule": "^1.3.0", - "tinyglobby": "^0.2.15", - "unplugin": "^3.0.0", - "unplugin-utils": "^0.3.1", - "yaml": "^2.8.2" - }, - "funding": { - "url": "https://github.com/sponsors/posva" - }, - "peerDependencies": { - "@pinia/colada": ">=0.21.2", - "@vue/compiler-sfc": "^3.5.34", - "pinia": "^3.0.4", - "vue": "^3.5.34" - }, - "peerDependenciesMeta": { - "@pinia/colada": { - "optional": true - }, - "@vue/compiler-sfc": { - "optional": true - }, - "pinia": { - "optional": true - } - } - }, - "node_modules/nypm": { - "version": "0.6.6", - "license": "MIT", - "dependencies": { - "citty": "^0.2.2", - "pathe": "^2.0.3", - "tinyexec": "^1.1.1" - }, - "bin": { - "nypm": "dist/cli.mjs" - }, - "engines": { - "node": ">=18" - } - }, - "node_modules/obug": { - "version": "2.1.1", - "funding": [ - "https://github.com/sponsors/sxzz", - "https://opencollective.com/debug" - ], - "license": "MIT" - }, - "node_modules/ofetch": { - "version": "1.5.1", - "license": "MIT", - "dependencies": { - "destr": "^2.0.5", - "node-fetch-native": "^1.6.7", - "ufo": "^1.6.1" - } - }, - "node_modules/ohash": { - "version": "2.0.11", - "license": "MIT" - }, - "node_modules/oidc-client-ts": { - "version": "3.5.0", - "license": "Apache-2.0", - "dependencies": { - "jwt-decode": "^4.0.0" - }, - "engines": { - "node": ">=18" - } - }, - "node_modules/on-change": { - "version": "6.0.2", - "license": "MIT", - "engines": 
{ - "node": ">=20" - }, - "funding": { - "url": "https://github.com/sindresorhus/on-change?sponsor=1" - } - }, - "node_modules/on-finished": { - "version": "2.4.1", - "license": "MIT", - "dependencies": { - "ee-first": "1.1.1" - }, - "engines": { - "node": ">= 0.8" - } - }, - "node_modules/onetime": { - "version": "6.0.0", - "license": "MIT", - "dependencies": { - "mimic-fn": "^4.0.0" - }, - "engines": { - "node": ">=12" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/open": { - "version": "11.0.0", - "license": "MIT", - "dependencies": { - "default-browser": "^5.4.0", - "define-lazy-prop": "^3.0.0", - "is-in-ssh": "^1.0.0", - "is-inside-container": "^1.0.0", - "powershell-utils": "^0.1.0", - "wsl-utils": "^0.3.0" - }, - "engines": { - "node": ">=20" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/oxc-minify": { - "version": "0.131.0", - "license": "MIT", - "engines": { - "node": "^20.19.0 || >=22.12.0" - }, - "funding": { - "url": "https://github.com/sponsors/Boshen" - }, - "optionalDependencies": { - "@oxc-minify/binding-android-arm-eabi": "0.131.0", - "@oxc-minify/binding-android-arm64": "0.131.0", - "@oxc-minify/binding-darwin-arm64": "0.131.0", - "@oxc-minify/binding-darwin-x64": "0.131.0", - "@oxc-minify/binding-freebsd-x64": "0.131.0", - "@oxc-minify/binding-linux-arm-gnueabihf": "0.131.0", - "@oxc-minify/binding-linux-arm-musleabihf": "0.131.0", - "@oxc-minify/binding-linux-arm64-gnu": "0.131.0", - "@oxc-minify/binding-linux-arm64-musl": "0.131.0", - "@oxc-minify/binding-linux-ppc64-gnu": "0.131.0", - "@oxc-minify/binding-linux-riscv64-gnu": "0.131.0", - "@oxc-minify/binding-linux-riscv64-musl": "0.131.0", - "@oxc-minify/binding-linux-s390x-gnu": "0.131.0", - "@oxc-minify/binding-linux-x64-gnu": "0.131.0", - "@oxc-minify/binding-linux-x64-musl": "0.131.0", - "@oxc-minify/binding-openharmony-arm64": "0.131.0", - "@oxc-minify/binding-wasm32-wasi": "0.131.0", - 
"@oxc-minify/binding-win32-arm64-msvc": "0.131.0", - "@oxc-minify/binding-win32-ia32-msvc": "0.131.0", - "@oxc-minify/binding-win32-x64-msvc": "0.131.0" - } - }, - "node_modules/oxc-parser": { - "version": "0.131.0", - "license": "MIT", - "dependencies": { - "@oxc-project/types": "^0.131.0" - }, - "engines": { - "node": "^20.19.0 || >=22.12.0" - }, - "funding": { - "url": "https://github.com/sponsors/Boshen" - }, - "optionalDependencies": { - "@oxc-parser/binding-android-arm-eabi": "0.131.0", - "@oxc-parser/binding-android-arm64": "0.131.0", - "@oxc-parser/binding-darwin-arm64": "0.131.0", - "@oxc-parser/binding-darwin-x64": "0.131.0", - "@oxc-parser/binding-freebsd-x64": "0.131.0", - "@oxc-parser/binding-linux-arm-gnueabihf": "0.131.0", - "@oxc-parser/binding-linux-arm-musleabihf": "0.131.0", - "@oxc-parser/binding-linux-arm64-gnu": "0.131.0", - "@oxc-parser/binding-linux-arm64-musl": "0.131.0", - "@oxc-parser/binding-linux-ppc64-gnu": "0.131.0", - "@oxc-parser/binding-linux-riscv64-gnu": "0.131.0", - "@oxc-parser/binding-linux-riscv64-musl": "0.131.0", - "@oxc-parser/binding-linux-s390x-gnu": "0.131.0", - "@oxc-parser/binding-linux-x64-gnu": "0.131.0", - "@oxc-parser/binding-linux-x64-musl": "0.131.0", - "@oxc-parser/binding-openharmony-arm64": "0.131.0", - "@oxc-parser/binding-wasm32-wasi": "0.131.0", - "@oxc-parser/binding-win32-arm64-msvc": "0.131.0", - "@oxc-parser/binding-win32-ia32-msvc": "0.131.0", - "@oxc-parser/binding-win32-x64-msvc": "0.131.0" - } - }, - "node_modules/oxc-transform": { - "version": "0.131.0", - "license": "MIT", - "engines": { - "node": "^20.19.0 || >=22.12.0" - }, - "funding": { - "url": "https://github.com/sponsors/Boshen" - }, - "optionalDependencies": { - "@oxc-transform/binding-android-arm-eabi": "0.131.0", - "@oxc-transform/binding-android-arm64": "0.131.0", - "@oxc-transform/binding-darwin-arm64": "0.131.0", - "@oxc-transform/binding-darwin-x64": "0.131.0", - "@oxc-transform/binding-freebsd-x64": "0.131.0", - 
"@oxc-transform/binding-linux-arm-gnueabihf": "0.131.0", - "@oxc-transform/binding-linux-arm-musleabihf": "0.131.0", - "@oxc-transform/binding-linux-arm64-gnu": "0.131.0", - "@oxc-transform/binding-linux-arm64-musl": "0.131.0", - "@oxc-transform/binding-linux-ppc64-gnu": "0.131.0", - "@oxc-transform/binding-linux-riscv64-gnu": "0.131.0", - "@oxc-transform/binding-linux-riscv64-musl": "0.131.0", - "@oxc-transform/binding-linux-s390x-gnu": "0.131.0", - "@oxc-transform/binding-linux-x64-gnu": "0.131.0", - "@oxc-transform/binding-linux-x64-musl": "0.131.0", - "@oxc-transform/binding-openharmony-arm64": "0.131.0", - "@oxc-transform/binding-wasm32-wasi": "0.131.0", - "@oxc-transform/binding-win32-arm64-msvc": "0.131.0", - "@oxc-transform/binding-win32-ia32-msvc": "0.131.0", - "@oxc-transform/binding-win32-x64-msvc": "0.131.0" - } - }, - "node_modules/oxc-walker": { - "version": "1.0.0", - "license": "MIT", - "dependencies": { - "magic-regexp": "^0.11.0" - }, - "peerDependencies": { - "oxc-parser": ">=0.98.0", - "rolldown": ">=1.0.0" - }, - "peerDependenciesMeta": { - "oxc-parser": { - "optional": true - }, - "rolldown": { - "optional": true - } - } - }, - "node_modules/package-json-from-dist": { - "version": "1.0.1", - "license": "BlueOak-1.0.0" - }, - "node_modules/parse5": { - "version": "8.0.1", - "resolved": "https://registry.npmjs.org/parse5/-/parse5-8.0.1.tgz", - "integrity": "sha512-z1e/HMG90obSGeidlli3hj7cbocou0/wa5HacvI3ASx34PecNjNQeaHNo5WIZpWofN9kgkqV1q5YvXe3F0FoPw==", - "license": "MIT", - "dependencies": { - "entities": "^8.0.0" - }, - "funding": { - "url": "https://github.com/inikulin/parse5?sponsor=1" - } - }, - "node_modules/parse5/node_modules/entities": { - "version": "8.0.0", - "resolved": "https://registry.npmjs.org/entities/-/entities-8.0.0.tgz", - "integrity": "sha512-zwfzJecQ/Uej6tusMqwAqU/6KL2XaB2VZ2Jg54Je6ahNBGNH6Ek6g3jjNCF0fG9EWQKGZNddNjU5F1ZQn/sBnA==", - "license": "BSD-2-Clause", - "engines": { - "node": ">=20.19.0" - }, - "funding": { - "url": 
"https://github.com/fb55/entities?sponsor=1" - } - }, - "node_modules/parseurl": { - "version": "1.3.3", - "license": "MIT", - "engines": { - "node": ">= 0.8" - } - }, - "node_modules/path-browserify": { - "version": "1.0.1", - "devOptional": true, - "license": "MIT" - }, - "node_modules/path-key": { - "version": "3.1.1", - "license": "MIT", - "engines": { - "node": ">=8" - } - }, - "node_modules/path-parse": { - "version": "1.0.7", - "license": "MIT" - }, - "node_modules/path-scurry": { - "version": "1.11.1", - "license": "BlueOak-1.0.0", - "dependencies": { - "lru-cache": "^10.2.0", - "minipass": "^5.0.0 || ^6.0.2 || ^7.0.0" - }, - "engines": { - "node": ">=16 || 14 >=14.18" - }, - "funding": { - "url": "https://github.com/sponsors/isaacs" - } - }, - "node_modules/path-scurry/node_modules/lru-cache": { - "version": "10.4.3", - "license": "ISC" - }, - "node_modules/pathe": { - "version": "2.0.3", - "license": "MIT" - }, - "node_modules/pathval": { - "version": "2.0.1", - "dev": true, - "license": "MIT", - "engines": { - "node": ">= 14.16" - } - }, - "node_modules/perfect-debounce": { - "version": "2.1.0", - "license": "MIT" - }, - "node_modules/picocolors": { - "version": "1.1.1", - "license": "ISC" - }, - "node_modules/picomatch": { - "version": "4.0.4", - "license": "MIT", - "engines": { - "node": ">=12" - }, - "funding": { - "url": "https://github.com/sponsors/jonschlinkert" - } - }, - "node_modules/pkg-types": { - "version": "2.3.1", - "license": "MIT", - "dependencies": { - "confbox": "^0.2.4", - "exsolve": "^1.0.8", - "pathe": "^2.0.3" - } - }, - "node_modules/postcss": { - "version": "8.5.15", - "funding": [ - { - "type": "opencollective", - "url": "https://opencollective.com/postcss/" - }, - { - "type": "tidelift", - "url": "https://tidelift.com/funding/github/npm/postcss" - }, - { - "type": "github", - "url": "https://github.com/sponsors/ai" - } - ], - "license": "MIT", - "dependencies": { - "nanoid": "^3.3.12", - "picocolors": "^1.1.1", - 
"source-map-js": "^1.2.1" - }, - "engines": { - "node": "^10 || ^12 || >=14" - } - }, - "node_modules/postcss-calc": { - "version": "10.1.1", - "license": "MIT", - "dependencies": { - "postcss-selector-parser": "^7.0.0", - "postcss-value-parser": "^4.2.0" - }, - "engines": { - "node": "^18.12 || ^20.9 || >=22.0" - }, - "peerDependencies": { - "postcss": "^8.4.38" - } - }, - "node_modules/postcss-colormin": { - "version": "8.0.0", - "license": "MIT", - "dependencies": { - "@colordx/core": "^5.4.3", - "browserslist": "^4.28.2", - "caniuse-api": "^3.0.0", - "postcss-value-parser": "^4.2.0" - }, - "engines": { - "node": "^22.11.0 || ^24.11.0 || >=26.0" - }, - "peerDependencies": { - "postcss": "^8.5.14" - } - }, - "node_modules/postcss-convert-values": { - "version": "8.0.0", - "license": "MIT", - "dependencies": { - "browserslist": "^4.28.2", - "postcss-value-parser": "^4.2.0" - }, - "engines": { - "node": "^22.11.0 || ^24.11.0 || >=26.0" - }, - "peerDependencies": { - "postcss": "^8.5.14" - } - }, - "node_modules/postcss-discard-comments": { - "version": "8.0.0", - "license": "MIT", - "dependencies": { - "postcss-selector-parser": "^7.1.1" - }, - "engines": { - "node": "^22.11.0 || ^24.11.0 || >=26.0" - }, - "peerDependencies": { - "postcss": "^8.5.14" - } - }, - "node_modules/postcss-discard-duplicates": { - "version": "8.0.0", - "license": "MIT", - "engines": { - "node": "^22.11.0 || ^24.11.0 || >=26.0" - }, - "peerDependencies": { - "postcss": "^8.5.14" - } - }, - "node_modules/postcss-discard-empty": { - "version": "8.0.0", - "license": "MIT", - "engines": { - "node": "^22.11.0 || ^24.11.0 || >=26.0" - }, - "peerDependencies": { - "postcss": "^8.5.14" - } - }, - "node_modules/postcss-discard-overridden": { - "version": "8.0.0", - "license": "MIT", - "engines": { - "node": "^22.11.0 || ^24.11.0 || >=26.0" - }, - "peerDependencies": { - "postcss": "^8.5.14" - } - }, - "node_modules/postcss-merge-longhand": { - "version": "8.0.0", - "license": "MIT", - 
"dependencies": { - "postcss-value-parser": "^4.2.0", - "stylehacks": "^8.0.0" - }, - "engines": { - "node": "^22.11.0 || ^24.11.0 || >=26.0" - }, - "peerDependencies": { - "postcss": "^8.5.14" - } - }, - "node_modules/postcss-merge-rules": { - "version": "8.0.0", - "license": "MIT", - "dependencies": { - "browserslist": "^4.28.2", - "caniuse-api": "^3.0.0", - "cssnano-utils": "^6.0.0", - "postcss-selector-parser": "^7.1.1" - }, - "engines": { - "node": "^22.11.0 || ^24.11.0 || >=26.0" - }, - "peerDependencies": { - "postcss": "^8.5.14" - } - }, - "node_modules/postcss-minify-font-values": { - "version": "8.0.0", - "license": "MIT", - "dependencies": { - "postcss-value-parser": "^4.2.0" - }, - "engines": { - "node": "^22.11.0 || ^24.11.0 || >=26.0" - }, - "peerDependencies": { - "postcss": "^8.5.14" - } - }, - "node_modules/postcss-minify-gradients": { - "version": "8.0.0", - "license": "MIT", - "dependencies": { - "@colordx/core": "^5.4.3", - "cssnano-utils": "^6.0.0", - "postcss-value-parser": "^4.2.0" - }, - "engines": { - "node": "^22.11.0 || ^24.11.0 || >=26.0" - }, - "peerDependencies": { - "postcss": "^8.5.14" - } - }, - "node_modules/postcss-minify-params": { - "version": "8.0.0", - "license": "MIT", - "dependencies": { - "browserslist": "^4.28.2", - "cssnano-utils": "^6.0.0", - "postcss-value-parser": "^4.2.0" - }, - "engines": { - "node": "^22.11.0 || ^24.11.0 || >=26.0" - }, - "peerDependencies": { - "postcss": "^8.5.14" - } - }, - "node_modules/postcss-minify-selectors": { - "version": "8.0.1", - "license": "MIT", - "dependencies": { - "browserslist": "^4.28.1", - "caniuse-api": "^3.0.0", - "cssesc": "^3.0.0", - "postcss-selector-parser": "^7.1.1" - }, - "engines": { - "node": "^22.11.0 || ^24.11.0 || >=26.0" - }, - "peerDependencies": { - "postcss": "^8.5.14" - } - }, - "node_modules/postcss-normalize-charset": { - "version": "8.0.0", - "license": "MIT", - "engines": { - "node": "^22.11.0 || ^24.11.0 || >=26.0" - }, - "peerDependencies": { - "postcss": 
"^8.5.14" - } - }, - "node_modules/postcss-normalize-display-values": { - "version": "8.0.0", - "license": "MIT", - "dependencies": { - "postcss-value-parser": "^4.2.0" - }, - "engines": { - "node": "^22.11.0 || ^24.11.0 || >=26.0" - }, - "peerDependencies": { - "postcss": "^8.5.14" - } - }, - "node_modules/postcss-normalize-positions": { - "version": "8.0.0", - "license": "MIT", - "dependencies": { - "postcss-value-parser": "^4.2.0" - }, - "engines": { - "node": "^22.11.0 || ^24.11.0 || >=26.0" - }, - "peerDependencies": { - "postcss": "^8.5.14" - } - }, - "node_modules/postcss-normalize-repeat-style": { - "version": "8.0.0", - "license": "MIT", - "dependencies": { - "postcss-value-parser": "^4.2.0" - }, - "engines": { - "node": "^22.11.0 || ^24.11.0 || >=26.0" - }, - "peerDependencies": { - "postcss": "^8.5.14" - } - }, - "node_modules/postcss-normalize-string": { - "version": "8.0.0", - "license": "MIT", - "dependencies": { - "postcss-value-parser": "^4.2.0" - }, - "engines": { - "node": "^22.11.0 || ^24.11.0 || >=26.0" - }, - "peerDependencies": { - "postcss": "^8.5.14" - } - }, - "node_modules/postcss-normalize-timing-functions": { - "version": "8.0.0", - "license": "MIT", - "dependencies": { - "postcss-value-parser": "^4.2.0" - }, - "engines": { - "node": "^22.11.0 || ^24.11.0 || >=26.0" - }, - "peerDependencies": { - "postcss": "^8.5.14" - } - }, - "node_modules/postcss-normalize-unicode": { - "version": "8.0.0", - "license": "MIT", - "dependencies": { - "browserslist": "^4.28.2", - "postcss-value-parser": "^4.2.0" - }, - "engines": { - "node": "^22.11.0 || ^24.11.0 || >=26.0" - }, - "peerDependencies": { - "postcss": "^8.5.14" - } - }, - "node_modules/postcss-normalize-url": { - "version": "8.0.0", - "license": "MIT", - "dependencies": { - "postcss-value-parser": "^4.2.0" - }, - "engines": { - "node": "^22.11.0 || ^24.11.0 || >=26.0" - }, - "peerDependencies": { - "postcss": "^8.5.14" - } - }, - "node_modules/postcss-normalize-whitespace": { - "version": 
"8.0.0", - "license": "MIT", - "dependencies": { - "postcss-value-parser": "^4.2.0" - }, - "engines": { - "node": "^22.11.0 || ^24.11.0 || >=26.0" - }, - "peerDependencies": { - "postcss": "^8.5.14" - } - }, - "node_modules/postcss-ordered-values": { - "version": "8.0.0", - "license": "MIT", - "dependencies": { - "cssnano-utils": "^6.0.0", - "postcss-value-parser": "^4.2.0" - }, - "engines": { - "node": "^22.11.0 || ^24.11.0 || >=26.0" - }, - "peerDependencies": { - "postcss": "^8.5.14" - } - }, - "node_modules/postcss-reduce-initial": { - "version": "8.0.0", - "license": "MIT", - "dependencies": { - "browserslist": "^4.28.2", - "caniuse-api": "^3.0.0" - }, - "engines": { - "node": "^22.11.0 || ^24.11.0 || >=26.0" - }, - "peerDependencies": { - "postcss": "^8.5.14" - } - }, - "node_modules/postcss-reduce-transforms": { - "version": "8.0.0", - "license": "MIT", - "dependencies": { - "postcss-value-parser": "^4.2.0" - }, - "engines": { - "node": "^22.11.0 || ^24.11.0 || >=26.0" - }, - "peerDependencies": { - "postcss": "^8.5.14" - } - }, - "node_modules/postcss-selector-parser": { - "version": "7.1.1", - "license": "MIT", - "dependencies": { - "cssesc": "^3.0.0", - "util-deprecate": "^1.0.2" - }, - "engines": { - "node": ">=4" - } - }, - "node_modules/postcss-svgo": { - "version": "8.0.0", - "license": "MIT", - "dependencies": { - "postcss-value-parser": "^4.2.0", - "svgo": "^4.0.1" - }, - "engines": { - "node": "^22.11.0 || ^24.11.0 || >=26.0" - }, - "peerDependencies": { - "postcss": "^8.5.14" - } - }, - "node_modules/postcss-unique-selectors": { - "version": "8.0.0", - "license": "MIT", - "dependencies": { - "postcss-selector-parser": "^7.1.1" - }, - "engines": { - "node": "^22.11.0 || ^24.11.0 || >=26.0" - }, - "peerDependencies": { - "postcss": "^8.5.14" - } - }, - "node_modules/postcss-value-parser": { - "version": "4.2.0", - "license": "MIT" - }, - "node_modules/powershell-utils": { - "version": "0.1.0", - "license": "MIT", - "engines": { - "node": ">=20" - }, 
- "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/pretty-bytes": { - "version": "7.1.0", - "license": "MIT", - "engines": { - "node": ">=20" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/process": { - "version": "0.11.10", - "license": "MIT", - "engines": { - "node": ">= 0.6.0" - } - }, - "node_modules/process-nextick-args": { - "version": "2.0.1", - "license": "MIT" - }, - "node_modules/proper-lockfile": { - "version": "4.1.2", - "license": "MIT", - "dependencies": { - "graceful-fs": "^4.2.4", - "retry": "^0.12.0", - "signal-exit": "^3.0.2" - } - }, - "node_modules/proper-lockfile/node_modules/signal-exit": { - "version": "3.0.7", - "license": "ISC" - }, - "node_modules/proto-list": { - "version": "1.2.4", - "dev": true, - "license": "ISC" - }, - "node_modules/punycode": { - "version": "2.3.1", - "resolved": "https://registry.npmjs.org/punycode/-/punycode-2.3.1.tgz", - "integrity": "sha512-vYt7UD1U9Wg6138shLtLOvdAu+8DsC/ilFtEVHcH+wydcSpNE20AfSOduf6MkRFahL5FY7X1oU7nKVZFtfq8Fg==", - "license": "MIT", - "engines": { - "node": ">=6" - } - }, - "node_modules/quansync": { - "version": "0.2.11", - "funding": [ - { - "type": "individual", - "url": "https://github.com/sponsors/antfu" - }, - { - "type": "individual", - "url": "https://github.com/sponsors/sxzz" - } - ], - "license": "MIT" - }, - "node_modules/queue-microtask": { - "version": "1.2.3", - "funding": [ - { - "type": "github", - "url": "https://github.com/sponsors/feross" - }, - { - "type": "patreon", - "url": "https://www.patreon.com/feross" - }, - { - "type": "consulting", - "url": "https://feross.org/support" - } - ], - "license": "MIT" - }, - "node_modules/radix3": { - "version": "1.1.2", - "license": "MIT" - }, - "node_modules/random": { - "version": "4.1.0", - "license": "MIT", - "dependencies": { - "seedrandom": "^3.0.5" - }, - "engines": { - "node": ">=14" - } - }, - "node_modules/range-parser": { - "version": 
"1.2.1", - "license": "MIT", - "engines": { - "node": ">= 0.6" - } - }, - "node_modules/rc9": { - "version": "3.0.1", - "license": "MIT", - "dependencies": { - "defu": "^6.1.6", - "destr": "^2.0.5" - } - }, - "node_modules/readable-stream": { - "version": "4.7.0", - "license": "MIT", - "dependencies": { - "abort-controller": "^3.0.0", - "buffer": "^6.0.3", - "events": "^3.3.0", - "process": "^0.11.10", - "string_decoder": "^1.3.0" - }, - "engines": { - "node": "^12.22.0 || ^14.17.0 || >=16.0.0" - } - }, - "node_modules/readdir-glob": { - "version": "1.1.3", - "license": "Apache-2.0", - "dependencies": { - "minimatch": "^5.1.0" - } - }, - "node_modules/readdir-glob/node_modules/minimatch": { - "version": "5.1.9", - "license": "ISC", - "dependencies": { - "brace-expansion": "^2.0.1" - }, - "engines": { - "node": ">=10" - } - }, - "node_modules/readdirp": { - "version": "5.0.0", - "license": "MIT", - "engines": { - "node": ">= 20.19.0" - }, - "funding": { - "type": "individual", - "url": "https://paulmillr.com/funding/" - } - }, - "node_modules/redis-errors": { - "version": "1.2.0", - "license": "MIT", - "engines": { - "node": ">=4" - } - }, - "node_modules/redis-parser": { - "version": "3.0.0", - "license": "MIT", - "dependencies": { - "redis-errors": "^1.0.0" - }, - "engines": { - "node": ">=4" - } - }, - "node_modules/regexp-tree": { - "version": "0.1.27", - "license": "MIT", - "bin": { - "regexp-tree": "bin/regexp-tree" - } - }, - "node_modules/regl": { - "version": "2.1.1", - "license": "MIT" - }, - "node_modules/reka-ui": { - "version": "2.9.7", - "license": "MIT", - "dependencies": { - "@floating-ui/dom": "^1.6.13", - "@floating-ui/vue": "^1.1.6", - "@internationalized/date": "^3.5.0", - "@internationalized/number": "^3.5.0", - "@tanstack/vue-virtual": "^3.12.0", - "@vueuse/core": "^14.1.0", - "@vueuse/shared": "^14.1.0", - "aria-hidden": "^1.2.4", - "defu": "^6.1.5", - "ohash": "^2.0.11" - }, - "funding": { - "type": "github", - "url": 
"https://github.com/sponsors/zernonia" - }, - "peerDependencies": { - "vue": ">= 3.4.0" - } - }, - "node_modules/require-from-string": { - "version": "2.0.2", - "resolved": "https://registry.npmjs.org/require-from-string/-/require-from-string-2.0.2.tgz", - "integrity": "sha512-Xf0nWe6RseziFMu+Ap9biiUbmplq6S9/p+7w7YXP/JBHhrUDDUhwa+vANyubuqfZWTveU//DYVGsDG7RKL/vEw==", - "license": "MIT", - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/resolve": { - "version": "1.22.12", - "license": "MIT", - "dependencies": { - "es-errors": "^1.3.0", - "is-core-module": "^2.16.1", - "path-parse": "^1.0.7", - "supports-preserve-symlinks-flag": "^1.0.0" - }, - "bin": { - "resolve": "bin/resolve" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/resolve-from": { - "version": "5.0.0", - "license": "MIT", - "engines": { - "node": ">=8" - } - }, - "node_modules/retry": { - "version": "0.12.0", - "license": "MIT", - "engines": { - "node": ">= 4" - } - }, - "node_modules/reusify": { - "version": "1.1.0", - "license": "MIT", - "engines": { - "iojs": ">=1.0.0", - "node": ">=0.10.0" - } - }, - "node_modules/robust-predicates": { - "version": "3.0.3", - "license": "Unlicense" - }, - "node_modules/rollup": { - "version": "4.60.4", - "license": "MIT", - "dependencies": { - "@types/estree": "1.0.8" - }, - "bin": { - "rollup": "dist/bin/rollup" - }, - "engines": { - "node": ">=18.0.0", - "npm": ">=8.0.0" - }, - "optionalDependencies": { - "@rollup/rollup-android-arm-eabi": "4.60.4", - "@rollup/rollup-android-arm64": "4.60.4", - "@rollup/rollup-darwin-arm64": "4.60.4", - "@rollup/rollup-darwin-x64": "4.60.4", - "@rollup/rollup-freebsd-arm64": "4.60.4", - "@rollup/rollup-freebsd-x64": "4.60.4", - "@rollup/rollup-linux-arm-gnueabihf": "4.60.4", - "@rollup/rollup-linux-arm-musleabihf": "4.60.4", - "@rollup/rollup-linux-arm64-gnu": "4.60.4", - "@rollup/rollup-linux-arm64-musl": "4.60.4", - 
"@rollup/rollup-linux-loong64-gnu": "4.60.4", - "@rollup/rollup-linux-loong64-musl": "4.60.4", - "@rollup/rollup-linux-ppc64-gnu": "4.60.4", - "@rollup/rollup-linux-ppc64-musl": "4.60.4", - "@rollup/rollup-linux-riscv64-gnu": "4.60.4", - "@rollup/rollup-linux-riscv64-musl": "4.60.4", - "@rollup/rollup-linux-s390x-gnu": "4.60.4", - "@rollup/rollup-linux-x64-gnu": "4.60.4", - "@rollup/rollup-linux-x64-musl": "4.60.4", - "@rollup/rollup-openbsd-x64": "4.60.4", - "@rollup/rollup-openharmony-arm64": "4.60.4", - "@rollup/rollup-win32-arm64-msvc": "4.60.4", - "@rollup/rollup-win32-ia32-msvc": "4.60.4", - "@rollup/rollup-win32-x64-gnu": "4.60.4", - "@rollup/rollup-win32-x64-msvc": "4.60.4", - "fsevents": "~2.3.2" - } - }, - "node_modules/rollup-plugin-visualizer": { - "version": "7.0.1", - "license": "MIT", - "dependencies": { - "open": "^11.0.0", - "picomatch": "^4.0.2", - "source-map": "^0.7.4", - "yargs": "^18.0.0" - }, - "bin": { - "rollup-plugin-visualizer": "dist/bin/cli.js" - }, - "engines": { - "node": ">=22" - }, - "peerDependencies": { - "rolldown": "1.x || ^1.0.0-beta || ^1.0.0-rc", - "rollup": "2.x || 3.x || 4.x" - }, - "peerDependenciesMeta": { - "rolldown": { - "optional": true - }, - "rollup": { - "optional": true - } - } - }, - "node_modules/rollup/node_modules/@types/estree": { - "version": "1.0.8", - "license": "MIT" - }, - "node_modules/rou3": { - "version": "0.8.1", - "license": "MIT" - }, - "node_modules/run-applescript": { - "version": "7.1.0", - "license": "MIT", - "engines": { - "node": ">=18" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/run-parallel": { - "version": "1.2.0", - "funding": [ - { - "type": "github", - "url": "https://github.com/sponsors/feross" - }, - { - "type": "patreon", - "url": "https://www.patreon.com/feross" - }, - { - "type": "consulting", - "url": "https://feross.org/support" - } - ], - "license": "MIT", - "dependencies": { - "queue-microtask": "^1.2.2" - } - }, - 
"node_modules/rw": { - "version": "1.3.3", - "license": "BSD-3-Clause" - }, - "node_modules/safe-buffer": { - "version": "5.2.1", - "funding": [ - { - "type": "github", - "url": "https://github.com/sponsors/feross" - }, - { - "type": "patreon", - "url": "https://www.patreon.com/feross" - }, - { - "type": "consulting", - "url": "https://feross.org/support" - } - ], - "license": "MIT" - }, - "node_modules/safer-buffer": { - "version": "2.1.2", - "license": "MIT" - }, - "node_modules/sax": { - "version": "1.6.0", - "license": "BlueOak-1.0.0", - "engines": { - "node": ">=11.0.0" - } - }, - "node_modules/saxes": { - "version": "6.0.0", - "resolved": "https://registry.npmjs.org/saxes/-/saxes-6.0.0.tgz", - "integrity": "sha512-xAg7SOnEhrm5zI3puOOKyy1OMcMlIJZYNJY7xLBwSze0UjhPLnWfj2GF2EpT0jmzaJKIWKHLsaSSajf35bcYnA==", - "license": "ISC", - "dependencies": { - "xmlchars": "^2.2.0" - }, - "engines": { - "node": ">=v12.22.7" - } - }, - "node_modules/scule": { - "version": "1.3.0", - "license": "MIT" - }, - "node_modules/seedrandom": { - "version": "3.0.5", - "license": "MIT" - }, - "node_modules/semver": { - "version": "7.8.0", - "license": "ISC", - "bin": { - "semver": "bin/semver.js" - }, - "engines": { - "node": ">=10" - } - }, - "node_modules/send": { - "version": "1.2.1", - "license": "MIT", - "dependencies": { - "debug": "^4.4.3", - "encodeurl": "^2.0.0", - "escape-html": "^1.0.3", - "etag": "^1.8.1", - "fresh": "^2.0.0", - "http-errors": "^2.0.1", - "mime-types": "^3.0.2", - "ms": "^2.1.3", - "on-finished": "^2.4.1", - "range-parser": "^1.2.1", - "statuses": "^2.0.2" - }, - "engines": { - "node": ">= 18" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/express" - } - }, - "node_modules/serialize-javascript": { - "version": "7.0.5", - "license": "BSD-3-Clause", - "engines": { - "node": ">=20.0.0" - } - }, - "node_modules/seroval": { - "version": "1.5.4", - "license": "MIT", - "engines": { - "node": ">=10" - } - }, - 
"node_modules/serve-placeholder": { - "version": "2.0.2", - "license": "MIT", - "dependencies": { - "defu": "^6.1.4" - } - }, - "node_modules/serve-static": { - "version": "2.2.1", - "license": "MIT", - "dependencies": { - "encodeurl": "^2.0.0", - "escape-html": "^1.0.3", - "parseurl": "^1.3.3", - "send": "^1.2.0" - }, - "engines": { - "node": ">= 18" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/express" - } - }, - "node_modules/setprototypeof": { - "version": "1.2.0", - "license": "ISC" - }, - "node_modules/shebang-command": { - "version": "2.0.0", - "license": "MIT", - "dependencies": { - "shebang-regex": "^3.0.0" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/shebang-regex": { - "version": "3.0.0", - "license": "MIT", - "engines": { - "node": ">=8" - } - }, - "node_modules/shell-quote": { - "version": "1.8.3", - "license": "MIT", - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/siginfo": { - "version": "2.0.0", - "dev": true, - "license": "ISC" - }, - "node_modules/signal-exit": { - "version": "4.1.0", - "license": "ISC", - "engines": { - "node": ">=14" - }, - "funding": { - "url": "https://github.com/sponsors/isaacs" - } - }, - "node_modules/simple-git": { - "version": "3.36.0", - "license": "MIT", - "dependencies": { - "@kwsites/file-exists": "^1.1.1", - "@kwsites/promise-deferred": "^1.1.1", - "@simple-git/args-pathspec": "^1.0.3", - "@simple-git/argv-parser": "^1.1.0", - "debug": "^4.4.0" - }, - "funding": { - "type": "github", - "url": "https://github.com/steveukx/git-js?sponsor=1" - } - }, - "node_modules/sirv": { - "version": "3.0.2", - "license": "MIT", - "dependencies": { - "@polka/url": "^1.0.0-next.24", - "mrmime": "^2.0.0", - "totalist": "^3.0.0" - }, - "engines": { - "node": ">=18" - } - }, - "node_modules/sisteransi": { - "version": "1.0.5", - "license": "MIT" - }, - "node_modules/slash": { - "version": "5.1.0", - 
"license": "MIT", - "engines": { - "node": ">=14.16" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/smob": { - "version": "1.6.2", - "license": "MIT", - "engines": { - "node": ">=20.0.0" - } - }, - "node_modules/source-map": { - "version": "0.7.6", - "license": "BSD-3-Clause", - "engines": { - "node": ">= 12" - } - }, - "node_modules/source-map-js": { - "version": "1.2.1", - "license": "BSD-3-Clause", - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/source-map-support": { - "version": "0.5.21", - "license": "MIT", - "dependencies": { - "buffer-from": "^1.0.0", - "source-map": "^0.6.0" - } - }, - "node_modules/source-map-support/node_modules/source-map": { - "version": "0.6.1", - "license": "BSD-3-Clause", - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/srvx": { - "version": "0.11.15", - "license": "MIT", - "bin": { - "srvx": "bin/srvx.mjs" - }, - "engines": { - "node": ">=20.16.0" - } - }, - "node_modules/stackback": { - "version": "0.0.2", - "dev": true, - "license": "MIT" - }, - "node_modules/standard-as-callback": { - "version": "2.1.0", - "license": "MIT" - }, - "node_modules/statuses": { - "version": "2.0.2", - "license": "MIT", - "engines": { - "node": ">= 0.8" - } - }, - "node_modules/std-env": { - "version": "4.1.0", - "license": "MIT" - }, - "node_modules/streamx": { - "version": "2.25.0", - "license": "MIT", - "dependencies": { - "events-universal": "^1.0.0", - "fast-fifo": "^1.3.2", - "text-decoder": "^1.1.0" - } - }, - "node_modules/string_decoder": { - "version": "1.3.0", - "license": "MIT", - "dependencies": { - "safe-buffer": "~5.2.0" - } - }, - "node_modules/string-width": { - "version": "5.1.2", - "license": "MIT", - "dependencies": { - "eastasianwidth": "^0.2.0", - "emoji-regex": "^9.2.2", - "strip-ansi": "^7.0.1" - }, - "engines": { - "node": ">=12" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/string-width-cjs": { - 
"name": "string-width", - "version": "4.2.3", - "license": "MIT", - "dependencies": { - "emoji-regex": "^8.0.0", - "is-fullwidth-code-point": "^3.0.0", - "strip-ansi": "^6.0.1" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/string-width-cjs/node_modules/ansi-regex": { - "version": "5.0.1", - "license": "MIT", - "engines": { - "node": ">=8" - } - }, - "node_modules/string-width-cjs/node_modules/emoji-regex": { - "version": "8.0.0", - "license": "MIT" - }, - "node_modules/string-width-cjs/node_modules/strip-ansi": { - "version": "6.0.1", - "license": "MIT", - "dependencies": { - "ansi-regex": "^5.0.1" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/strip-ansi": { - "version": "7.2.0", - "license": "MIT", - "dependencies": { - "ansi-regex": "^6.2.2" - }, - "engines": { - "node": ">=12" - }, - "funding": { - "url": "https://github.com/chalk/strip-ansi?sponsor=1" - } - }, - "node_modules/strip-ansi-cjs": { - "name": "strip-ansi", - "version": "6.0.1", - "license": "MIT", - "dependencies": { - "ansi-regex": "^5.0.1" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/strip-ansi-cjs/node_modules/ansi-regex": { - "version": "5.0.1", - "license": "MIT", - "engines": { - "node": ">=8" - } - }, - "node_modules/strip-final-newline": { - "version": "3.0.0", - "license": "MIT", - "engines": { - "node": ">=12" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/strip-literal": { - "version": "3.1.0", - "license": "MIT", - "dependencies": { - "js-tokens": "^9.0.1" - }, - "funding": { - "url": "https://github.com/sponsors/antfu" - } - }, - "node_modules/strip-literal/node_modules/js-tokens": { - "version": "9.0.1", - "license": "MIT" - }, - "node_modules/structured-clone-es": { - "version": "2.0.0", - "license": "ISC" - }, - "node_modules/style-mod": { - "version": "4.1.3", - "license": "MIT" - }, - "node_modules/stylehacks": { - "version": "8.0.0", - "license": "MIT", - "dependencies": { - 
"browserslist": "^4.28.2", - "postcss-selector-parser": "^7.1.1" - }, - "engines": { - "node": "^22.11.0 || ^24.11.0 || >=26.0" - }, - "peerDependencies": { - "postcss": "^8.5.14" - } - }, - "node_modules/supports-color": { - "version": "7.2.0", - "license": "MIT", - "dependencies": { - "has-flag": "^4.0.0" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/supports-preserve-symlinks-flag": { - "version": "1.0.0", - "license": "MIT", - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/svgo": { - "version": "4.0.1", - "license": "MIT", - "dependencies": { - "commander": "^11.1.0", - "css-select": "^5.1.0", - "css-tree": "^3.0.1", - "css-what": "^6.1.0", - "csso": "^5.0.5", - "picocolors": "^1.1.1", - "sax": "^1.5.0" - }, - "bin": { - "svgo": "bin/svgo.js" - }, - "engines": { - "node": ">=16" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/svgo" - } - }, - "node_modules/svgo/node_modules/commander": { - "version": "11.1.0", - "license": "MIT", - "engines": { - "node": ">=16" - } - }, - "node_modules/symbol-tree": { - "version": "3.2.4", - "resolved": "https://registry.npmjs.org/symbol-tree/-/symbol-tree-3.2.4.tgz", - "integrity": "sha512-9QNk5KwDF+Bvz+PyObkmSYjI5ksVUYtjW7AU22r2NKcfLJcXp96hkDWU3+XndOsUb+AQ9QhfzfCT2O+CNWT5Tw==", - "license": "MIT" - }, - "node_modules/table-layout": { - "version": "4.1.1", - "license": "MIT", - "dependencies": { - "array-back": "^6.2.2", - "wordwrapjs": "^5.1.0" - }, - "engines": { - "node": ">=12.17" - } - }, - "node_modules/table-layout/node_modules/array-back": { - "version": "6.2.3", - "license": "MIT", - "engines": { - "node": ">=12.17" - } - }, - "node_modules/tagged-tag": { - "version": "1.0.0", - "license": "MIT", - "engines": { - "node": ">=20" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/tailwind-merge": { - "version": "3.6.0", - "license": "MIT", - 
"funding": { - "type": "github", - "url": "https://github.com/sponsors/dcastil" - } - }, - "node_modules/tailwindcss": { - "version": "4.3.0", - "license": "MIT" - }, - "node_modules/tapable": { - "version": "2.3.3", - "license": "MIT", - "engines": { - "node": ">=6" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/webpack" - } - }, - "node_modules/tar": { - "version": "7.5.15", - "license": "BlueOak-1.0.0", - "dependencies": { - "@isaacs/fs-minipass": "^4.0.0", - "chownr": "^3.0.0", - "minipass": "^7.1.2", - "minizlib": "^3.1.0", - "yallist": "^5.0.0" - }, - "engines": { - "node": ">=18" - } - }, - "node_modules/tar-stream": { - "version": "3.2.0", - "license": "MIT", - "dependencies": { - "b4a": "^1.6.4", - "bare-fs": "^4.5.5", - "fast-fifo": "^1.2.0", - "streamx": "^2.15.0" - } - }, - "node_modules/tar/node_modules/yallist": { - "version": "5.0.0", - "license": "BlueOak-1.0.0", - "engines": { - "node": ">=18" - } - }, - "node_modules/teex": { - "version": "1.0.1", - "license": "MIT", - "dependencies": { - "streamx": "^2.12.5" - } - }, - "node_modules/terser": { - "version": "5.47.1", - "license": "BSD-2-Clause", - "dependencies": { - "@jridgewell/source-map": "^0.3.3", - "acorn": "^8.15.0", - "commander": "^2.20.0", - "source-map-support": "~0.5.20" - }, - "bin": { - "terser": "bin/terser" - }, - "engines": { - "node": ">=10" - } - }, - "node_modules/terser/node_modules/commander": { - "version": "2.20.3", - "license": "MIT" - }, - "node_modules/text-decoder": { - "version": "1.2.7", - "license": "Apache-2.0", - "dependencies": { - "b4a": "^1.6.4" - } - }, - "node_modules/tiny-invariant": { - "version": "1.3.3", - "license": "MIT" - }, - "node_modules/tinybench": { - "version": "2.9.0", - "dev": true, - "license": "MIT" - }, - "node_modules/tinyclip": { - "version": "0.1.12", - "license": "MIT", - "engines": { - "node": "^16.14.0 || >= 17.3.0" - } - }, - "node_modules/tinyexec": { - "version": "1.1.2", - "license": "MIT", - 
"engines": { - "node": ">=18" - } - }, - "node_modules/tinyglobby": { - "version": "0.2.16", - "license": "MIT", - "dependencies": { - "fdir": "^6.5.0", - "picomatch": "^4.0.4" - }, - "engines": { - "node": ">=12.0.0" - }, - "funding": { - "url": "https://github.com/sponsors/SuperchupuDev" - } - }, - "node_modules/tinypool": { - "version": "1.1.1", - "dev": true, - "license": "MIT", - "engines": { - "node": "^18.0.0 || >=20.0.0" - } - }, - "node_modules/tinyrainbow": { - "version": "1.2.0", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=14.0.0" - } - }, - "node_modules/tinyspy": { - "version": "3.0.2", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=14.0.0" - } - }, - "node_modules/tldts": { - "version": "7.4.2", - "resolved": "https://registry.npmjs.org/tldts/-/tldts-7.4.2.tgz", - "integrity": "sha512-kCwffuaH8ntKtygnWe1b4BJKWiCUH30n5KfoTr6IchcXOwR7chAOFJxFrH3vjANafUYrIA4a7SDL+nn7SiR4Sw==", - "license": "MIT", - "dependencies": { - "tldts-core": "^7.4.2" - }, - "bin": { - "tldts": "bin/cli.js" - } - }, - "node_modules/tldts-core": { - "version": "7.4.2", - "resolved": "https://registry.npmjs.org/tldts-core/-/tldts-core-7.4.2.tgz", - "integrity": "sha512-nwEyF4vl4RSJjwSjBUmOSxc3BFPoIFdlRthJ6e+5v9P3bHNsoD06UjuqMUspqp7vsEZ1beaHi1km+optiE17yA==", - "license": "MIT" - }, - "node_modules/to-regex-range": { - "version": "5.0.1", - "license": "MIT", - "dependencies": { - "is-number": "^7.0.0" - }, - "engines": { - "node": ">=8.0" - } - }, - "node_modules/toidentifier": { - "version": "1.0.1", - "license": "MIT", - "engines": { - "node": ">=0.6" - } - }, - "node_modules/totalist": { - "version": "3.0.1", - "license": "MIT", - "engines": { - "node": ">=6" - } - }, - "node_modules/tough-cookie": { - "version": "6.0.1", - "resolved": "https://registry.npmjs.org/tough-cookie/-/tough-cookie-6.0.1.tgz", - "integrity": "sha512-LktZQb3IeoUWB9lqR5EWTHgW/VTITCXg4D21M+lvybRVdylLrRMnqaIONLVb5mav8vM19m44HIcGq4qASeu2Qw==", - "license": "BSD-3-Clause", - 
"dependencies": { - "tldts": "^7.0.5" - }, - "engines": { - "node": ">=16" - } - }, - "node_modules/tr46": { - "version": "0.0.3", - "license": "MIT" - }, - "node_modules/tslib": { - "version": "2.8.1", - "license": "0BSD" - }, - "node_modules/tw-animate-css": { - "version": "1.4.0", - "dev": true, - "license": "MIT", - "funding": { - "url": "https://github.com/sponsors/Wombosvideo" - } - }, - "node_modules/type-fest": { - "version": "5.6.0", - "license": "(MIT OR CC0-1.0)", - "dependencies": { - "tagged-tag": "^1.0.0" - }, - "engines": { - "node": ">=20" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/type-level-regexp": { - "version": "0.1.17", - "license": "MIT" - }, - "node_modules/typescript": { - "version": "5.9.3", - "devOptional": true, - "license": "Apache-2.0", - "bin": { - "tsc": "bin/tsc", - "tsserver": "bin/tsserver" - }, - "engines": { - "node": ">=14.17" - } - }, - "node_modules/typical": { - "version": "4.0.0", - "license": "MIT", - "engines": { - "node": ">=8" - } - }, - "node_modules/ufo": { - "version": "1.6.4", - "license": "MIT" - }, - "node_modules/ultrahtml": { - "version": "1.6.0", - "license": "MIT" - }, - "node_modules/uncrypto": { - "version": "0.1.3", - "license": "MIT" - }, - "node_modules/unctx": { - "version": "2.5.0", - "license": "MIT", - "dependencies": { - "acorn": "^8.15.0", - "estree-walker": "^3.0.3", - "magic-string": "^0.30.21", - "unplugin": "^2.3.11" - } - }, - "node_modules/unctx/node_modules/estree-walker": { - "version": "3.0.3", - "license": "MIT", - "dependencies": { - "@types/estree": "^1.0.0" - } - }, - "node_modules/unctx/node_modules/unplugin": { - "version": "2.3.11", - "license": "MIT", - "dependencies": { - "@jridgewell/remapping": "^2.3.5", - "acorn": "^8.15.0", - "picomatch": "^4.0.3", - "webpack-virtual-modules": "^0.6.2" - }, - "engines": { - "node": ">=18.12.0" - } - }, - "node_modules/undici": { - "version": "7.27.0", - "resolved": 
"https://registry.npmjs.org/undici/-/undici-7.27.0.tgz", - "integrity": "sha512-+t2Z/GwkZQDtu00813aP66ygViGtPHKhhoFZpQKpKrE+9jIgES+Zw+mFNaDWOVRKiuJjuqKHzD3B1sfGg8+ZOQ==", - "license": "MIT", - "engines": { - "node": ">=20.18.1" - } - }, - "node_modules/undici-types": { - "version": "6.21.0", - "license": "MIT" - }, - "node_modules/unenv": { - "version": "2.0.0-rc.24", - "license": "MIT", - "dependencies": { - "pathe": "^2.0.3" - } - }, - "node_modules/unhead": { - "version": "2.1.15", - "license": "MIT", - "dependencies": { - "hookable": "^6.0.1" - }, - "funding": { - "url": "https://github.com/sponsors/harlan-zw" - } - }, - "node_modules/unicorn-magic": { - "version": "0.4.0", - "license": "MIT", - "engines": { - "node": ">=20" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/unimport": { - "version": "6.3.0", - "license": "MIT", - "dependencies": { - "acorn": "^8.16.0", - "escape-string-regexp": "^5.0.0", - "estree-walker": "^3.0.3", - "local-pkg": "^1.1.2", - "magic-string": "^0.30.21", - "mlly": "^1.8.2", - "pathe": "^2.0.3", - "picomatch": "^4.0.4", - "pkg-types": "^2.3.1", - "scule": "^1.3.0", - "strip-literal": "^3.1.0", - "tinyglobby": "^0.2.16", - "unplugin": "^3.0.0", - "unplugin-utils": "^0.3.1" - }, - "engines": { - "node": ">=18.12.0" - }, - "peerDependencies": { - "oxc-parser": "*", - "rolldown": "^1.0.0" - }, - "peerDependenciesMeta": { - "oxc-parser": { - "optional": true - }, - "rolldown": { - "optional": true - } - } - }, - "node_modules/unimport/node_modules/estree-walker": { - "version": "3.0.3", - "license": "MIT", - "dependencies": { - "@types/estree": "^1.0.0" - } - }, - "node_modules/unplugin": { - "version": "3.0.0", - "license": "MIT", - "dependencies": { - "@jridgewell/remapping": "^2.3.5", - "picomatch": "^4.0.3", - "webpack-virtual-modules": "^0.6.2" - }, - "engines": { - "node": "^20.19.0 || >=22.12.0" - } - }, - "node_modules/unplugin-utils": { - "version": "0.3.1", - "license": "MIT", - 
"dependencies": { - "pathe": "^2.0.3", - "picomatch": "^4.0.3" - }, - "engines": { - "node": ">=20.19.0" - }, - "funding": { - "url": "https://github.com/sponsors/sxzz" - } - }, - "node_modules/unrouting": { - "version": "0.1.7", - "license": "MIT", - "dependencies": { - "escape-string-regexp": "^5.0.0", - "ufo": "^1.6.3" - } - }, - "node_modules/unstorage": { - "version": "1.17.5", - "license": "MIT", - "dependencies": { - "anymatch": "^3.1.3", - "chokidar": "^5.0.0", - "destr": "^2.0.5", - "h3": "^1.15.10", - "lru-cache": "^11.2.7", - "node-fetch-native": "^1.6.7", - "ofetch": "^1.5.1", - "ufo": "^1.6.3" - }, - "peerDependencies": { - "@azure/app-configuration": "^1.8.0", - "@azure/cosmos": "^4.2.0", - "@azure/data-tables": "^13.3.0", - "@azure/identity": "^4.6.0", - "@azure/keyvault-secrets": "^4.9.0", - "@azure/storage-blob": "^12.26.0", - "@capacitor/preferences": "^6 || ^7 || ^8", - "@deno/kv": ">=0.9.0", - "@netlify/blobs": "^6.5.0 || ^7.0.0 || ^8.1.0 || ^9.0.0 || ^10.0.0", - "@planetscale/database": "^1.19.0", - "@upstash/redis": "^1.34.3", - "@vercel/blob": ">=0.27.1", - "@vercel/functions": "^2.2.12 || ^3.0.0", - "@vercel/kv": "^1 || ^2 || ^3", - "aws4fetch": "^1.0.20", - "db0": ">=0.2.1", - "idb-keyval": "^6.2.1", - "ioredis": "^5.4.2", - "uploadthing": "^7.4.4" - }, - "peerDependenciesMeta": { - "@azure/app-configuration": { - "optional": true - }, - "@azure/cosmos": { - "optional": true - }, - "@azure/data-tables": { - "optional": true - }, - "@azure/identity": { - "optional": true - }, - "@azure/keyvault-secrets": { - "optional": true - }, - "@azure/storage-blob": { - "optional": true - }, - "@capacitor/preferences": { - "optional": true - }, - "@deno/kv": { - "optional": true - }, - "@netlify/blobs": { - "optional": true - }, - "@planetscale/database": { - "optional": true - }, - "@upstash/redis": { - "optional": true - }, - "@vercel/blob": { - "optional": true - }, - "@vercel/functions": { - "optional": true - }, - "@vercel/kv": { - "optional": true 
- }, - "aws4fetch": { - "optional": true - }, - "db0": { - "optional": true - }, - "idb-keyval": { - "optional": true - }, - "ioredis": { - "optional": true - }, - "uploadthing": { - "optional": true - } - } - }, - "node_modules/unstorage/node_modules/lru-cache": { - "version": "11.5.0", - "license": "BlueOak-1.0.0", - "engines": { - "node": "20 || >=22" - } - }, - "node_modules/untun": { - "version": "0.1.3", - "license": "MIT", - "dependencies": { - "citty": "^0.1.5", - "consola": "^3.2.3", - "pathe": "^1.1.1" - }, - "bin": { - "untun": "bin/untun.mjs" - } - }, - "node_modules/untun/node_modules/citty": { - "version": "0.1.6", - "license": "MIT", - "dependencies": { - "consola": "^3.2.3" - } - }, - "node_modules/untun/node_modules/pathe": { - "version": "1.1.2", - "license": "MIT" - }, - "node_modules/untyped": { - "version": "2.0.0", - "license": "MIT", - "dependencies": { - "citty": "^0.1.6", - "defu": "^6.1.4", - "jiti": "^2.4.2", - "knitwork": "^1.2.0", - "scule": "^1.3.0" - }, - "bin": { - "untyped": "dist/cli.mjs" - } - }, - "node_modules/untyped/node_modules/citty": { - "version": "0.1.6", - "license": "MIT", - "dependencies": { - "consola": "^3.2.3" - } - }, - "node_modules/unwasm": { - "version": "0.5.3", - "license": "MIT", - "dependencies": { - "exsolve": "^1.0.8", - "knitwork": "^1.3.0", - "magic-string": "^0.30.21", - "mlly": "^1.8.0", - "pathe": "^2.0.3", - "pkg-types": "^2.3.0" - } - }, - "node_modules/update-browserslist-db": { - "version": "1.2.3", - "funding": [ - { - "type": "opencollective", - "url": "https://opencollective.com/browserslist" - }, - { - "type": "tidelift", - "url": "https://tidelift.com/funding/github/npm/browserslist" - }, - { - "type": "github", - "url": "https://github.com/sponsors/ai" - } - ], - "license": "MIT", - "dependencies": { - "escalade": "^3.2.0", - "picocolors": "^1.1.1" - }, - "bin": { - "update-browserslist-db": "cli.js" - }, - "peerDependencies": { - "browserslist": ">= 4.21.0" - } - }, - "node_modules/uqr": { 
- "version": "0.1.3", - "license": "MIT" - }, - "node_modules/util-deprecate": { - "version": "1.0.2", - "license": "MIT" - }, - "node_modules/vite": { - "version": "6.4.2", - "license": "MIT", - "peer": true, - "dependencies": { - "esbuild": "^0.25.0", - "fdir": "^6.4.4", - "picomatch": "^4.0.2", - "postcss": "^8.5.3", - "rollup": "^4.34.9", - "tinyglobby": "^0.2.13" - }, - "bin": { - "vite": "bin/vite.js" - }, - "engines": { - "node": "^18.0.0 || ^20.0.0 || >=22.0.0" - }, - "funding": { - "url": "https://github.com/vitejs/vite?sponsor=1" - }, - "optionalDependencies": { - "fsevents": "~2.3.3" - }, - "peerDependencies": { - "@types/node": "^18.0.0 || ^20.0.0 || >=22.0.0", - "jiti": ">=1.21.0", - "less": "*", - "lightningcss": "^1.21.0", - "sass": "*", - "sass-embedded": "*", - "stylus": "*", - "sugarss": "*", - "terser": "^5.16.0", - "tsx": "^4.8.1", - "yaml": "^2.4.2" - }, - "peerDependenciesMeta": { - "@types/node": { - "optional": true - }, - "jiti": { - "optional": true - }, - "less": { - "optional": true - }, - "lightningcss": { - "optional": true - }, - "sass": { - "optional": true - }, - "sass-embedded": { - "optional": true - }, - "stylus": { - "optional": true - }, - "sugarss": { - "optional": true - }, - "terser": { - "optional": true - }, - "tsx": { - "optional": true - }, - "yaml": { - "optional": true - } - } - }, - "node_modules/vite-dev-rpc": { - "version": "1.1.0", - "license": "MIT", - "dependencies": { - "birpc": "^2.4.0", - "vite-hot-client": "^2.1.0" - }, - "funding": { - "url": "https://github.com/sponsors/antfu" - }, - "peerDependencies": { - "vite": "^2.9.0 || ^3.0.0-0 || ^4.0.0-0 || ^5.0.0-0 || ^6.0.1 || ^7.0.0-0" - } - }, - "node_modules/vite-dev-rpc/node_modules/birpc": { - "version": "2.9.0", - "license": "MIT", - "funding": { - "url": "https://github.com/sponsors/antfu" - } - }, - "node_modules/vite-hot-client": { - "version": "2.2.0", - "license": "MIT", - "funding": { - "url": "https://github.com/sponsors/antfu" - }, - 
"peerDependencies": { - "vite": "^2.6.0 || ^3.0.0 || ^4.0.0 || ^5.0.0-0 || ^6.0.0-0 || ^7.0.0-0 || ^8.0.0" - } - }, - "node_modules/vite-node": { - "version": "5.3.0", - "license": "MIT", - "dependencies": { - "cac": "^6.7.14", - "es-module-lexer": "^2.0.0", - "obug": "^2.1.1", - "pathe": "^2.0.3", - "vite": "^7.3.1" - }, - "bin": { - "vite-node": "dist/cli.mjs" - }, - "engines": { - "node": "^20.19.0 || >=22.12.0" - }, - "funding": { - "url": "https://opencollective.com/antfu" - } - }, - "node_modules/vite-node/node_modules/@esbuild/linux-x64": { - "version": "0.27.7", - "cpu": [ - "x64" - ], - "license": "MIT", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/vite-node/node_modules/esbuild": { - "version": "0.27.7", - "hasInstallScript": true, - "license": "MIT", - "bin": { - "esbuild": "bin/esbuild" - }, - "engines": { - "node": ">=18" - }, - "optionalDependencies": { - "@esbuild/aix-ppc64": "0.27.7", - "@esbuild/android-arm": "0.27.7", - "@esbuild/android-arm64": "0.27.7", - "@esbuild/android-x64": "0.27.7", - "@esbuild/darwin-arm64": "0.27.7", - "@esbuild/darwin-x64": "0.27.7", - "@esbuild/freebsd-arm64": "0.27.7", - "@esbuild/freebsd-x64": "0.27.7", - "@esbuild/linux-arm": "0.27.7", - "@esbuild/linux-arm64": "0.27.7", - "@esbuild/linux-ia32": "0.27.7", - "@esbuild/linux-loong64": "0.27.7", - "@esbuild/linux-mips64el": "0.27.7", - "@esbuild/linux-ppc64": "0.27.7", - "@esbuild/linux-riscv64": "0.27.7", - "@esbuild/linux-s390x": "0.27.7", - "@esbuild/linux-x64": "0.27.7", - "@esbuild/netbsd-arm64": "0.27.7", - "@esbuild/netbsd-x64": "0.27.7", - "@esbuild/openbsd-arm64": "0.27.7", - "@esbuild/openbsd-x64": "0.27.7", - "@esbuild/openharmony-arm64": "0.27.7", - "@esbuild/sunos-x64": "0.27.7", - "@esbuild/win32-arm64": "0.27.7", - "@esbuild/win32-ia32": "0.27.7", - "@esbuild/win32-x64": "0.27.7" - } - }, - "node_modules/vite-node/node_modules/vite": { - "version": "7.3.3", - "license": "MIT", - "dependencies": { - 
"esbuild": "^0.27.0", - "fdir": "^6.5.0", - "picomatch": "^4.0.3", - "postcss": "^8.5.6", - "rollup": "^4.43.0", - "tinyglobby": "^0.2.15" - }, - "bin": { - "vite": "bin/vite.js" - }, - "engines": { - "node": "^20.19.0 || >=22.12.0" - }, - "funding": { - "url": "https://github.com/vitejs/vite?sponsor=1" - }, - "optionalDependencies": { - "fsevents": "~2.3.3" - }, - "peerDependencies": { - "@types/node": "^20.19.0 || >=22.12.0", - "jiti": ">=1.21.0", - "less": "^4.0.0", - "lightningcss": "^1.21.0", - "sass": "^1.70.0", - "sass-embedded": "^1.70.0", - "stylus": ">=0.54.8", - "sugarss": "^5.0.0", - "terser": "^5.16.0", - "tsx": "^4.8.1", - "yaml": "^2.4.2" - }, - "peerDependenciesMeta": { - "@types/node": { - "optional": true - }, - "jiti": { - "optional": true - }, - "less": { - "optional": true - }, - "lightningcss": { - "optional": true - }, - "sass": { - "optional": true - }, - "sass-embedded": { - "optional": true - }, - "stylus": { - "optional": true - }, - "sugarss": { - "optional": true - }, - "terser": { - "optional": true - }, - "tsx": { - "optional": true - }, - "yaml": { - "optional": true - } - } - }, - "node_modules/vite-plugin-checker": { - "version": "0.13.0", - "license": "MIT", - "dependencies": { - "@babel/code-frame": "^7.27.1", - "chokidar": "^4.0.3", - "npm-run-path": "^6.0.0", - "picocolors": "^1.1.1", - "picomatch": "^4.0.4", - "proper-lockfile": "^4.1.2", - "tiny-invariant": "^1.3.3", - "tinyglobby": "^0.2.15", - "vscode-uri": "^3.1.0" - }, - "engines": { - "node": ">=16.11" - }, - "peerDependencies": { - "@biomejs/biome": ">=1.7", - "eslint": ">=9.39.4", - "meow": "^13.2.0 || ^14.0.0", - "optionator": "^0.9.4", - "oxlint": ">=1", - "stylelint": ">=16.26.1", - "typescript": "*", - "vite": ">=5.4.21", - "vls": "*", - "vti": "*", - "vue-tsc": "~2.2.10 || ^3.0.0" - }, - "peerDependenciesMeta": { - "@biomejs/biome": { - "optional": true - }, - "eslint": { - "optional": true - }, - "meow": { - "optional": true - }, - "optionator": { - "optional": 
true - }, - "oxlint": { - "optional": true - }, - "stylelint": { - "optional": true - }, - "typescript": { - "optional": true - }, - "vls": { - "optional": true - }, - "vti": { - "optional": true - }, - "vue-tsc": { - "optional": true - } - } - }, - "node_modules/vite-plugin-checker/node_modules/chokidar": { - "version": "4.0.3", - "license": "MIT", - "dependencies": { - "readdirp": "^4.0.1" - }, - "engines": { - "node": ">= 14.16.0" - }, - "funding": { - "url": "https://paulmillr.com/funding/" - } - }, - "node_modules/vite-plugin-checker/node_modules/npm-run-path": { - "version": "6.0.0", - "license": "MIT", - "dependencies": { - "path-key": "^4.0.0", - "unicorn-magic": "^0.3.0" - }, - "engines": { - "node": ">=18" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/vite-plugin-checker/node_modules/path-key": { - "version": "4.0.0", - "license": "MIT", - "engines": { - "node": ">=12" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/vite-plugin-checker/node_modules/readdirp": { - "version": "4.1.2", - "license": "MIT", - "engines": { - "node": ">= 14.18.0" - }, - "funding": { - "type": "individual", - "url": "https://paulmillr.com/funding/" - } - }, - "node_modules/vite-plugin-checker/node_modules/unicorn-magic": { - "version": "0.3.0", - "license": "MIT", - "engines": { - "node": ">=18" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/vite-plugin-inspect": { - "version": "11.3.3", - "license": "MIT", - "dependencies": { - "ansis": "^4.1.0", - "debug": "^4.4.1", - "error-stack-parser-es": "^1.0.5", - "ohash": "^2.0.11", - "open": "^10.2.0", - "perfect-debounce": "^2.0.0", - "sirv": "^3.0.1", - "unplugin-utils": "^0.3.0", - "vite-dev-rpc": "^1.1.0" - }, - "engines": { - "node": ">=14" - }, - "funding": { - "url": "https://github.com/sponsors/antfu" - }, - "peerDependencies": { - "vite": "^6.0.0 || ^7.0.0-0" - }, - 
"peerDependenciesMeta": { - "@nuxt/kit": { - "optional": true - } - } - }, - "node_modules/vite-plugin-inspect/node_modules/open": { - "version": "10.2.0", - "license": "MIT", - "dependencies": { - "default-browser": "^5.2.1", - "define-lazy-prop": "^3.0.0", - "is-inside-container": "^1.0.0", - "wsl-utils": "^0.1.0" - }, - "engines": { - "node": ">=18" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/vite-plugin-inspect/node_modules/wsl-utils": { - "version": "0.1.0", - "license": "MIT", - "dependencies": { - "is-wsl": "^3.1.0" - }, - "engines": { - "node": ">=18" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/vite-plugin-vue-tracer": { - "version": "1.4.0", - "license": "MIT", - "dependencies": { - "estree-walker": "^3.0.3", - "exsolve": "^1.0.8", - "magic-string": "^0.30.21", - "pathe": "^2.0.3", - "source-map-js": "^1.2.1" - }, - "funding": { - "url": "https://github.com/sponsors/antfu" - }, - "peerDependencies": { - "vite": "^6.0.0 || ^7.0.0 || ^8.0.0-0", - "vue": "^3.5.0" - } - }, - "node_modules/vite-plugin-vue-tracer/node_modules/estree-walker": { - "version": "3.0.3", - "license": "MIT", - "dependencies": { - "@types/estree": "^1.0.0" - } - }, - "node_modules/vite/node_modules/@esbuild/linux-x64": { - "version": "0.25.12", - "cpu": [ - "x64" - ], - "license": "MIT", - "optional": true, - "os": [ - "linux" - ], - "peer": true, - "engines": { - "node": ">=18" - } - }, - "node_modules/vite/node_modules/esbuild": { - "version": "0.25.12", - "hasInstallScript": true, - "license": "MIT", - "peer": true, - "bin": { - "esbuild": "bin/esbuild" - }, - "engines": { - "node": ">=18" - }, - "optionalDependencies": { - "@esbuild/aix-ppc64": "0.25.12", - "@esbuild/android-arm": "0.25.12", - "@esbuild/android-arm64": "0.25.12", - "@esbuild/android-x64": "0.25.12", - "@esbuild/darwin-arm64": "0.25.12", - "@esbuild/darwin-x64": "0.25.12", - "@esbuild/freebsd-arm64": "0.25.12", - 
"@esbuild/freebsd-x64": "0.25.12", - "@esbuild/linux-arm": "0.25.12", - "@esbuild/linux-arm64": "0.25.12", - "@esbuild/linux-ia32": "0.25.12", - "@esbuild/linux-loong64": "0.25.12", - "@esbuild/linux-mips64el": "0.25.12", - "@esbuild/linux-ppc64": "0.25.12", - "@esbuild/linux-riscv64": "0.25.12", - "@esbuild/linux-s390x": "0.25.12", - "@esbuild/linux-x64": "0.25.12", - "@esbuild/netbsd-arm64": "0.25.12", - "@esbuild/netbsd-x64": "0.25.12", - "@esbuild/openbsd-arm64": "0.25.12", - "@esbuild/openbsd-x64": "0.25.12", - "@esbuild/openharmony-arm64": "0.25.12", - "@esbuild/sunos-x64": "0.25.12", - "@esbuild/win32-arm64": "0.25.12", - "@esbuild/win32-ia32": "0.25.12", - "@esbuild/win32-x64": "0.25.12" - } - }, - "node_modules/vitest": { - "version": "2.1.9", - "dev": true, - "license": "MIT", - "dependencies": { - "@vitest/expect": "2.1.9", - "@vitest/mocker": "2.1.9", - "@vitest/pretty-format": "^2.1.9", - "@vitest/runner": "2.1.9", - "@vitest/snapshot": "2.1.9", - "@vitest/spy": "2.1.9", - "@vitest/utils": "2.1.9", - "chai": "^5.1.2", - "debug": "^4.3.7", - "expect-type": "^1.1.0", - "magic-string": "^0.30.12", - "pathe": "^1.1.2", - "std-env": "^3.8.0", - "tinybench": "^2.9.0", - "tinyexec": "^0.3.1", - "tinypool": "^1.0.1", - "tinyrainbow": "^1.2.0", - "vite": "^5.0.0", - "vite-node": "2.1.9", - "why-is-node-running": "^2.3.0" - }, - "bin": { - "vitest": "vitest.mjs" - }, - "engines": { - "node": "^18.0.0 || >=20.0.0" - }, - "funding": { - "url": "https://opencollective.com/vitest" - }, - "peerDependencies": { - "@edge-runtime/vm": "*", - "@types/node": "^18.0.0 || >=20.0.0", - "@vitest/browser": "2.1.9", - "@vitest/ui": "2.1.9", - "happy-dom": "*", - "jsdom": "*" - }, - "peerDependenciesMeta": { - "@edge-runtime/vm": { - "optional": true - }, - "@types/node": { - "optional": true - }, - "@vitest/browser": { - "optional": true - }, - "@vitest/ui": { - "optional": true - }, - "happy-dom": { - "optional": true - }, - "jsdom": { - "optional": true - } - } - }, - 
"node_modules/vitest/node_modules/@esbuild/linux-x64": { - "version": "0.21.5", - "cpu": [ - "x64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": ">=12" - } - }, - "node_modules/vitest/node_modules/@vitest/mocker": { - "version": "2.1.9", - "dev": true, - "license": "MIT", - "dependencies": { - "@vitest/spy": "2.1.9", - "estree-walker": "^3.0.3", - "magic-string": "^0.30.12" - }, - "funding": { - "url": "https://opencollective.com/vitest" - }, - "peerDependencies": { - "msw": "^2.4.9", - "vite": "^5.0.0" - }, - "peerDependenciesMeta": { - "msw": { - "optional": true - }, - "vite": { - "optional": true - } - } - }, - "node_modules/vitest/node_modules/es-module-lexer": { - "version": "1.7.0", - "dev": true, - "license": "MIT" - }, - "node_modules/vitest/node_modules/esbuild": { - "version": "0.21.5", - "dev": true, - "hasInstallScript": true, - "license": "MIT", - "bin": { - "esbuild": "bin/esbuild" - }, - "engines": { - "node": ">=12" - }, - "optionalDependencies": { - "@esbuild/aix-ppc64": "0.21.5", - "@esbuild/android-arm": "0.21.5", - "@esbuild/android-arm64": "0.21.5", - "@esbuild/android-x64": "0.21.5", - "@esbuild/darwin-arm64": "0.21.5", - "@esbuild/darwin-x64": "0.21.5", - "@esbuild/freebsd-arm64": "0.21.5", - "@esbuild/freebsd-x64": "0.21.5", - "@esbuild/linux-arm": "0.21.5", - "@esbuild/linux-arm64": "0.21.5", - "@esbuild/linux-ia32": "0.21.5", - "@esbuild/linux-loong64": "0.21.5", - "@esbuild/linux-mips64el": "0.21.5", - "@esbuild/linux-ppc64": "0.21.5", - "@esbuild/linux-riscv64": "0.21.5", - "@esbuild/linux-s390x": "0.21.5", - "@esbuild/linux-x64": "0.21.5", - "@esbuild/netbsd-x64": "0.21.5", - "@esbuild/openbsd-x64": "0.21.5", - "@esbuild/sunos-x64": "0.21.5", - "@esbuild/win32-arm64": "0.21.5", - "@esbuild/win32-ia32": "0.21.5", - "@esbuild/win32-x64": "0.21.5" - } - }, - "node_modules/vitest/node_modules/estree-walker": { - "version": "3.0.3", - "dev": true, - "license": "MIT", - 
"dependencies": { - "@types/estree": "^1.0.0" - } - }, - "node_modules/vitest/node_modules/pathe": { - "version": "1.1.2", - "dev": true, - "license": "MIT" - }, - "node_modules/vitest/node_modules/std-env": { - "version": "3.10.0", - "dev": true, - "license": "MIT" - }, - "node_modules/vitest/node_modules/tinyexec": { - "version": "0.3.2", - "dev": true, - "license": "MIT" - }, - "node_modules/vitest/node_modules/vite": { - "version": "5.4.21", - "dev": true, - "license": "MIT", - "dependencies": { - "esbuild": "^0.21.3", - "postcss": "^8.4.43", - "rollup": "^4.20.0" - }, - "bin": { - "vite": "bin/vite.js" - }, - "engines": { - "node": "^18.0.0 || >=20.0.0" - }, - "funding": { - "url": "https://github.com/vitejs/vite?sponsor=1" - }, - "optionalDependencies": { - "fsevents": "~2.3.3" - }, - "peerDependencies": { - "@types/node": "^18.0.0 || >=20.0.0", - "less": "*", - "lightningcss": "^1.21.0", - "sass": "*", - "sass-embedded": "*", - "stylus": "*", - "sugarss": "*", - "terser": "^5.4.0" - }, - "peerDependenciesMeta": { - "@types/node": { - "optional": true - }, - "less": { - "optional": true - }, - "lightningcss": { - "optional": true - }, - "sass": { - "optional": true - }, - "sass-embedded": { - "optional": true - }, - "stylus": { - "optional": true - }, - "sugarss": { - "optional": true - }, - "terser": { - "optional": true - } - } - }, - "node_modules/vitest/node_modules/vite-node": { - "version": "2.1.9", - "dev": true, - "license": "MIT", - "dependencies": { - "cac": "^6.7.14", - "debug": "^4.3.7", - "es-module-lexer": "^1.5.4", - "pathe": "^1.1.2", - "vite": "^5.0.0" - }, - "bin": { - "vite-node": "vite-node.mjs" - }, - "engines": { - "node": "^18.0.0 || >=20.0.0" - }, - "funding": { - "url": "https://opencollective.com/vitest" - } - }, - "node_modules/vscode-uri": { - "version": "3.1.0", - "license": "MIT" - }, - "node_modules/vue": { - "version": "3.5.34", - "license": "MIT", - "dependencies": { - "@vue/compiler-dom": "3.5.34", - "@vue/compiler-sfc": 
"3.5.34", - "@vue/runtime-dom": "3.5.34", - "@vue/server-renderer": "3.5.34", - "@vue/shared": "3.5.34" - }, - "peerDependencies": { - "typescript": "*" - }, - "peerDependenciesMeta": { - "typescript": { - "optional": true - } - } - }, - "node_modules/vue-bundle-renderer": { - "version": "2.2.0", - "license": "MIT", - "dependencies": { - "ufo": "^1.6.1" - } - }, - "node_modules/vue-component-type-helpers": { - "version": "3.3.1", - "dev": true, - "license": "MIT" - }, - "node_modules/vue-devtools-stub": { - "version": "0.1.0", - "license": "MIT" - }, - "node_modules/vue-router": { - "version": "4.6.4", - "license": "MIT", - "dependencies": { - "@vue/devtools-api": "^6.6.4" - }, - "funding": { - "url": "https://github.com/sponsors/posva" - }, - "peerDependencies": { - "vue": "^3.5.0" - } - }, - "node_modules/vue-sonner": { - "version": "2.0.9", - "license": "MIT", - "peerDependencies": { - "@nuxt/kit": "^4.0.3", - "@nuxt/schema": "^4.0.3", - "nuxt": "^4.0.3" - }, - "peerDependenciesMeta": { - "@nuxt/kit": { - "optional": true - }, - "@nuxt/schema": { - "optional": true - }, - "nuxt": { - "optional": true - } - } - }, - "node_modules/vue-tsc": { - "version": "2.2.12", - "devOptional": true, - "license": "MIT", - "dependencies": { - "@volar/typescript": "2.4.15", - "@vue/language-core": "2.2.12" - }, - "bin": { - "vue-tsc": "bin/vue-tsc.js" - }, - "peerDependencies": { - "typescript": ">=5.0.0" - } - }, - "node_modules/w3c-keyname": { - "version": "2.2.8", - "license": "MIT" - }, - "node_modules/w3c-xmlserializer": { - "version": "5.0.0", - "resolved": "https://registry.npmjs.org/w3c-xmlserializer/-/w3c-xmlserializer-5.0.0.tgz", - "integrity": "sha512-o8qghlI8NZHU1lLPrpi2+Uq7abh4GGPpYANlalzWxyWteJOCsr/P+oPBA49TOLu5FTZO4d3F9MnWJfiMo4BkmA==", - "license": "MIT", - "dependencies": { - "xml-name-validator": "^5.0.0" - }, - "engines": { - "node": ">=18" - } - }, - "node_modules/webidl-conversions": { - "version": "7.0.0", - "dev": true, - "license": "BSD-2-Clause", - 
"engines": { - "node": ">=12" - } - }, - "node_modules/webpack-virtual-modules": { - "version": "0.6.2", - "license": "MIT" - }, - "node_modules/whatwg-mimetype": { - "version": "3.0.0", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=12" - } - }, - "node_modules/whatwg-url": { - "version": "5.0.0", - "license": "MIT", - "dependencies": { - "tr46": "~0.0.3", - "webidl-conversions": "^3.0.0" - } - }, - "node_modules/whatwg-url/node_modules/webidl-conversions": { - "version": "3.0.1", - "license": "BSD-2-Clause" - }, - "node_modules/which": { - "version": "2.0.2", - "license": "ISC", - "dependencies": { - "isexe": "^2.0.0" - }, - "bin": { - "node-which": "bin/node-which" - }, - "engines": { - "node": ">= 8" - } - }, - "node_modules/why-is-node-running": { - "version": "2.3.0", - "dev": true, - "license": "MIT", - "dependencies": { - "siginfo": "^2.0.0", - "stackback": "0.0.2" - }, - "bin": { - "why-is-node-running": "cli.js" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/wordwrapjs": { - "version": "5.1.1", - "license": "MIT", - "engines": { - "node": ">=12.17" - } - }, - "node_modules/wrap-ansi": { - "version": "8.1.0", - "license": "MIT", - "dependencies": { - "ansi-styles": "^6.1.0", - "string-width": "^5.0.1", - "strip-ansi": "^7.0.1" - }, - "engines": { - "node": ">=12" - }, - "funding": { - "url": "https://github.com/chalk/wrap-ansi?sponsor=1" - } - }, - "node_modules/wrap-ansi-cjs": { - "name": "wrap-ansi", - "version": "7.0.0", - "license": "MIT", - "dependencies": { - "ansi-styles": "^4.0.0", - "string-width": "^4.1.0", - "strip-ansi": "^6.0.0" - }, - "engines": { - "node": ">=10" - }, - "funding": { - "url": "https://github.com/chalk/wrap-ansi?sponsor=1" - } - }, - "node_modules/wrap-ansi-cjs/node_modules/ansi-regex": { - "version": "5.0.1", - "license": "MIT", - "engines": { - "node": ">=8" - } - }, - "node_modules/wrap-ansi-cjs/node_modules/emoji-regex": { - "version": "8.0.0", - "license": "MIT" - }, - 
"node_modules/wrap-ansi-cjs/node_modules/string-width": { - "version": "4.2.3", - "license": "MIT", - "dependencies": { - "emoji-regex": "^8.0.0", - "is-fullwidth-code-point": "^3.0.0", - "strip-ansi": "^6.0.1" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/wrap-ansi-cjs/node_modules/strip-ansi": { - "version": "6.0.1", - "license": "MIT", - "dependencies": { - "ansi-regex": "^5.0.1" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/wrap-ansi/node_modules/ansi-styles": { - "version": "6.2.3", - "license": "MIT", - "engines": { - "node": ">=12" - }, - "funding": { - "url": "https://github.com/chalk/ansi-styles?sponsor=1" - } - }, - "node_modules/ws": { - "version": "8.20.1", - "license": "MIT", - "engines": { - "node": ">=10.0.0" - }, - "peerDependencies": { - "bufferutil": "^4.0.1", - "utf-8-validate": ">=5.0.2" - }, - "peerDependenciesMeta": { - "bufferutil": { - "optional": true - }, - "utf-8-validate": { - "optional": true - } - } - }, - "node_modules/wsl-utils": { - "version": "0.3.1", - "license": "MIT", - "dependencies": { - "is-wsl": "^3.1.0", - "powershell-utils": "^0.1.0" - }, - "engines": { - "node": ">=20" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/xml-name-validator": { - "version": "5.0.0", - "resolved": "https://registry.npmjs.org/xml-name-validator/-/xml-name-validator-5.0.0.tgz", - "integrity": "sha512-EvGK8EJ3DhaHfbRlETOWAS5pO9MZITeauHKJyb8wyajUfQUenkIg2MvLDTZ4T/TgIcm3HU0TFBgWWboAZ30UHg==", - "license": "Apache-2.0", - "engines": { - "node": ">=18" - } - }, - "node_modules/xmlchars": { - "version": "2.2.0", - "resolved": "https://registry.npmjs.org/xmlchars/-/xmlchars-2.2.0.tgz", - "integrity": "sha512-JZnDKK8B0RCDw84FNdDAIpZK+JuJw+s7Lz8nksI7SIuU3UXJJslUthsi+uWBUYOwPFwW7W7PRLRfUKpxjtjFCw==", - "license": "MIT" - }, - "node_modules/y18n": { - "version": "5.0.8", - "license": "ISC", - "engines": { - "node": ">=10" - } - }, - "node_modules/yallist": { - "version": "3.1.1", 
- "license": "ISC" - }, - "node_modules/yaml": { - "version": "2.9.0", - "license": "ISC", - "bin": { - "yaml": "bin.mjs" - }, - "engines": { - "node": ">= 14.6" - }, - "funding": { - "url": "https://github.com/sponsors/eemeli" - } - }, - "node_modules/yargs": { - "version": "18.0.0", - "license": "MIT", - "dependencies": { - "cliui": "^9.0.1", - "escalade": "^3.1.1", - "get-caller-file": "^2.0.5", - "string-width": "^7.2.0", - "y18n": "^5.0.5", - "yargs-parser": "^22.0.0" - }, - "engines": { - "node": "^20.19.0 || ^22.12.0 || >=23" - } - }, - "node_modules/yargs-parser": { - "version": "22.0.0", - "license": "ISC", - "engines": { - "node": "^20.19.0 || ^22.12.0 || >=23" - } - }, - "node_modules/yargs/node_modules/emoji-regex": { - "version": "10.6.0", - "license": "MIT" - }, - "node_modules/yargs/node_modules/string-width": { - "version": "7.2.0", - "license": "MIT", - "dependencies": { - "emoji-regex": "^10.3.0", - "get-east-asian-width": "^1.0.0", - "strip-ansi": "^7.1.0" - }, - "engines": { - "node": ">=18" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/youch": { - "version": "4.1.1", - "license": "MIT", - "dependencies": { - "@poppinss/colors": "^4.1.6", - "@poppinss/dumper": "^0.7.0", - "@speed-highlight/core": "^1.2.14", - "cookie-es": "^3.0.1", - "youch-core": "^0.3.3" - } - }, - "node_modules/youch-core": { - "version": "0.3.3", - "license": "MIT", - "dependencies": { - "@poppinss/exception": "^1.2.2", - "error-stack-parser-es": "^1.0.5" - } - }, - "node_modules/zip-stream": { - "version": "6.0.1", - "license": "MIT", - "dependencies": { - "archiver-utils": "^5.0.0", - "compress-commons": "^6.0.2", - "readable-stream": "^4.0.0" - }, - "engines": { - "node": ">= 14" - } - } - } -} diff --git a/src/dev-ui/pnpm-lock.yaml b/src/dev-ui/pnpm-lock.yaml index 3e9f0d6d0..b5346af1d 100644 --- a/src/dev-ui/pnpm-lock.yaml +++ b/src/dev-ui/pnpm-lock.yaml @@ -71,9 +71,15 @@ importers: cytoscape-fcose: specifier: ^2.2.0 
version: 2.2.0(cytoscape@3.33.1) + isomorphic-dompurify: + specifier: ^2.36.0 + version: 2.36.0 lucide-vue-next: specifier: ^0.563.0 version: 0.563.0(vue@3.5.28(typescript@5.9.3)) + marked: + specifier: ^15.0.12 + version: 15.0.12 nuxt: specifier: ^4.3.1 version: 4.3.1(@parcel/watcher@2.5.6)(@types/node@25.6.0)(@vue/compiler-sfc@3.5.28)(cac@6.7.14)(db0@0.3.4)(ioredis@5.9.3)(lightningcss@1.30.2)(magicast@0.5.2)(rollup@4.57.1)(terser@5.46.0)(typescript@5.9.3)(vite@7.3.1(@types/node@25.6.0)(jiti@2.6.1)(lightningcss@1.30.2)(terser@5.46.0)(yaml@2.8.2))(vue-tsc@2.2.12(typescript@5.9.3))(yaml@2.8.2) @@ -119,13 +125,30 @@ importers: version: 5.9.3 vitest: specifier: ^2.1.9 - version: 2.1.9(@types/node@25.6.0)(happy-dom@15.11.7)(lightningcss@1.30.2)(terser@5.46.0) + version: 2.1.9(@types/node@25.6.0)(happy-dom@15.11.7)(jsdom@28.1.0)(lightningcss@1.30.2)(terser@5.46.0) vue-tsc: specifier: ^2.2.10 version: 2.2.12(typescript@5.9.3) packages: + '@acemir/cssom@0.9.31': + resolution: {integrity: sha512-ZnR3GSaH+/vJ0YlHau21FjfLYjMpYVIzTD8M8vIEQvIGxeOXyXdzCI140rrCY862p/C/BbzWsjc1dgnM9mkoTA==} + + '@asamuzakjp/css-color@5.1.11': + resolution: {integrity: sha512-KVw6qIiCTUQhByfTd78h2yD1/00waTmm9uy/R7Ck/ctUyAPj+AEDLkQIdJW0T8+qGgj3j5bpNKK7Q3G+LedJWg==} + engines: {node: ^20.19.0 || ^22.12.0 || >=24.0.0} + + '@asamuzakjp/dom-selector@6.8.1': + resolution: {integrity: sha512-MvRz1nCqW0fsy8Qz4dnLIvhOlMzqDVBabZx6lH+YywFDdjXhMY37SmpV1XFX3JzG5GWHn63j6HX6QPr3lZXHvQ==} + + '@asamuzakjp/generational-cache@1.0.1': + resolution: {integrity: sha512-wajfB8KqzMCN2KGNFdLkReeHncd0AslUSrvHVvvYWuU8ghncRJoA50kT3zP9MVL0+9g4/67H+cdvBskj9THPzg==} + engines: {node: ^20.19.0 || ^22.12.0 || >=24.0.0} + + '@asamuzakjp/nwsapi@2.3.9': + resolution: {integrity: sha512-n8GuYSrI9bF7FFZ/SjhwevlHc8xaVlb/7HmHelnc/PZXBD2ZR49NnN9sMMuDdEGPeeRQ5d0hqlSlEpgCX3Wl0Q==} + '@babel/code-frame@7.29.0': resolution: {integrity: sha512-9NhCeYjq9+3uxgdtp20LSiJXJvN0FeCtNGpJxuMFZ1Kv3cWUNb6DOhJwUvcVCzKGR66cw4njwM6hrJLqgOwbcw==} engines: 
{node: '>=6.9.0'} @@ -258,6 +281,10 @@ packages: commander: optional: true + '@bramus/specificity@2.4.2': + resolution: {integrity: sha512-ctxtJ/eA+t+6q2++vj5j7FYX3nRu311q1wfYH3xjlLOsczhlhxAg2FWNUXhpGvAw3BWo1xBcvOV6/YLc2r5FJw==} + hasBin: true + '@clack/core@1.0.0': resolution: {integrity: sha512-Orf9Ltr5NeiEuVJS8Rk2XTw3IxNC2Bic3ash7GgYeA8LJ/zmSNpSQ/m5UAhe03lA6KFgklzZ5KTHs4OAMA/SAQ==} @@ -302,6 +329,42 @@ packages: resolution: {integrity: sha512-i+N9lSpAjGLTUPelo/bKNbQnKPDqt3k2UnRlfIWe2Lrambc4J3QFgOfpR8AalQ/1tgLRoeNtVBZ1GPpsNqae5w==} engines: {node: '>=12.2.0', npm: '>=7.0.0'} + '@csstools/color-helpers@6.0.2': + resolution: {integrity: sha512-LMGQLS9EuADloEFkcTBR3BwV/CGHV7zyDxVRtVDTwdI2Ca4it0CCVTT9wCkxSgokjE5Ho41hEPgb8OEUwoXr6Q==} + engines: {node: '>=20.19.0'} + + '@csstools/css-calc@3.2.1': + resolution: {integrity: sha512-DtdHlgXh5ZkA43cwBcAm+huzgJiwx3ZTWVjBs94kwz2xKqSimDA3lBgCjphYgwgVUMWatSM0pDd8TILB1yrVVg==} + engines: {node: '>=20.19.0'} + peerDependencies: + '@csstools/css-parser-algorithms': ^4.0.0 + '@csstools/css-tokenizer': ^4.0.0 + + '@csstools/css-color-parser@4.1.1': + resolution: {integrity: sha512-eZ5XOtyhK+mggRafYUWzA0tvaYOFgdY8AkgQiCJF9qNAePnUo/zmsqqYubBBb3sQ8uNUaSKTY9s9klfRaAXL0g==} + engines: {node: '>=20.19.0'} + peerDependencies: + '@csstools/css-parser-algorithms': ^4.0.0 + '@csstools/css-tokenizer': ^4.0.0 + + '@csstools/css-parser-algorithms@4.0.0': + resolution: {integrity: sha512-+B87qS7fIG3L5h3qwJ/IFbjoVoOe/bpOdh9hAjXbvx0o8ImEmUsGXN0inFOnk2ChCFgqkkGFQ+TpM5rbhkKe4w==} + engines: {node: '>=20.19.0'} + peerDependencies: + '@csstools/css-tokenizer': ^4.0.0 + + '@csstools/css-syntax-patches-for-csstree@1.1.4': + resolution: {integrity: sha512-wgsqt92b7C7tQhIdPNxj0n9zuUbQlvAuI1exyzeNrOKOi62SD7ren8zqszmpVREjAOqg8cD2FqYhQfAuKjk4sw==} + peerDependencies: + css-tree: ^3.2.1 + peerDependenciesMeta: + css-tree: + optional: true + + '@csstools/css-tokenizer@4.0.0': + resolution: {integrity: 
sha512-QxULHAm7cNu72w97JUNCBFODFaXpbDg+dP8b/oWFAZ2MTRppA3U00Y2L1HqaS4J6yBqxwa/Y3nMBaxVKbB/NsA==} + engines: {node: '>=20.19.0'} + '@duckdb/duckdb-wasm@1.29.1-dev260.0': resolution: {integrity: sha512-z9vhGFqazO7ZnRUUaQYhu5k/O/Lchk8vAzTuwHbPghS704b45fQClQMgYEZXfT+mgIeIrrXmxivkeo+rYpAjcQ==} @@ -617,6 +680,15 @@ packages: cpu: [x64] os: [win32] + '@exodus/bytes@1.15.1': + resolution: {integrity: sha512-S6mL0yNB/Abt9Ei4tq8gDhcczc4S3+vQ4ra7vxnAf+YHC02srtqxKKZghx2Dq6p0e66THKwR6r8N6P95wEty7Q==} + engines: {node: ^20.19.0 || ^22.12.0 || >=24.0.0} + peerDependencies: + '@noble/hashes': ^1.8.0 || ^2.0.0 + peerDependenciesMeta: + '@noble/hashes': + optional: true + '@floating-ui/core@1.7.4': resolution: {integrity: sha512-C3HlIdsBxszvm5McXlB8PeOEWfBhcGBTZGkGlWc2U0KFY5IwG5OQEuQ8rq52DZmcHDlPLd+YFBK+cZcytwIFWg==} @@ -1983,6 +2055,9 @@ packages: resolution: {integrity: sha512-ipDqC8FrAl/76p2SSWKSI+H9tFwm7vYqXQrItCuiVPt26Km0jS+NzSsBWAaBusvSbQcfJG+JitdMm+wZAgTYqg==} hasBin: true + bidi-js@1.0.3: + resolution: {integrity: sha512-RKshQI1R3YQ+n9YJz2QQ147P66ELpa1FQEg20Dk8oW9t2KgLbpDLLp9aGZ7y8WHSshDknG0bknqGw5/tyCs5tw==} + binary-search-bounds@2.0.5: resolution: {integrity: sha512-H0ea4Fd3lS1+sTEB2TgcLoK21lLhwEJzlQv3IN47pJS976Gx4zoWe0ak3q+uYh60ppQxg9F16Ri4tS1sfD4+jA==} @@ -2247,6 +2322,10 @@ packages: resolution: {integrity: sha512-0LrrStPOdJj+SPCCrGhzryycLjwcgUSHBtxNA8aIDxf0GLsRh1cKYhB00Gd1lDOS4yGH69+SNn13+TWbVHETFQ==} engines: {node: ^10 || ^12.20.0 || ^14.13.0 || >=15.0.0, npm: '>=7.0.0'} + cssstyle@6.2.0: + resolution: {integrity: sha512-Fm5NvhYathRnXNVndkUsCCuR63DCLVVwGOOwQw782coXFi5HhkXdu289l59HlXZBawsyNccXfWRYvLzcDCdDig==} + engines: {node: '>=20'} + csstype@3.2.3: resolution: {integrity: sha512-z1HGKcYy2xA8AGQfwrn0PAy+PB7X/GSj3UVJW9qKyn43xWa+gl5nXmU4qqLMRzWVLFC8KusUX8T/0kCiOYpAIQ==} @@ -2391,6 +2470,10 @@ packages: resolution: {integrity: sha512-e1U46jVP+w7Iut8Jt8ri1YsPOvFpg46k+K8TpCb0P+zjCkjkPnV7WzfDJzMHy1LnA+wj5pLT1wjO901gLXeEhA==} engines: {node: '>=12'} + data-urls@7.0.0: + 
resolution: {integrity: sha512-23XHcCF+coGYevirZceTVD7NdJOqVn+49IHyxgszm+JIiHLoB2TkmPtsYkNWT1pvRSGkc35L6NHs0yHkN2SumA==} + engines: {node: ^20.19.0 || ^22.12.0 || >=24.0.0} + db0@0.3.4: resolution: {integrity: sha512-RiXXi4WaNzPTHEOu8UPQKMooIbqOEyqA1t7Z6MsdxSCeb8iUC9ko3LcmsLmeUt2SM5bctfArZKkRQggKZz7JNw==} peerDependencies: @@ -2426,6 +2509,9 @@ packages: supports-color: optional: true + decimal.js@10.6.0: + resolution: {integrity: sha512-YpgQiITW3JXGntzdUmyUR1V812Hn8T1YVXhCu+wO3OpS4eU9l4YdD3qjyiKdV6mvV29zapkMeD390UVEf2lkUg==} + deep-eql@5.0.2: resolution: {integrity: sha512-h5k/5U50IJJFpzfL6nO9jaaumfjO/f2NjK/oYB2Djzm4p9L+3T9qWpZqZ2hAbLPuuYq9wrU08WQyBTL5GbPk5Q==} engines: {node: '>=6'} @@ -2541,6 +2627,10 @@ packages: resolution: {integrity: sha512-TWrgLOFUQTH994YUyl1yT4uyavY5nNB5muff+RtWaqNVCAK408b5ZnnbNAUEWLTCpum9w6arT70i1XdQ4UeOPA==} engines: {node: '>=0.12'} + entities@8.0.0: + resolution: {integrity: sha512-zwfzJecQ/Uej6tusMqwAqU/6KL2XaB2VZ2Jg54Je6ahNBGNH6Ek6g3jjNCF0fG9EWQKGZNddNjU5F1ZQn/sBnA==} + engines: {node: '>=20.19.0'} + error-stack-parser-es@1.0.5: resolution: {integrity: sha512-5qucVt2XcuGMcEGgWI7i+yZpmpByQ8J1lHhcL7PwqCwu9FPP3VUXzT4ltHe5i2z9dePwEHcDVOAfSnHsOlCXRA==} @@ -2750,10 +2840,18 @@ packages: hookable@6.0.1: resolution: {integrity: sha512-uKGyY8BuzN/a5gvzvA+3FVWo0+wUjgtfSdnmjtrOVwQCZPHpHDH2WRO3VZSOeluYrHoDCiXFffZXs8Dj1ULWtw==} + html-encoding-sniffer@6.0.0: + resolution: {integrity: sha512-CV9TW3Y3f8/wT0BRFc1/KAVQ3TUHiXmaAb6VW9vtiMFf7SLoMd1PdAc4W3KFOFETBJUb90KatHqlsZMWV+R9Gg==} + engines: {node: ^20.19.0 || ^22.12.0 || >=24.0.0} + http-errors@2.0.1: resolution: {integrity: sha512-4FbRdAX+bSdmo4AUFuS0WNiPz8NgFt+r8ThgNWmlrjQjt1Q7ZR9+zTlce2859x4KSXrwIsaeTqDoKQmtP8pLmQ==} engines: {node: '>= 0.8'} + http-proxy-agent@7.0.2: + resolution: {integrity: sha512-T1gkAiYYDWYx3V5Bmyu7HcfcvL7mUrTWiM6yOfa3PIphViJ/gFPbvidQ+veqSOHci/PxBcDabeUNCzpOODJZig==} + engines: {node: '>= 14'} + http-shutdown@1.2.2: resolution: {integrity: 
sha512-S9wWkJ/VSY9/k4qcjG318bqJNruzE4HySUhFYknwmu6LBP97KLLfwNf+n4V1BHurvFNkSKLFnK/RsuUnRTf9Vw==} engines: {iojs: '>= 1.0.0', node: '>= 0.12.0'} @@ -2860,6 +2958,9 @@ packages: resolution: {integrity: sha512-lJJV/5dYS+RcL8uQdBDW9c9uWFLLBNRyFhnAKXw5tVqLlKZ4RMGZKv+YQ/IA3OhD+RpbJa1LLFM1FQPGyIXvOA==} engines: {node: '>=12'} + is-potential-custom-element-name@1.0.1: + resolution: {integrity: sha512-bCYeRA2rVibKZd+s2625gGnGF/t7DSqDs4dP7CrLA1m7jKWz6pps0LpYLJN8Q64HtmPKJ1hrN3nzPNKFEKOUiQ==} + is-reference@1.2.1: resolution: {integrity: sha512-U82MsXXiFIrjCK4otLT+o2NA2Cd2g5MLoOVXUZjIOhLurrRxpEXzI8O0KZHr3IjLvlAH1kTPYSuqer5T9ZVBKQ==} @@ -2900,6 +3001,10 @@ packages: isoformat@0.2.1: resolution: {integrity: sha512-tFLRAygk9NqrRPhJSnNGh7g7oaVWDwR0wKh/GM2LgmPa50Eg4UfyaCO4I8k6EqJHl1/uh2RAD6g06n5ygEnrjQ==} + isomorphic-dompurify@2.36.0: + resolution: {integrity: sha512-E8YkGyPY3a/U5s0WOoc8Ok+3SWL/33yn2IHCoxCFLBUUPVy9WGa++akJZFxQCcJIhI+UvYhbrbnTIFQkHKZbgA==} + engines: {node: '>=20.19.5'} + jackspeak@3.4.3: resolution: {integrity: sha512-OGlZQpz2yfahA/Rd1Y8Cd9SIEsqvXkLVoSw/cgwhnhFMDbsQFeZYoJJ7bIZBS9BcamUW96asq/npPWugM+RQBw==} @@ -2922,6 +3027,15 @@ packages: js-tokens@9.0.1: resolution: {integrity: sha512-mxa9E9ITFOt0ban3j6L5MpjwegGz6lBQmM1IJkWeBZGcMxto50+eWdjC/52xDbS2vy0k7vIMK0Fe2wfL9OQSpQ==} + jsdom@28.1.0: + resolution: {integrity: sha512-0+MoQNYyr2rBHqO1xilltfDjV9G7ymYGlAUazgcDLQaUf8JDHbuGwsxN6U9qWaElZ4w1B2r7yEGIL3GdeW3Rug==} + engines: {node: ^20.19.0 || ^22.12.0 || >=24.0.0} + peerDependencies: + canvas: ^3.0.0 + peerDependenciesMeta: + canvas: + optional: true + jsesc@3.1.0: resolution: {integrity: sha512-/sM3dO2FOzXjKQhJuo0Q173wf2KOo8t4I8vHy6lF9poUp7bKT0/NHE8fPX23PwfhnykfqnC2xRxOnVw5XuGIaA==} engines: {node: '>=6'} @@ -3103,6 +3217,11 @@ packages: magicast@0.5.2: resolution: {integrity: sha512-E3ZJh4J3S9KfwdjZhe2afj6R9lGIN5Pher1pF39UGrXRqq/VDaGVIGN13BjHd2u8B61hArAGOnso7nBOouW3TQ==} + marked@15.0.12: + resolution: {integrity: 
sha512-8dD6FusOQSrpv9Z1rdNMdlSgQOIP880DHqnohobOmYLElGEqAL/JvxvuxZO16r4HtjTlfPRDC1hbvxC9dPN2nA==} + engines: {node: '>= 18'} + hasBin: true + mdn-data@2.0.28: resolution: {integrity: sha512-aylIc7Z9y4yzHYAJNuESG3hfhC+0Ibp/MAMiaOZgNv4pmEdFyfZhhhny4MNiAfWdBQ1RQ2mfDWmM1x8SvGyp8g==} @@ -3327,6 +3446,9 @@ packages: package-json-from-dist@1.0.1: resolution: {integrity: sha512-UEZIS3/by4OC8vL3P2dTXRETpebLI2NiI5vIrjaD/5UtrkFX/tNbwjTSRAGC/+7CAo2pIcBaRgWmcBBHcsaCIw==} + parse5@8.0.1: + resolution: {integrity: sha512-z1e/HMG90obSGeidlli3hj7cbocou0/wa5HacvI3ASx34PecNjNQeaHNo5WIZpWofN9kgkqV1q5YvXe3F0FoPw==} + parseurl@1.3.3: resolution: {integrity: sha512-CiyeOxFT/JZyN5m0z9PfXw4SCBJ6Sygz1Dpl0wqjlhDEGGBP1GnsUVEL0p63hoG1fcj3fHynXi9NYO4nWOL+qQ==} engines: {node: '>= 0.8'} @@ -3574,6 +3696,10 @@ packages: proto-list@1.2.4: resolution: {integrity: sha512-vtK/94akxsTMhe0/cbfpR+syPuszcuwhqVjJq26CuNDgFGj682oRBXOP5MJpv2r7JtE8MsiepGIqvvOTBwn2vA==} + punycode@2.3.1: + resolution: {integrity: sha512-vYt7UD1U9Wg6138shLtLOvdAu+8DsC/ilFtEVHcH+wydcSpNE20AfSOduf6MkRFahL5FY7X1oU7nKVZFtfq8Fg==} + engines: {node: '>=6'} + quansync@0.2.11: resolution: {integrity: sha512-AifT7QEbW9Nri4tAwR5M/uzpBuqfZf+zwaEM/QkzEjj7NBuFD2rBuy0K3dE+8wltbezDV7JMA0WfnCPYRSYbXA==} @@ -3642,6 +3768,10 @@ packages: resolution: {integrity: sha512-fGxEI7+wsG9xrvdjsrlmL22OMTTiHRwAMroiEeMgq8gzoLC/PQr7RsRDSTLUg/bZAZtF+TVIkHc6/4RIKrui+Q==} engines: {node: '>=0.10.0'} + require-from-string@2.0.2: + resolution: {integrity: sha512-Xf0nWe6RseziFMu+Ap9biiUbmplq6S9/p+7w7YXP/JBHhrUDDUhwa+vANyubuqfZWTveU//DYVGsDG7RKL/vEw==} + engines: {node: '>=0.10.0'} + resolve-from@5.0.0: resolution: {integrity: sha512-qYg9KP24dD5qka9J47d0aVky0N+b4fTU89LN9iDnjB5waksiC49rvMB0PrUJQGoTmH50XPiqOvAjDfaijGxYZw==} engines: {node: '>=8'} @@ -3705,6 +3835,10 @@ packages: resolution: {integrity: sha512-1n3r/tGXO6b6VXMdFT54SHzT9ytu9yr7TaELowdYpMqY/Ao7EnlQGmAQ1+RatX7Tkkdm6hONI2owqNx2aZj5Sw==} engines: {node: '>=11.0.0'} + saxes@6.0.0: + resolution: {integrity: 
sha512-xAg7SOnEhrm5zI3puOOKyy1OMcMlIJZYNJY7xLBwSze0UjhPLnWfj2GF2EpT0jmzaJKIWKHLsaSSajf35bcYnA==} + engines: {node: '>=v12.22.7'} + scule@1.3.0: resolution: {integrity: sha512-6FtHJEvt+pVMIB9IBY+IcCJ6Z5f1iQnytgyfKMhDKgmzYG+TeH/wx1y3l27rshSbLiSanrR9ffZDrEsmjlQF2g==} @@ -3880,6 +4014,9 @@ packages: engines: {node: '>=16'} hasBin: true + symbol-tree@3.2.4: + resolution: {integrity: sha512-9QNk5KwDF+Bvz+PyObkmSYjI5ksVUYtjW7AU22r2NKcfLJcXp96hkDWU3+XndOsUb+AQ9QhfzfCT2O+CNWT5Tw==} + system-architecture@0.1.0: resolution: {integrity: sha512-ulAk51I9UVUyJgxlv9M6lFot2WP3e7t8Kz9+IS6D4rVba1tR9kON+Ey69f+1R4Q8cd45Lod6a4IcJIxnzGc/zA==} engines: {node: '>=18'} @@ -3946,6 +4083,13 @@ packages: resolution: {integrity: sha512-n1cw8k1k0x4pgA2+9XrOkFydTerNcJ1zWCO5Nn9scWHTD+5tp8dghT2x1uduQePZTZgd3Tupf+x9BxJjeJi77Q==} engines: {node: '>=14.0.0'} + tldts-core@7.4.2: + resolution: {integrity: sha512-nwEyF4vl4RSJjwSjBUmOSxc3BFPoIFdlRthJ6e+5v9P3bHNsoD06UjuqMUspqp7vsEZ1beaHi1km+optiE17yA==} + + tldts@7.4.2: + resolution: {integrity: sha512-kCwffuaH8ntKtygnWe1b4BJKWiCUH30n5KfoTr6IchcXOwR7chAOFJxFrH3vjANafUYrIA4a7SDL+nn7SiR4Sw==} + hasBin: true + to-regex-range@5.0.1: resolution: {integrity: sha512-65P7iz6X5yEr1cwcgvQxbbIw7Uk3gOy5dIdtZ4rDveLqhrdJP+Li/Hx6tyK0NEb+2GCyneCMJiGqrADCSNk8sQ==} engines: {node: '>=8.0'} @@ -3958,9 +4102,17 @@ packages: resolution: {integrity: sha512-sf4i37nQ2LBx4m3wB74y+ubopq6W/dIzXg0FDGjsYnZHVa1Da8FH853wlL2gtUhg+xJXjfk3kUZS3BRoQeoQBQ==} engines: {node: '>=6'} + tough-cookie@6.0.1: + resolution: {integrity: sha512-LktZQb3IeoUWB9lqR5EWTHgW/VTITCXg4D21M+lvybRVdylLrRMnqaIONLVb5mav8vM19m44HIcGq4qASeu2Qw==} + engines: {node: '>=16'} + tr46@0.0.3: resolution: {integrity: sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==} + tr46@6.0.0: + resolution: {integrity: sha512-bLVMLPtstlZ4iMQHpFHTR7GAGj2jxi8Dg0s2h2MafAE4uSWF98FC/3MomU51iQAMf8/qDUbKWf5GxuvvVcXEhw==} + engines: {node: '>=20'} + tslib@2.8.1: resolution: {integrity: 
sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==} @@ -4005,6 +4157,10 @@ packages: undici-types@7.19.2: resolution: {integrity: sha512-qYVnV5OEm2AW8cJMCpdV20CDyaN3g0AjDlOGf1OW4iaDEx8MwdtChUp4zu4H0VP3nDRF/8RKWH+IPp9uW0YGZg==} + undici@7.27.0: + resolution: {integrity: sha512-+t2Z/GwkZQDtu00813aP66ygViGtPHKhhoFZpQKpKrE+9jIgES+Zw+mFNaDWOVRKiuJjuqKHzD3B1sfGg8+ZOQ==} + engines: {node: '>=20.18.1'} + unenv@2.0.0-rc.24: resolution: {integrity: sha512-i7qRCmY42zmCwnYlh9H2SvLEypEFGye5iRmEMKjcGi7zk9UquigRjFtTLz0TYqr0ZGLZhaMHl/foy1bZR+Cwlw==} @@ -4361,6 +4517,10 @@ packages: w3c-keyname@2.2.8: resolution: {integrity: sha512-dpojBhNsCNN7T82Tm7k26A6G9ML3NkhDsnw9n/eoxSRlVBB4CEtIQ/KTCLI2Fwf3ataSXRhYFkQi3SlnFwPvPQ==} + w3c-xmlserializer@5.0.0: + resolution: {integrity: sha512-o8qghlI8NZHU1lLPrpi2+Uq7abh4GGPpYANlalzWxyWteJOCsr/P+oPBA49TOLu5FTZO4d3F9MnWJfiMo4BkmA==} + engines: {node: '>=18'} + webidl-conversions@3.0.1: resolution: {integrity: sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ==} @@ -4368,6 +4528,10 @@ packages: resolution: {integrity: sha512-VwddBukDzu71offAQR975unBIGqfKZpM+8ZX6ySk8nYhVoo5CYaZyzt3YBvYtRtO+aoGlqxPg/B87NGVZ/fu6g==} engines: {node: '>=12'} + webidl-conversions@8.0.1: + resolution: {integrity: sha512-BMhLD/Sw+GbJC21C/UgyaZX41nPt8bUTg+jWyDeg7e7YN4xOM05YPSIXceACnXVtqyEw/LMClUQMtMZ+PGGpqQ==} + engines: {node: '>=20'} + webpack-virtual-modules@0.6.2: resolution: {integrity: sha512-66/V2i5hQanC51vBQKPH4aI8NMAcBW59FVBs+rC7eGHupMyfn34q7rZIE+ETlJ+XTevqfUhVVBgSUNSW2flEUQ==} @@ -4375,6 +4539,14 @@ packages: resolution: {integrity: sha512-nt+N2dzIutVRxARx1nghPKGv1xHikU7HKdfafKkLNLindmPU/ch3U31NOCGGA/dmPcmb1VlofO0vnKAcsm0o/Q==} engines: {node: '>=12'} + whatwg-mimetype@5.0.0: + resolution: {integrity: sha512-sXcNcHOC51uPGF0P/D4NVtrkjSU2fNsm9iog4ZvZJsL3rjoDAzXZhkm2MWt1y+PUdggKAYVoMAIYcs78wJ51Cw==} + engines: {node: '>=20'} + + whatwg-url@16.0.1: + resolution: {integrity: 
sha512-1to4zXBxmXHV3IiSSEInrreIlu02vUOvrhxJJH5vcxYTBDAx51cqZiKdyTxlecdKNSjj8EcxGBxNf6Vg+945gw==} + engines: {node: ^20.19.0 || ^22.12.0 || >=24.0.0} + whatwg-url@5.0.0: resolution: {integrity: sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw==} @@ -4421,6 +4593,13 @@ packages: resolution: {integrity: sha512-h3Fbisa2nKGPxCpm89Hk33lBLsnaGBvctQopaBSOW/uIs6FTe1ATyAnKFJrzVs9vpGdsTe73WF3V4lIsk4Gacw==} engines: {node: '>=18'} + xml-name-validator@5.0.0: + resolution: {integrity: sha512-EvGK8EJ3DhaHfbRlETOWAS5pO9MZITeauHKJyb8wyajUfQUenkIg2MvLDTZ4T/TgIcm3HU0TFBgWWboAZ30UHg==} + engines: {node: '>=18'} + + xmlchars@2.2.0: + resolution: {integrity: sha512-JZnDKK8B0RCDw84FNdDAIpZK+JuJw+s7Lz8nksI7SIuU3UXJJslUthsi+uWBUYOwPFwW7W7PRLRfUKpxjtjFCw==} + y18n@5.0.8: resolution: {integrity: sha512-0pfFzegeDWJHJIAmTLRP2DwHjdF5s7jo9tuztdQxAhINCdvS+3nGINqPd00AphqJR/0LhANUS6/+7SCb98YOfA==} engines: {node: '>=10'} @@ -4457,6 +4636,28 @@ packages: snapshots: + '@acemir/cssom@0.9.31': {} + + '@asamuzakjp/css-color@5.1.11': + dependencies: + '@asamuzakjp/generational-cache': 1.0.1 + '@csstools/css-calc': 3.2.1(@csstools/css-parser-algorithms@4.0.0(@csstools/css-tokenizer@4.0.0))(@csstools/css-tokenizer@4.0.0) + '@csstools/css-color-parser': 4.1.1(@csstools/css-parser-algorithms@4.0.0(@csstools/css-tokenizer@4.0.0))(@csstools/css-tokenizer@4.0.0) + '@csstools/css-parser-algorithms': 4.0.0(@csstools/css-tokenizer@4.0.0) + '@csstools/css-tokenizer': 4.0.0 + + '@asamuzakjp/dom-selector@6.8.1': + dependencies: + '@asamuzakjp/nwsapi': 2.3.9 + bidi-js: 1.0.3 + css-tree: 3.1.0 + is-potential-custom-element-name: 1.0.1 + lru-cache: 11.2.6 + + '@asamuzakjp/generational-cache@1.0.1': {} + + '@asamuzakjp/nwsapi@2.3.9': {} + '@babel/code-frame@7.29.0': dependencies: '@babel/helper-validator-identifier': 7.28.5 @@ -4629,6 +4830,10 @@ snapshots: cac: 6.7.14 citty: 0.2.1 + '@bramus/specificity@2.4.2': + dependencies: + css-tree: 3.1.0 + '@clack/core@1.0.0': 
dependencies: picocolors: 1.1.1 @@ -4740,6 +4945,30 @@ snapshots: random: 4.1.0 regl: 2.1.1 + '@csstools/color-helpers@6.0.2': {} + + '@csstools/css-calc@3.2.1(@csstools/css-parser-algorithms@4.0.0(@csstools/css-tokenizer@4.0.0))(@csstools/css-tokenizer@4.0.0)': + dependencies: + '@csstools/css-parser-algorithms': 4.0.0(@csstools/css-tokenizer@4.0.0) + '@csstools/css-tokenizer': 4.0.0 + + '@csstools/css-color-parser@4.1.1(@csstools/css-parser-algorithms@4.0.0(@csstools/css-tokenizer@4.0.0))(@csstools/css-tokenizer@4.0.0)': + dependencies: + '@csstools/color-helpers': 6.0.2 + '@csstools/css-calc': 3.2.1(@csstools/css-parser-algorithms@4.0.0(@csstools/css-tokenizer@4.0.0))(@csstools/css-tokenizer@4.0.0) + '@csstools/css-parser-algorithms': 4.0.0(@csstools/css-tokenizer@4.0.0) + '@csstools/css-tokenizer': 4.0.0 + + '@csstools/css-parser-algorithms@4.0.0(@csstools/css-tokenizer@4.0.0)': + dependencies: + '@csstools/css-tokenizer': 4.0.0 + + '@csstools/css-syntax-patches-for-csstree@1.1.4(css-tree@3.1.0)': + optionalDependencies: + css-tree: 3.1.0 + + '@csstools/css-tokenizer@4.0.0': {} + '@duckdb/duckdb-wasm@1.29.1-dev260.0': dependencies: apache-arrow: 17.0.0 @@ -4923,6 +5152,8 @@ snapshots: '@esbuild/win32-x64@0.27.3': optional: true + '@exodus/bytes@1.15.1': {} + '@floating-ui/core@1.7.4': dependencies: '@floating-ui/utils': 0.2.10 @@ -6287,6 +6518,10 @@ snapshots: baseline-browser-mapping@2.9.19: {} + bidi-js@1.0.3: + dependencies: + require-from-string: 2.0.2 + binary-search-bounds@2.0.5: {} bindings@1.5.0: @@ -6589,6 +6824,13 @@ snapshots: dependencies: css-tree: 2.2.1 + cssstyle@6.2.0: + dependencies: + '@asamuzakjp/css-color': 5.1.11 + '@csstools/css-syntax-patches-for-csstree': 1.1.4(css-tree@3.1.0) + css-tree: 3.1.0 + lru-cache: 11.2.6 + csstype@3.2.3: {} cytoscape-cise@2.0.1(cytoscape@3.33.1): @@ -6756,6 +6998,13 @@ snapshots: d3-transition: 3.0.1(d3-selection@3.0.0) d3-zoom: 3.0.0 + data-urls@7.0.0: + dependencies: + whatwg-mimetype: 5.0.0 + whatwg-url: 
16.0.1 + transitivePeerDependencies: + - '@noble/hashes' + db0@0.3.4: {} de-indent@1.0.2: {} @@ -6764,6 +7013,8 @@ snapshots: dependencies: ms: 2.1.3 + decimal.js@10.6.0: {} + deep-eql@5.0.2: {} deepmerge@4.3.1: {} @@ -6855,6 +7106,8 @@ snapshots: entities@7.0.1: {} + entities@8.0.0: {} + error-stack-parser-es@1.0.5: {} errx@0.1.0: {} @@ -7099,6 +7352,12 @@ snapshots: hookable@6.0.1: {} + html-encoding-sniffer@6.0.0: + dependencies: + '@exodus/bytes': 1.15.1 + transitivePeerDependencies: + - '@noble/hashes' + http-errors@2.0.1: dependencies: depd: 2.0.0 @@ -7107,6 +7366,13 @@ snapshots: statuses: 2.0.2 toidentifier: 1.0.1 + http-proxy-agent@7.0.2: + dependencies: + agent-base: 7.1.4 + debug: 4.4.3 + transitivePeerDependencies: + - supports-color + http-shutdown@1.2.2: {} https-proxy-agent@7.0.6: @@ -7201,6 +7467,8 @@ snapshots: is-path-inside@4.0.0: {} + is-potential-custom-element-name@1.0.1: {} + is-reference@1.2.1: dependencies: '@types/estree': 1.0.8 @@ -7231,6 +7499,15 @@ snapshots: isoformat@0.2.1: {} + isomorphic-dompurify@2.36.0: + dependencies: + dompurify: 3.4.2 + jsdom: 28.1.0 + transitivePeerDependencies: + - '@noble/hashes' + - canvas + - supports-color + jackspeak@3.4.3: dependencies: '@isaacs/cliui': 8.0.2 @@ -7253,6 +7530,33 @@ snapshots: js-tokens@9.0.1: {} + jsdom@28.1.0: + dependencies: + '@acemir/cssom': 0.9.31 + '@asamuzakjp/dom-selector': 6.8.1 + '@bramus/specificity': 2.4.2 + '@exodus/bytes': 1.15.1 + cssstyle: 6.2.0 + data-urls: 7.0.0 + decimal.js: 10.6.0 + html-encoding-sniffer: 6.0.0 + http-proxy-agent: 7.0.2 + https-proxy-agent: 7.0.6 + is-potential-custom-element-name: 1.0.1 + parse5: 8.0.1 + saxes: 6.0.0 + symbol-tree: 3.2.4 + tough-cookie: 6.0.1 + undici: 7.27.0 + w3c-xmlserializer: 5.0.0 + webidl-conversions: 8.0.1 + whatwg-mimetype: 5.0.0 + whatwg-url: 16.0.1 + xml-name-validator: 5.0.0 + transitivePeerDependencies: + - '@noble/hashes' + - supports-color + jsesc@3.1.0: {} json-bignum@0.0.3: {} @@ -7410,6 +7714,8 @@ snapshots: 
'@babel/types': 7.29.0 source-map-js: 1.2.1 + marked@15.0.12: {} + mdn-data@2.0.28: {} mdn-data@2.12.2: {} @@ -7861,6 +8167,10 @@ snapshots: package-json-from-dist@1.0.1: {} + parse5@8.0.1: + dependencies: + entities: 8.0.0 + parseurl@1.3.3: {} path-browserify@1.0.1: {} @@ -8082,6 +8392,8 @@ snapshots: proto-list@1.2.4: {} + punycode@2.3.1: {} + quansync@0.2.11: {} queue-microtask@1.2.3: {} @@ -8162,6 +8474,8 @@ snapshots: require-directory@2.1.1: {} + require-from-string@2.0.2: {} + resolve-from@5.0.0: {} resolve@1.22.11: @@ -8234,6 +8548,10 @@ snapshots: sax@1.4.4: {} + saxes@6.0.0: + dependencies: + xmlchars: 2.2.0 + scule@1.3.0: {} seedrandom@3.0.5: {} @@ -8409,6 +8727,8 @@ snapshots: picocolors: 1.1.1 sax: 1.4.4 + symbol-tree@3.2.4: {} + system-architecture@0.1.0: {} table-layout@4.1.1: @@ -8473,6 +8793,12 @@ snapshots: tinyspy@3.0.2: {} + tldts-core@7.4.2: {} + + tldts@7.4.2: + dependencies: + tldts-core: 7.4.2 + to-regex-range@5.0.1: dependencies: is-number: 7.0.0 @@ -8481,8 +8807,16 @@ snapshots: totalist@3.0.1: {} + tough-cookie@6.0.1: + dependencies: + tldts: 7.4.2 + tr46@0.0.3: {} + tr46@6.0.0: + dependencies: + punycode: 2.3.1 + tslib@2.8.1: {} tw-animate-css@1.4.0: {} @@ -8517,6 +8851,8 @@ snapshots: undici-types@7.19.2: optional: true + undici@7.27.0: {} + unenv@2.0.0-rc.24: dependencies: pathe: 2.0.3 @@ -8758,7 +9094,7 @@ snapshots: terser: 5.46.0 yaml: 2.8.2 - vitest@2.1.9(@types/node@25.6.0)(happy-dom@15.11.7)(lightningcss@1.30.2)(terser@5.46.0): + vitest@2.1.9(@types/node@25.6.0)(happy-dom@15.11.7)(jsdom@28.1.0)(lightningcss@1.30.2)(terser@5.46.0): dependencies: '@vitest/expect': 2.1.9 '@vitest/mocker': 2.1.9(vite@5.4.21(@types/node@25.6.0)(lightningcss@1.30.2)(terser@5.46.0)) @@ -8783,6 +9119,7 @@ snapshots: optionalDependencies: '@types/node': 25.6.0 happy-dom: 15.11.7 + jsdom: 28.1.0 transitivePeerDependencies: - less - lightningcss @@ -8837,14 +9174,30 @@ snapshots: w3c-keyname@2.2.8: {} + w3c-xmlserializer@5.0.0: + dependencies: + 
xml-name-validator: 5.0.0 + webidl-conversions@3.0.1: {} webidl-conversions@7.0.0: {} + webidl-conversions@8.0.1: {} + webpack-virtual-modules@0.6.2: {} whatwg-mimetype@3.0.0: {} + whatwg-mimetype@5.0.0: {} + + whatwg-url@16.0.1: + dependencies: + '@exodus/bytes': 1.15.1 + tr46: 6.0.0 + webidl-conversions: 8.0.1 + transitivePeerDependencies: + - '@noble/hashes' + whatwg-url@5.0.0: dependencies: tr46: 0.0.3 @@ -8883,6 +9236,10 @@ snapshots: dependencies: is-wsl: 3.1.0 + xml-name-validator@5.0.0: {} + + xmlchars@2.2.0: {} + y18n@5.0.8: {} yallist@3.1.1: {} From f4e3aef21dc38b1da98302decc728b5edc5c39a5 Mon Sep 17 00:00:00 2001 From: aredenba-rh <aredenba@redhat.com> Date: Wed, 3 Jun 2026 15:15:58 -0400 Subject: [PATCH 094/153] fix(dev-ui): compact schema entities panel on graph management Remove the type filter block and tighten prepopulation guide and entity type rows so the artifact detail column fits without excessive height. Co-authored-by: Cursor <cursoragent@cursor.com> --- .../GraphDesignEntitiesPanel.vue | 149 ++++++++---------- 1 file changed, 64 insertions(+), 85 deletions(-) diff --git a/src/dev-ui/app/components/graph-management/GraphDesignEntitiesPanel.vue b/src/dev-ui/app/components/graph-management/GraphDesignEntitiesPanel.vue index 5496a5a5a..8397f974f 100644 --- a/src/dev-ui/app/components/graph-management/GraphDesignEntitiesPanel.vue +++ b/src/dev-ui/app/components/graph-management/GraphDesignEntitiesPanel.vue @@ -1,7 +1,7 @@ <script setup lang="ts"> import { computed, ref, watch } from 'vue' import { toast } from 'vue-sonner' -import { ChevronDown, Loader2, RefreshCw, Search } from 'lucide-vue-next' +import { ChevronDown, Loader2, RefreshCw } from 'lucide-vue-next' import { Card, CardContent, CardDescription, CardHeader, CardTitle } from '@/components/ui/card' import { Badge } from '@/components/ui/badge' import { Button } from '@/components/ui/button' @@ -27,7 +27,6 @@ const { apiFetch } = useApiClient() const loading = ref(true) const data = 
ref<DesignArtifactsResponse | null>(null) -const filterText = ref('') const instancePage = ref<Record<string, number>>({}) async function fetchEntities() { @@ -61,20 +60,10 @@ const entityRows = computed((): DesignArtifactEntityType[] => { })) }) -const filteredRows = computed(() => { - const query = filterText.value.trim().toLowerCase() - if (!query) return entityRows.value - return entityRows.value.filter((row) => row.type.toLowerCase().includes(query)) -}) - function setInstancePage(typeKey: string, page: number) { instancePage.value = { ...instancePage.value, [typeKey]: page } } -watch(filterText, () => { - instancePage.value = {} -}) - watch( () => [props.kgId, props.reloadNonce] as const, () => { @@ -87,12 +76,12 @@ defineExpose({ refresh: fetchEntities }) </script> <template> - <div :class="embedded ? 'space-y-4' : 'mx-auto max-w-4xl space-y-6'"> - <div v-if="embedded" class="flex flex-wrap items-start justify-between gap-2 border-b pb-3"> - <div> - <h2 class="text-lg font-semibold tracking-tight">Entity ontology</h2> - <p class="text-xs text-muted-foreground"> - Canonical schema and live instances from the platform database for this knowledge graph. + <div :class="embedded ? 'space-y-2' : 'mx-auto max-w-4xl space-y-4'"> + <div v-if="embedded" class="flex flex-wrap items-center justify-between gap-2 border-b pb-2"> + <div class="min-w-0"> + <h2 class="text-sm font-semibold tracking-tight">Entity ontology</h2> + <p class="text-[11px] leading-snug text-muted-foreground"> + Schema types and instances for this knowledge graph. </p> </div> <div class="flex items-center gap-2"> @@ -128,137 +117,126 @@ defineExpose({ refresh: fetchEntities }) </Card> <template v-else> - <Card> - <CardHeader class="pb-3"> - <CardTitle class="text-base">Prepopulation strategy color guide</CardTitle> - <CardDescription> - Each entity type is color-coded by its prepopulation requirement. 
- </CardDescription> - </CardHeader> - <CardContent class="space-y-3 text-sm"> - <div class="flex flex-wrap gap-2"> - <Badge variant="outline" class="border-cyan-500/40 bg-cyan-500/10 text-cyan-700 dark:text-cyan-300"> - true - </Badge> - <Badge variant="outline" class="border-emerald-500/40 bg-emerald-500/10 text-emerald-700 dark:text-emerald-300"> - false - </Badge> - </div> - </CardContent> - </Card> - - <Card> - <CardHeader class="pb-3"> - <CardTitle class="text-base">Filter types</CardTitle> - </CardHeader> - <CardContent> - <div class="relative"> - <Search class="absolute left-3 top-1/2 size-4 -translate-y-1/2 text-muted-foreground" /> - <input - v-model="filterText" - type="search" - placeholder="Search by entity type name…" - class="flex h-10 w-full rounded-md border border-input bg-background pl-9 pr-3 text-sm ring-offset-background placeholder:text-muted-foreground focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring" - /> - </div> - </CardContent> - </Card> - - <div class="space-y-3"> - <p v-if="filteredRows.length === 0" class="py-4 text-center text-sm text-muted-foreground"> - No entity types match your search. 
- </p> + <div + class="flex flex-wrap items-center gap-2 rounded-md border bg-muted/25 px-2.5 py-1.5 text-xs" + role="note" + aria-label="Prepopulation strategy color guide" + > + <span class="font-medium text-muted-foreground">Prepopulation colors:</span> + <Badge + variant="outline" + class="h-5 border-cyan-500/40 bg-cyan-500/10 px-1.5 text-[10px] text-cyan-700 dark:text-cyan-300" + > + true + </Badge> + <Badge + variant="outline" + class="h-5 border-emerald-500/40 bg-emerald-500/10 px-1.5 text-[10px] text-emerald-700 dark:text-emerald-300" + > + false + </Badge> + </div> + <div class="space-y-1.5"> <Card - v-for="row in filteredRows" + v-for="row in entityRows" :key="row.type" :class="['overflow-hidden', prepopulationCardClass(row.prepopulated_instances)]" > <details class="group"> - <summary class="flex cursor-pointer list-none items-start gap-3 p-4 [&::-webkit-details-marker]:hidden"> + <summary + class="flex cursor-pointer list-none items-center gap-2 px-2.5 py-2 [&::-webkit-details-marker]:hidden" + > <ChevronDown - class="mt-0.5 size-4 shrink-0 text-muted-foreground transition-transform group-open:rotate-180" + class="size-3.5 shrink-0 text-muted-foreground transition-transform group-open:rotate-180" /> - <div class="flex min-w-0 flex-1 flex-wrap items-center gap-2"> - <span class="text-base font-semibold">{{ row.type }}</span> - <Badge variant="outline" :class="prepopulationBadgeClass(row.prepopulated_instances)"> + <div class="flex min-w-0 flex-1 flex-wrap items-center gap-1.5"> + <span class="text-sm font-semibold leading-tight">{{ row.type }}</span> + <Badge + variant="outline" + class="h-5 px-1.5 text-[10px]" + :class="prepopulationBadgeClass(row.prepopulated_instances)" + > {{ prepopulationLabel(row.prepopulated_instances) }} </Badge> - <Badge variant="secondary"> + <Badge variant="secondary" class="h-5 px-1.5 text-[10px]"> {{ row.instance_count }} instance{{ row.instance_count === 1 ? 
'' : 's' }} </Badge> </div> </summary> - <div class="space-y-4 border-t px-4 pb-4 pt-0"> - <p v-if="row.description" class="pt-3 text-sm text-muted-foreground"> + <div class="space-y-2 border-t px-2.5 pb-2.5 pt-0"> + <p v-if="row.description" class="pt-2 text-xs leading-snug text-muted-foreground"> {{ row.description }} </p> - <div v-else class="pt-2 text-sm italic text-muted-foreground">No description</div> + <div v-else class="pt-1.5 text-xs italic text-muted-foreground">No description</div> - <div class="space-y-2"> - <p class="text-xs font-medium uppercase tracking-wide text-muted-foreground">Properties</p> + <div class="space-y-1"> + <p class="text-[10px] font-medium uppercase tracking-wide text-muted-foreground"> + Properties + </p> <div v-if="row.property_definitions && Object.keys(row.property_definitions).length > 0" - class="divide-y rounded-md border" + class="divide-y rounded-md border text-xs" > <div v-for="(label, key) in row.property_definitions" :key="key" - class="flex flex-wrap gap-x-2 gap-y-1 px-3 py-2 text-sm" + class="flex flex-wrap items-center gap-x-1.5 gap-y-0.5 px-2 py-1" > - <code class="rounded bg-muted px-1.5 py-0.5 font-mono text-xs">{{ key }}</code> + <code class="rounded bg-muted px-1 py-0.5 font-mono text-[10px]">{{ key }}</code> <span class="text-muted-foreground">{{ label }}</span> <Badge v-if="row.required_properties?.includes(String(key))" variant="outline" - class="h-5 text-[10px]" + class="h-4 px-1 text-[9px]" > required </Badge> <Badge v-else-if="row.optional_properties?.includes(String(key))" variant="outline" - class="h-5 text-[10px] opacity-70" + class="h-4 px-1 text-[9px] opacity-70" > optional </Badge> </div> </div> - <p v-else class="text-sm text-muted-foreground">No property definitions</p> + <p v-else class="text-xs text-muted-foreground">No property definitions</p> </div> - <details v-if="(row.instances?.length ?? 0) > 0" class="group/inst rounded-lg border"> + <details v-if="(row.instances?.length ?? 
0) > 0" class="group/inst rounded-md border"> <summary - class="flex cursor-pointer list-none items-center gap-2 px-3 py-2 text-sm font-medium hover:bg-muted/50 [&::-webkit-details-marker]:hidden" + class="flex cursor-pointer list-none items-center gap-1.5 px-2 py-1.5 text-xs font-medium hover:bg-muted/50 [&::-webkit-details-marker]:hidden" > <ChevronDown - class="size-4 shrink-0 text-muted-foreground transition-transform group-open/inst:rotate-180" + class="size-3.5 shrink-0 text-muted-foreground transition-transform group-open/inst:rotate-180" /> Instances </summary> - <div class="space-y-3 border-t p-3"> - <ul class="space-y-2 text-sm"> + <div class="space-y-2 border-t p-2"> + <ul class="space-y-1 text-xs"> <li v-for="(inst, idx) in pageSlice(instancePage, row.type, row.instances || []).items" :key="inst.slug ?? idx" - class="rounded-md bg-muted/40 px-3 py-2" + class="rounded-md bg-muted/40 px-2 py-1" > - <div class="mb-1 font-mono text-xs text-muted-foreground"> + <div class="mb-0.5 font-mono text-[10px] text-muted-foreground"> {{ inst.slug ?? '—' }} </div> - <pre class="whitespace-pre-wrap break-all text-xs">{{ + <pre class="max-h-24 overflow-y-auto whitespace-pre-wrap break-all text-[10px] leading-snug">{{ JSON.stringify(inst.properties ?? 
{}, null, 2) }}</pre> </li> </ul> <div v-if="pageSlice(instancePage, row.type, row.instances || []).total > 20" - class="flex flex-wrap items-center gap-2 pt-1" + class="flex flex-wrap items-center gap-1.5" @click.stop > <Button variant="outline" size="sm" + class="h-7 px-2 text-xs" :disabled="pageSlice(instancePage, row.type, row.instances || []).page <= 0" @click.stop.prevent="setInstancePage(row.type, pageSlice(instancePage, row.type, row.instances || []).page - 1)" > @@ -272,6 +250,7 @@ defineExpose({ refresh: fetchEntities }) <Button variant="outline" size="sm" + class="h-7 px-2 text-xs" :disabled=" pageSlice(instancePage, row.type, row.instances || []).page >= pageSlice(instancePage, row.type, row.instances || []).totalPages - 1 From 4687527916972df9c18751b1848ab1a7bb046391 Mon Sep 17 00:00:00 2001 From: aredenba-rh <aredenba@redhat.com> Date: Wed, 3 Jun 2026 15:19:12 -0400 Subject: [PATCH 095/153] fix(dev-ui): auto-grow GMA composer to fit full draft message Remove the 14-line cap so the chat textarea expands with content instead of scrolling inside a fixed-height field. Co-authored-by: Cursor <cursoragent@cursor.com> --- .../extraction/SharedConversationPanel.vue | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/src/dev-ui/app/components/extraction/SharedConversationPanel.vue b/src/dev-ui/app/components/extraction/SharedConversationPanel.vue index dcf835af8..f4749cdb1 100644 --- a/src/dev-ui/app/components/extraction/SharedConversationPanel.vue +++ b/src/dev-ui/app/components/extraction/SharedConversationPanel.vue @@ -147,12 +147,15 @@ function adjustTextareaHeight() { const lh = parseFloat(getComputedStyle(el).lineHeight) const line = Number.isFinite(lh) && lh > 0 ? lh : 21 const minH = Math.round(line * 2.5) - const maxH = Math.round(line * 14) el.style.height = '0' - const scrollH = el.scrollHeight - const h = Math.min(Math.max(scrollH, minH), maxH) - el.style.height = `${h}px` - el.style.overflowY = scrollH > maxH ? 
'auto' : 'hidden' + el.style.height = `${Math.max(el.scrollHeight, minH)}px` + el.style.overflowY = 'hidden' +} + +function onComposerInput(event: Event) { + const target = event.target as HTMLTextAreaElement + emit('update:draftMessage', target.value) + adjustTextareaHeight() } function handleComposerEnter(event: KeyboardEvent) { @@ -404,7 +407,7 @@ onMounted(() => { :disabled="composerBlocked" :placeholder="inputPlaceholder" class="w-full flex-1 resize-none rounded-md border border-input bg-background px-3 py-2 text-sm leading-relaxed shadow-sm placeholder:text-muted-foreground focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring disabled:opacity-50" - @input="emit('update:draftMessage', ($event.target as HTMLTextAreaElement).value)" + @input="onComposerInput" @keydown.enter="handleComposerEnter" /> <Button From 86765c63fd5e9f7ceee6e62d4d69dfcb016f1c43 Mon Sep 17 00:00:00 2001 From: aredenba-rh <aredenba@redhat.com> Date: Wed, 3 Jun 2026 15:35:12 -0400 Subject: [PATCH 096/153] fix(dev-ui): tighten graph management layout and mode controls Scroll design artifacts inside the detail column to avoid phantom page footer growth, compact the mode switcher and relationships panel, and lock document overflow at the app shell. 
Co-authored-by: Cursor <cursoragent@cursor.com> --- src/dev-ui/app/assets/css/main.css | 3 - .../GraphDesignRelationshipsPanel.vue | 192 +++++++++++------- src/dev-ui/app/layouts/default.vue | 2 +- .../pages/knowledge-graphs/[kgId]/manage.vue | 52 ++--- .../knowledge-graph-manage-workspace.test.ts | 4 +- 5 files changed, 138 insertions(+), 115 deletions(-) diff --git a/src/dev-ui/app/assets/css/main.css b/src/dev-ui/app/assets/css/main.css index 4540ddb2c..99e1a934d 100644 --- a/src/dev-ui/app/assets/css/main.css +++ b/src/dev-ui/app/assets/css/main.css @@ -118,9 +118,6 @@ body, #__nuxt { height: 100%; - } - html, - body { overflow: hidden; } body { diff --git a/src/dev-ui/app/components/graph-management/GraphDesignRelationshipsPanel.vue b/src/dev-ui/app/components/graph-management/GraphDesignRelationshipsPanel.vue index 3d6c824e7..718300d8c 100644 --- a/src/dev-ui/app/components/graph-management/GraphDesignRelationshipsPanel.vue +++ b/src/dev-ui/app/components/graph-management/GraphDesignRelationshipsPanel.vue @@ -1,12 +1,11 @@ <script setup lang="ts"> import { computed, ref, watch } from 'vue' import { toast } from 'vue-sonner' -import { ChevronDown, Loader2, RefreshCw, Search } from 'lucide-vue-next' -import { Card, CardContent, CardDescription, CardHeader, CardTitle } from '@/components/ui/card' +import { ChevronDown, Loader2, RefreshCw } from 'lucide-vue-next' +import { Card, CardContent, CardHeader, CardTitle } from '@/components/ui/card' import { Badge } from '@/components/ui/badge' import { Button } from '@/components/ui/button' import { - type DesignArtifactRelationshipType, type DesignArtifactsResponse, pageSlice, prepopulationBadgeClass, @@ -27,7 +26,6 @@ const { apiFetch } = useApiClient() const loading = ref(true) const data = ref<DesignArtifactsResponse | null>(null) -const filterText = ref('') const instancePage = ref<Record<string, number>>({}) async function fetchRelationships() { @@ -55,28 +53,10 @@ async function fetchRelationships() { const 
relationshipRows = computed(() => data.value?.relationships ?? []) -const filteredRows = computed(() => { - const query = filterText.value.trim().toLowerCase() - if (!query) return relationshipRows.value - return relationshipRows.value.filter((rel) => { - return ( - rel.relationship_type.toLowerCase().includes(query) - || (rel.reverse_relationship_type ?? '').toLowerCase().includes(query) - || rel.source_entity_type.toLowerCase().includes(query) - || rel.target_entity_type.toLowerCase().includes(query) - || rel.key.toLowerCase().includes(query) - ) - }) -}) - function setInstancePage(key: string, page: number) { instancePage.value = { ...instancePage.value, [key]: page } } -watch(filterText, () => { - instancePage.value = {} -}) - watch( () => [props.kgId, props.reloadNonce] as const, () => { @@ -89,12 +69,12 @@ defineExpose({ refresh: fetchRelationships }) </script> <template> - <div :class="embedded ? 'space-y-4' : 'mx-auto max-w-4xl space-y-6'"> - <div v-if="embedded" class="flex flex-wrap items-start justify-between gap-2 border-b pb-3"> - <div> - <h2 class="text-lg font-semibold tracking-tight">Relationship ontology</h2> - <p class="text-xs text-muted-foreground"> - Canonical relationship types and live edge instances from the platform database. + <div :class="embedded ? 'space-y-2' : 'mx-auto max-w-4xl space-y-4'"> + <div v-if="embedded" class="flex flex-wrap items-center justify-between gap-2 border-b pb-2"> + <div class="min-w-0"> + <h2 class="text-sm font-semibold tracking-tight">Relationship ontology</h2> + <p class="text-[11px] leading-snug text-muted-foreground"> + Relationship types and instances for this knowledge graph. 
</p> </div> <div class="flex items-center gap-2"> @@ -126,103 +106,157 @@ defineExpose({ refresh: fetchRelationships }) </Card> <template v-else> - <Card> - <CardHeader class="pb-3"> - <CardTitle class="text-base">Filter types</CardTitle> - </CardHeader> - <CardContent> - <div class="relative"> - <Search class="absolute left-3 top-1/2 size-4 -translate-y-1/2 text-muted-foreground" /> - <input - v-model="filterText" - type="search" - placeholder="Search by relationship, source, or target type…" - class="flex h-10 w-full rounded-md border border-input bg-background pl-9 pr-3 text-sm ring-offset-background placeholder:text-muted-foreground focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring" - /> - </div> - </CardContent> - </Card> - - <div class="space-y-3"> - <p v-if="filteredRows.length === 0" class="py-4 text-center text-sm text-muted-foreground"> - No relationship types match your search. - </p> + <div + class="flex flex-wrap items-center gap-2 rounded-md border bg-muted/25 px-2.5 py-1.5 text-xs" + role="note" + aria-label="Prepopulation strategy color guide" + > + <span class="font-medium text-muted-foreground">Prepopulation colors:</span> + <Badge + variant="outline" + class="h-5 border-cyan-500/40 bg-cyan-500/10 px-1.5 text-[10px] text-cyan-700 dark:text-cyan-300" + > + true + </Badge> + <Badge + variant="outline" + class="h-5 border-emerald-500/40 bg-emerald-500/10 px-1.5 text-[10px] text-emerald-700 dark:text-emerald-300" + > + false + </Badge> + </div> + <div class="space-y-1.5"> <Card - v-for="rel in filteredRows" + v-for="rel in relationshipRows" :key="rel.key" :class="['overflow-hidden', prepopulationCardClass(rel.prepopulated_instances)]" > <details class="group"> - <summary class="flex cursor-pointer list-none items-start gap-3 p-4 [&::-webkit-details-marker]:hidden"> + <summary + class="flex cursor-pointer list-none items-start gap-2 px-2.5 py-2 [&::-webkit-details-marker]:hidden" + > <ChevronDown - class="mt-0.5 size-4 shrink-0 
text-muted-foreground transition-transform group-open:rotate-180" + class="mt-0.5 size-3.5 shrink-0 text-muted-foreground transition-transform group-open:rotate-180" /> - <div class="min-w-0 flex-1 space-y-1"> - <div class="flex min-w-0 flex-wrap items-center gap-2"> - <span class="text-sm font-semibold text-foreground">{{ rel.source_entity_type }}</span> - <Badge variant="secondary" class="font-mono text-xs">{{ rel.relationship_type }}</Badge> + <div class="min-w-0 flex-1 space-y-0.5"> + <div class="flex min-w-0 flex-wrap items-center gap-1.5"> + <span class="text-sm font-semibold leading-tight text-foreground">{{ + rel.source_entity_type + }}</span> + <Badge variant="secondary" class="h-5 px-1.5 font-mono text-[10px]">{{ + rel.relationship_type + }}</Badge> <template v-if="rel.reverse_relationship_type"> - <span class="text-xs text-muted-foreground">/</span> - <Badge variant="outline" class="font-mono text-xs">{{ rel.reverse_relationship_type }}</Badge> + <span class="text-[10px] text-muted-foreground">/</span> + <Badge variant="outline" class="h-5 px-1.5 font-mono text-[10px]">{{ + rel.reverse_relationship_type + }}</Badge> </template> - <Badge variant="outline" :class="prepopulationBadgeClass(rel.prepopulated_instances)"> + <Badge + variant="outline" + class="h-5 px-1.5 text-[10px]" + :class="prepopulationBadgeClass(rel.prepopulated_instances)" + > {{ prepopulationLabel(rel.prepopulated_instances) }} </Badge> - <span class="text-sm text-muted-foreground">→</span> - <span class="text-sm font-semibold text-foreground">{{ rel.target_entity_type }}</span> - <Badge variant="outline" class="ml-auto"> + <span class="text-xs text-muted-foreground">→</span> + <span class="text-sm font-semibold leading-tight text-foreground">{{ + rel.target_entity_type + }}</span> + <Badge variant="outline" class="h-5 px-1.5 text-[10px]"> {{ rel.instance_count }} instance{{ rel.instance_count === 1 ? 
'' : 's' }} </Badge> </div> - <p class="truncate text-xs text-muted-foreground">{{ rel.key }}</p> + <p class="truncate font-mono text-[10px] text-muted-foreground">{{ rel.key }}</p> </div> </summary> - <div class="space-y-4 border-t px-4 pb-4 pt-3"> - <p v-if="rel.description" class="text-sm text-muted-foreground">{{ rel.description }}</p> + <div class="space-y-2 border-t px-2.5 pb-2.5 pt-0"> + <p v-if="rel.description" class="pt-2 text-xs leading-snug text-muted-foreground"> + {{ rel.description }} + </p> + <div v-else class="pt-1.5 text-xs italic text-muted-foreground">No description</div> - <div class="space-y-2"> - <p class="text-xs font-medium uppercase tracking-wide text-muted-foreground">Parameters</p> + <div class="space-y-1"> + <p class="text-[10px] font-medium uppercase tracking-wide text-muted-foreground"> + Parameters + </p> <div v-if="rel.parameter_definitions && Object.keys(rel.parameter_definitions).length > 0" - class="divide-y rounded-md border" + class="divide-y rounded-md border text-xs" > <div v-for="(label, key) in rel.parameter_definitions" :key="key" - class="flex flex-wrap gap-x-2 gap-y-1 px-3 py-2 text-sm" + class="flex flex-wrap items-center gap-x-1.5 gap-y-0.5 px-2 py-1" > - <code class="rounded bg-muted px-1.5 py-0.5 font-mono text-xs">{{ key }}</code> + <code class="rounded bg-muted px-1 py-0.5 font-mono text-[10px]">{{ key }}</code> <span class="text-muted-foreground">{{ label }}</span> </div> </div> - <p v-else class="text-sm text-muted-foreground">No parameter definitions</p> + <p v-else class="text-xs text-muted-foreground">No parameter definitions</p> </div> - <details v-if="rel.instances.length > 0" class="group/inst rounded-lg border"> + <details v-if="rel.instances.length > 0" class="group/inst rounded-md border"> <summary - class="flex cursor-pointer list-none items-center gap-2 px-3 py-2 text-sm font-medium hover:bg-muted/50 [&::-webkit-details-marker]:hidden" + class="flex cursor-pointer list-none items-center gap-1.5 px-2 
py-1.5 text-xs font-medium hover:bg-muted/50 [&::-webkit-details-marker]:hidden" > <ChevronDown - class="size-4 shrink-0 text-muted-foreground transition-transform group-open/inst:rotate-180" + class="size-3.5 shrink-0 text-muted-foreground transition-transform group-open/inst:rotate-180" /> Instances </summary> - <div class="space-y-3 border-t p-3"> - <ul class="space-y-2 text-sm"> + <div class="space-y-2 border-t p-2"> + <ul class="space-y-1 text-xs"> <li v-for="(inst, idx) in pageSlice(instancePage, rel.key, rel.instances).items" :key="`${rel.key}-${idx}`" - class="rounded-md bg-muted/40 px-3 py-2" + class="rounded-md bg-muted/40 px-2 py-1" > - <div class="mb-1 font-mono text-xs text-muted-foreground"> + <div class="mb-0.5 font-mono text-[10px] text-muted-foreground"> {{ inst.source_slug }} --{{ rel.relationship_type }}--> {{ inst.target_slug }} </div> - <pre class="whitespace-pre-wrap break-all text-xs">{{ + <pre class="max-h-24 overflow-y-auto whitespace-pre-wrap break-all text-[10px] leading-snug">{{ JSON.stringify(inst.properties ?? 
{}, null, 2) }}</pre> </li> </ul> + <div + v-if="pageSlice(instancePage, rel.key, rel.instances).total > 20" + class="flex flex-wrap items-center gap-1.5" + @click.stop + > + <Button + variant="outline" + size="sm" + class="h-7 px-2 text-xs" + :disabled="pageSlice(instancePage, rel.key, rel.instances).page <= 0" + @click.stop.prevent=" + setInstancePage(rel.key, pageSlice(instancePage, rel.key, rel.instances).page - 1) + " + > + Previous + </Button> + <span class="text-xs text-muted-foreground"> + Page {{ pageSlice(instancePage, rel.key, rel.instances).page + 1 }} / + {{ pageSlice(instancePage, rel.key, rel.instances).totalPages }} + ({{ pageSlice(instancePage, rel.key, rel.instances).total }} total) + </span> + <Button + variant="outline" + size="sm" + class="h-7 px-2 text-xs" + :disabled=" + pageSlice(instancePage, rel.key, rel.instances).page + >= pageSlice(instancePage, rel.key, rel.instances).totalPages - 1 + " + @click.stop.prevent=" + setInstancePage(rel.key, pageSlice(instancePage, rel.key, rel.instances).page + 1) + " + > + Next + </Button> + </div> </div> </details> </div> diff --git a/src/dev-ui/app/layouts/default.vue b/src/dev-ui/app/layouts/default.vue index 0f3992705..4685b39cc 100644 --- a/src/dev-ui/app/layouts/default.vue +++ b/src/dev-ui/app/layouts/default.vue @@ -895,7 +895,7 @@ watch(() => route.path, () => { closeMobile() }) </header> <!-- Page Content --> - <main class="min-h-0 flex-1 overflow-y-auto p-6"> + <main class="min-h-0 flex-1 overflow-y-auto bg-background p-6"> <slot /> </main> </div> diff --git a/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue b/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue index 7633a97cb..6e1d9b0da 100644 --- a/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue +++ b/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue @@ -945,9 +945,9 @@ function setGraphManagementMode(mode: GraphManagementMode) { function selectSchemaRailItem(itemId: GraphManagementRailItemId) { 
selectedRailItemId.value = itemId void nextTick(() => { - document.getElementById('graph-management-artifact-detail')?.scrollIntoView({ + document.querySelector<HTMLElement>('.graph-management-detail')?.scrollTo({ + top: 0, behavior: 'smooth', - block: 'start', }) }) } @@ -1781,25 +1781,19 @@ watch(selectedOpsDataSourceId, () => { </div> <Card class="graph-management-controls overflow-hidden"> - <CardHeader class="space-y-4 pb-4"> - <div class="flex flex-wrap items-start gap-3"> - <div - class="flex size-10 shrink-0 items-center justify-center rounded-lg border border-primary/30 bg-primary/10 text-primary" - > - <PencilRuler class="size-5 shrink-0" aria-hidden="true" /> - </div> - <div class="min-w-0 flex-1 space-y-1"> - <CardTitle class="text-xl leading-tight">Graph Management</CardTitle> - <CardDescription> - Shared chat session with mode-specific assistant framing and workspace panels. - </CardDescription> + <CardHeader class="gap-2 space-y-2 px-4 py-3"> + <div class="flex flex-wrap items-center gap-x-3 gap-y-2"> + <div class="flex min-w-0 items-center gap-2"> + <div + class="flex size-8 shrink-0 items-center justify-center rounded-md border border-primary/30 bg-primary/10 text-primary" + > + <PencilRuler class="size-4 shrink-0" aria-hidden="true" /> + </div> + <CardTitle class="text-base leading-none">Graph Management</CardTitle> </div> - </div> - - <div class="space-y-2"> - <p class="text-sm font-medium text-muted-foreground">Mode:</p> + <span class="text-xs font-medium text-muted-foreground">Mode</span> <div - class="grid gap-2 sm:grid-cols-3" + class="flex min-w-0 flex-1 flex-wrap gap-1.5" role="tablist" aria-label="Graph management modes" > @@ -1808,7 +1802,7 @@ watch(selectedOpsDataSourceId, () => { v-if="isGraphManagementModeUnlocked(mode, graphManagementModeGate)" size="sm" variant="outline" - class="h-auto min-h-9 justify-center border py-2 shadow-none transition-colors" + class="h-8 shrink-0 px-2.5 text-xs shadow-none transition-colors" :class=" 
graphManagementMode === mode ? 'border-primary/70 bg-muted/50 font-medium text-foreground ring-1 ring-primary/25' @@ -1824,21 +1818,17 @@ watch(selectedOpsDataSourceId, () => { </Button> <div v-else - class="flex flex-col gap-1.5 rounded-lg border border-dashed border-rose-200/80 bg-rose-500/[0.04] px-3 py-2.5 text-left text-muted-foreground dark:border-rose-900/40 dark:bg-rose-950/20" + class="inline-flex h-8 max-w-full items-center gap-1.5 rounded-md border border-dashed border-rose-200/80 bg-rose-500/[0.04] px-2 text-xs text-muted-foreground dark:border-rose-900/40 dark:bg-rose-950/20" role="tab" :aria-selected="false" :aria-disabled="true" + :aria-label="`${GRAPH_MANAGEMENT_MODE_LABELS[mode]} locked: ${graphManagementModeLockReason(mode, graphManagementModeGate) ?? ''}`" :title="graphManagementModeLockReason(mode, graphManagementModeGate) ?? undefined" > - <div class="flex items-center gap-2"> - <Lock class="size-3.5 shrink-0 text-rose-700/80 dark:text-rose-400/90" /> - <span class="text-sm font-medium leading-tight text-foreground/80"> - {{ GRAPH_MANAGEMENT_MODE_LABELS[mode] }} - </span> - </div> - <p class="text-[11px] leading-snug text-rose-800/90 dark:text-rose-300/90"> - {{ graphManagementModeLockReason(mode, graphManagementModeGate) }} - </p> + <Lock class="size-3 shrink-0 text-rose-700/80 dark:text-rose-400/90" aria-hidden="true" /> + <span class="truncate font-medium text-foreground/80"> + {{ GRAPH_MANAGEMENT_MODE_LABELS[mode] }} + </span> </div> </template> </div> @@ -1908,7 +1898,7 @@ watch(selectedOpsDataSourceId, () => { <div id="graph-management-artifact-detail" - class="graph-management-detail min-w-0 space-y-6" + class="graph-management-detail min-h-0 min-w-0 max-h-[min(70dvh,calc(100dvh-12rem))] space-y-6 overflow-y-auto overscroll-contain" > <div v-if="selectedRailItemId === 'schema-entities'" class="min-w-0 space-y-2"> <GraphDesignEntitiesPanel diff --git a/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts 
b/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts index 6cacbc39d..5c01ca2b2 100644 --- a/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts +++ b/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts @@ -439,7 +439,9 @@ describe('KG-MANAGE-008 - hybrid lower panel shared rail', () => { expect(manageWorkspaceVue).toContain('schemaRailItems') expect(manageWorkspaceVue).toContain('lg:grid-cols-[minmax(0,15.5rem)_minmax(0,1fr)]') expect(manageWorkspaceVue).toContain('lg:sticky lg:top-4') - expect(manageWorkspaceVue).toContain('scrollIntoView') + expect(manageWorkspaceVue).toContain("querySelector<HTMLElement>('.graph-management-detail')") + expect(manageWorkspaceVue).toContain('scrollTo') + expect(manageWorkspaceVue).toContain('overflow-y-auto overscroll-contain') }) it('builds rail items with status and last-updated metadata', () => { From d8c9073c6d483564cc8c8adca14430a89e60cdcf Mon Sep 17 00:00:00 2001 From: aredenba-rh <aredenba@redhat.com> Date: Wed, 3 Jun 2026 16:34:45 -0400 Subject: [PATCH 097/153] feat(management): support prepopulated relationship types Allow edge types to be marked prepopulated when source and target entity types are also prepopulated, enforce on save, block bootstrap transition until instances exist, and teach GMA the updated authoring rules. 
Co-authored-by: Cursor <cursoragent@cursor.com> --- specs/graph/schema-authoring.spec.md | 27 ++++- .../knowledge-graph-workspace.spec.md | 7 +- .../application/schema_authoring_guide.py | 19 ++- .../application/skill_resolution_service.py | 11 +- .../presentation/workload_routes.py | 15 ++- .../graph_canonical_schema_repository.py | 2 + .../ontology_mutation_builder.py | 2 + .../canonical_schema/ontology_projection.py | 4 + .../application/design_artifacts.py | 2 +- .../services/knowledge_graph_service.py | 26 ++++- .../domain/ontology_prepopulation.py | 44 +++++++ src/api/management/domain/value_objects.py | 12 ++ .../presentation/knowledge_graphs/models.py | 22 ++++ .../presentation/knowledge_graphs/routes.py | 6 + src/api/tests/fakes/canonical_schema.py | 2 + .../test_knowledge_graph_service.py | 37 ++++++ .../management/test_ontology_prepopulation.py | 109 ++++++++++++++++++ .../management/test_ontology_value_objects.py | 12 ++ .../pages/knowledge-graphs/[kgId]/manage.vue | 34 +++++- 19 files changed, 367 insertions(+), 26 deletions(-) create mode 100644 src/api/management/domain/ontology_prepopulation.py create mode 100644 src/api/tests/unit/management/test_ontology_prepopulation.py diff --git a/specs/graph/schema-authoring.spec.md b/specs/graph/schema-authoring.spec.md index 0505852ef..6dece7a7d 100644 --- a/specs/graph/schema-authoring.spec.md +++ b/specs/graph/schema-authoring.spec.md @@ -39,15 +39,32 @@ The system SHALL allow schema updates during `extraction_operations` mode. - AND extraction operations continue using the updated schema ### Requirement: Prepopulated Type Semantics -The system SHALL enforce `prepopulated=true` as a transition-blocking readiness constraint. +The system SHALL enforce `prepopulated=true` as a transition-blocking readiness constraint for entity and relationship types. 
-#### Scenario: Prepopulated type with instances -- GIVEN a type marked `prepopulated=true` +#### Scenario: Prepopulated entity type with instances +- GIVEN an entity type marked `prepopulated=true` - WHEN readiness is evaluated - THEN the type passes only if it has one or more instances -#### Scenario: Prepopulated type without instances -- GIVEN a type marked `prepopulated=true` with zero instances +#### Scenario: Prepopulated entity type without instances +- GIVEN an entity type marked `prepopulated=true` with zero instances +- WHEN readiness is evaluated +- THEN validation fails and transition to extraction mode is blocked + +#### Scenario: Prepopulated relationship type with prepopulated endpoints +- GIVEN a relationship type marked `prepopulated=true` +- AND every listed source and target entity type is marked `prepopulated=true` +- WHEN the ontology is saved +- THEN the save succeeds + +#### Scenario: Prepopulated relationship type without prepopulated endpoints +- GIVEN a relationship type marked `prepopulated=true` +- AND at least one source or target entity type is not marked `prepopulated=true` +- WHEN the ontology is saved +- THEN validation fails with a clear error + +#### Scenario: Prepopulated relationship type without instances +- GIVEN a relationship type marked `prepopulated=true` with zero instances - WHEN readiness is evaluated - THEN validation fails and transition to extraction mode is blocked diff --git a/specs/management/knowledge-graph-workspace.spec.md b/specs/management/knowledge-graph-workspace.spec.md index a3ed74b00..32840c832 100644 --- a/specs/management/knowledge-graph-workspace.spec.md +++ b/specs/management/knowledge-graph-workspace.spec.md @@ -41,10 +41,15 @@ The system SHALL define schema bootstrap readiness checks for transition eligibi - THEN validation fails unless there is at least one entity type and at least one relationship type #### Scenario: Prepopulated instance readiness -- GIVEN one or more types marked 
`prepopulated=true` +- GIVEN one or more entity or relationship types marked `prepopulated=true` - WHEN readiness is evaluated - THEN validation fails if any such type has zero instances +#### Scenario: Prepopulated relationship endpoint constraint +- GIVEN a relationship type marked `prepopulated=true` +- WHEN the ontology is saved +- THEN every listed source and target entity type must also be marked `prepopulated=true` + ### Requirement: Transition Authorization The system SHALL require `edit` permission on the knowledge graph for bootstrap validation and mode transition. diff --git a/src/api/extraction/application/schema_authoring_guide.py b/src/api/extraction/application/schema_authoring_guide.py index b063acb52..6ed98f4fd 100644 --- a/src/api/extraction/application/schema_authoring_guide.py +++ b/src/api/extraction/application/schema_authoring_guide.py @@ -37,15 +37,20 @@ ```json { - "label": "depends_on", - "description": "Service dependency", - "source_labels": ["service"], - "target_labels": ["service"], - "properties": [] + "label": "contains", + "description": "Test exercises an API endpoint", + "source_labels": ["test"], + "target_labels": ["api_endpoint"], + "properties": [], + "prepopulated": true, + "prepopulated_instance_count": 0 } ``` - `source_labels` / `target_labels`: allowed node type labels for edge endpoints. +- `prepopulated`: when true, bootstrap transition requires at least one instance of this + relationship type. Every listed source and target entity type must also have + `prepopulated: true`. ## Instance mutations (JSONL) @@ -80,6 +85,10 @@ Bootstrap transition needs: - At least one entity type and one relationship type. - Every `prepopulated=true` entity type must have instances (use CREATE lines). +- Every `prepopulated=true` relationship type must have instances (use CREATE edge lines). 
+- A prepopulated relationship type may only reference entity types that are also + prepopulated (for example `contains` from `test` to `api_endpoint` when both are + prepopulated). ## Repository context diff --git a/src/api/extraction/application/skill_resolution_service.py b/src/api/extraction/application/skill_resolution_service.py index 182b9b6b3..42b4c3c59 100644 --- a/src/api/extraction/application/skill_resolution_service.py +++ b/src/api/extraction/application/skill_resolution_service.py @@ -81,8 +81,9 @@ class ResolvedExtractionSkillPack: ), "relationship_type_authoring": ( "Create or edit relationship types by updating edge_types entries: label, " - "description, source_labels, target_labels, properties. Ensure endpoints " - "reference existing entity type labels." + "description, source_labels, target_labels, properties, optional prepopulated " + "and prepopulated_instance_count. When prepopulated is true, every source and " + "target entity type label must already be prepopulated on node_types." ), "instance_authoring": ( "Create entity or relationship instances with kartograph_apply_graph_mutations " @@ -90,8 +91,10 @@ class ResolvedExtractionSkillPack: "and source_path in set_properties. Use ids like label:16hex." ), "prepopulation_validation": ( - "Prioritize prepopulated type coverage and highlight any missing " - "instances required before extraction-mode transition." + "Prioritize prepopulated entity and relationship type coverage. Entity types " + "and relationship types marked prepopulated must have instances before " + "extraction-mode transition. Relationship types may only be prepopulated when " + "their source and target entity types are prepopulated too." 
), "schema_tools": ( "Available MCP tools (call by exact name): kartograph_get_schema_authoring_guide, " diff --git a/src/api/extraction/presentation/workload_routes.py b/src/api/extraction/presentation/workload_routes.py index 61650091c..93d5953e7 100644 --- a/src/api/extraction/presentation/workload_routes.py +++ b/src/api/extraction/presentation/workload_routes.py @@ -17,6 +17,7 @@ get_workload_graph_reader, get_workload_schema_service, ) +from management.domain.ontology_prepopulation import PrepopulationValidationError from management.domain.value_objects import OntologyConfig router = APIRouter(prefix="/workloads", tags=["extraction-workloads"]) @@ -123,10 +124,16 @@ async def workload_save_schema_ontology( ) -> WorkloadOntologyResponse: _require_chat_scope(auth) config = OntologyConfig.from_dict(request.model_dump()) - saved = await schema_service.replace_ontology( - knowledge_graph_id=auth.knowledge_graph_id, - config=config, - ) + try: + saved = await schema_service.replace_ontology( + knowledge_graph_id=auth.knowledge_graph_id, + config=config, + ) + except PrepopulationValidationError as e: + raise HTTPException( + status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, + detail=str(e), + ) from e payload = saved.to_dict() return WorkloadOntologyResponse( knowledge_graph_id=auth.knowledge_graph_id, diff --git a/src/api/infrastructure/canonical_schema/graph_canonical_schema_repository.py b/src/api/infrastructure/canonical_schema/graph_canonical_schema_repository.py index 6a023b13c..a688ce5cb 100644 --- a/src/api/infrastructure/canonical_schema/graph_canonical_schema_repository.py +++ b/src/api/infrastructure/canonical_schema/graph_canonical_schema_repository.py @@ -23,6 +23,7 @@ from infrastructure.canonical_schema.ontology_projection import ( stored_definitions_to_ontology_config, ) +from management.domain.ontology_prepopulation import validate_ontology_prepopulation from management.domain.value_objects import OntologyConfig from 
management.ports.canonical_schema import ICanonicalSchemaRepository from management.ports.exceptions import CanonicalSchemaMutationError @@ -48,6 +49,7 @@ async def get_ontology(self, kg_id: str) -> OntologyConfig | None: return stored_definitions_to_ontology_config(rows) async def replace_ontology(self, kg_id: str, config: OntologyConfig) -> None: + validate_ontology_prepopulation(config) await self._store.delete_all_for_kg(kg_id) await self._apply_operations( kg_id, ontology_config_to_define_operations(config), config diff --git a/src/api/infrastructure/canonical_schema/ontology_mutation_builder.py b/src/api/infrastructure/canonical_schema/ontology_mutation_builder.py index fdfec233b..4d797ee47 100644 --- a/src/api/infrastructure/canonical_schema/ontology_mutation_builder.py +++ b/src/api/infrastructure/canonical_schema/ontology_mutation_builder.py @@ -53,4 +53,6 @@ def edge_type_metadata(edge_type) -> dict: "source_labels": list(edge_type.source_labels), "target_labels": list(edge_type.target_labels), "properties": list(edge_type.properties), + "prepopulated": edge_type.prepopulated, + "prepopulated_instance_count": edge_type.prepopulated_instance_count, } diff --git a/src/api/infrastructure/canonical_schema/ontology_projection.py b/src/api/infrastructure/canonical_schema/ontology_projection.py index e8e89101f..7f00c5e39 100644 --- a/src/api/infrastructure/canonical_schema/ontology_projection.py +++ b/src/api/infrastructure/canonical_schema/ontology_projection.py @@ -41,6 +41,10 @@ def stored_definitions_to_ontology_config( source_labels=tuple(stored.metadata.get("source_labels", [])), target_labels=tuple(stored.metadata.get("target_labels", [])), properties=tuple(stored.metadata.get("properties", [])), + prepopulated=bool(stored.metadata.get("prepopulated", False)), + prepopulated_instance_count=int( + stored.metadata.get("prepopulated_instance_count", 0) + ), ) ) diff --git a/src/api/management/application/design_artifacts.py 
b/src/api/management/application/design_artifacts.py index f46787069..b60d4dd29 100644 --- a/src/api/management/application/design_artifacts.py +++ b/src/api/management/application/design_artifacts.py @@ -164,7 +164,7 @@ def build_design_artifacts( "relationship_type": edge_type.label, "reverse_relationship_type": None, "reverse_relationship_description": None, - "prepopulated_instances": False, + "prepopulated_instances": edge_type.prepopulated, "description": edge_type.description or None, "instance_count": len(type_instances), "instances": type_instances, diff --git a/src/api/management/application/services/knowledge_graph_service.py b/src/api/management/application/services/knowledge_graph_service.py index afd20c6ba..bcb7c0cf8 100644 --- a/src/api/management/application/services/knowledge_graph_service.py +++ b/src/api/management/application/services/knowledge_graph_service.py @@ -832,27 +832,44 @@ def _evaluate_workspace_readiness( """Evaluate transition readiness flags from canonical schema state.""" node_type_count = len(ontology.node_types) if ontology else 0 edge_type_count = len(ontology.edge_types) if ontology else 0 + from management.domain.ontology_prepopulation import relationship_readiness_key + prepopulated_without_instances: tuple[str, ...] = () + prepopulated_relationships_without_instances: tuple[str, ...] 
= () if ontology is not None: prepopulated_without_instances = tuple( node_type.label for node_type in ontology.node_types if node_type.prepopulated and node_type.prepopulated_instance_count <= 0 ) + prepopulated_relationships_without_instances = tuple( + relationship_readiness_key(edge_type) + for edge_type in ontology.edge_types + if edge_type.prepopulated and edge_type.prepopulated_instance_count <= 0 + ) has_min_entities = node_type_count >= 1 has_min_relationships = edge_type_count >= 1 - prepopulated_ready = len(prepopulated_without_instances) == 0 + prepopulated_ready = ( + len(prepopulated_without_instances) == 0 + and len(prepopulated_relationships_without_instances) == 0 + ) blocking_reasons: list[str] = [] if not has_min_entities: blocking_reasons.append("At least one entity type is required") if not has_min_relationships: blocking_reasons.append("At least one relationship type is required") - if not prepopulated_ready: + if prepopulated_without_instances: labels = ", ".join(prepopulated_without_instances) blocking_reasons.append( - f"Prepopulated types require instances before transition: {labels}" + f"Prepopulated entity types require instances before transition: {labels}" + ) + if prepopulated_relationships_without_instances: + labels = ", ".join(prepopulated_relationships_without_instances) + blocking_reasons.append( + "Prepopulated relationship types require instances before transition: " + f"{labels}" ) return WorkspaceReadinessStatus( @@ -860,6 +877,9 @@ def _evaluate_workspace_readiness( has_minimum_relationship_types=has_min_relationships, prepopulated_types_ready=prepopulated_ready, prepopulated_types_without_instances=prepopulated_without_instances, + prepopulated_relationship_types_without_instances=( + prepopulated_relationships_without_instances + ), blocking_reasons=tuple(blocking_reasons), ) diff --git a/src/api/management/domain/ontology_prepopulation.py b/src/api/management/domain/ontology_prepopulation.py new file mode 100644 index 
000000000..a71afbe57 --- /dev/null +++ b/src/api/management/domain/ontology_prepopulation.py @@ -0,0 +1,44 @@ +"""Prepopulation validation rules for ontology authoring.""" + +from __future__ import annotations + +from management.domain.value_objects import EdgeTypeDefinition, OntologyConfig + + +class PrepopulationValidationError(ValueError): + """Raised when ontology prepopulation flags violate authoring rules.""" + + +def relationship_readiness_key(edge: EdgeTypeDefinition) -> str: + """Stable readiness identifier aligned with design-artifacts relationship keys.""" + source = edge.source_labels[0] if edge.source_labels else "?" + target = edge.target_labels[0] if edge.target_labels else "?" + return f"{source}|{edge.label}|{target}" + + +def validate_ontology_prepopulation(config: OntologyConfig) -> None: + """Ensure prepopulated relationship types only connect prepopulated entity types.""" + node_by_label = {node_type.label: node_type for node_type in config.node_types} + + for edge_type in config.edge_types: + if not edge_type.prepopulated: + continue + if not edge_type.source_labels or not edge_type.target_labels: + raise PrepopulationValidationError( + f"Relationship type `{edge_type.label}` cannot be prepopulated without " + "source_labels and target_labels" + ) + for source_label in edge_type.source_labels: + source_type = node_by_label.get(source_label) + if source_type is None or not source_type.prepopulated: + raise PrepopulationValidationError( + f"Relationship type `{edge_type.label}` is prepopulated but source " + f"entity type `{source_label}` is not prepopulated" + ) + for target_label in edge_type.target_labels: + target_type = node_by_label.get(target_label) + if target_type is None or not target_type.prepopulated: + raise PrepopulationValidationError( + f"Relationship type `{edge_type.label}` is prepopulated but target " + f"entity type `{target_label}` is not prepopulated" + ) diff --git a/src/api/management/domain/value_objects.py 
b/src/api/management/domain/value_objects.py index 7c0a16605..5306f0cc8 100644 --- a/src/api/management/domain/value_objects.py +++ b/src/api/management/domain/value_objects.py @@ -109,6 +109,9 @@ class WorkspaceReadinessStatus: has_minimum_relationship_types: bool prepopulated_types_ready: bool prepopulated_types_without_instances: tuple[str, ...] = field(default_factory=tuple) + prepopulated_relationship_types_without_instances: tuple[str, ...] = field( + default_factory=tuple + ) blocking_reasons: tuple[str, ...] = field(default_factory=tuple) @property @@ -119,6 +122,7 @@ def is_ready(self) -> bool: and self.has_minimum_relationship_types and self.prepopulated_types_ready and not self.prepopulated_types_without_instances + and not self.prepopulated_relationship_types_without_instances ) @@ -459,11 +463,15 @@ class EdgeTypeDefinition: source_labels: tuple[str, ...] = field(default_factory=tuple) target_labels: tuple[str, ...] = field(default_factory=tuple) properties: tuple[str, ...] = field(default_factory=tuple) + prepopulated: bool = False + prepopulated_instance_count: int = 0 def __post_init__(self) -> None: """Validate that label is non-empty.""" if not self.label or not self.label.strip(): raise ValueError("EdgeTypeDefinition label must not be empty") + if self.prepopulated_instance_count < 0: + raise ValueError("prepopulated_instance_count must be >= 0") def to_dict(self) -> dict[str, Any]: """Serialize to a plain dict suitable for JSON persistence.""" @@ -473,6 +481,8 @@ def to_dict(self) -> dict[str, Any]: "source_labels": list(self.source_labels), "target_labels": list(self.target_labels), "properties": list(self.properties), + "prepopulated": self.prepopulated, + "prepopulated_instance_count": self.prepopulated_instance_count, } @classmethod @@ -484,6 +494,8 @@ def from_dict(cls, data: dict[str, Any]) -> EdgeTypeDefinition: source_labels=tuple(data.get("source_labels", [])), target_labels=tuple(data.get("target_labels", [])), 
properties=tuple(data.get("properties", [])), + prepopulated=bool(data.get("prepopulated", False)), + prepopulated_instance_count=int(data.get("prepopulated_instance_count", 0)), ) diff --git a/src/api/management/presentation/knowledge_graphs/models.py b/src/api/management/presentation/knowledge_graphs/models.py index 5126506c8..91fb735a6 100644 --- a/src/api/management/presentation/knowledge_graphs/models.py +++ b/src/api/management/presentation/knowledge_graphs/models.py @@ -113,6 +113,9 @@ class WorkspaceReadinessResponse(BaseModel): has_minimum_relationship_types: bool prepopulated_types_ready: bool prepopulated_types_without_instances: list[str] = Field(default_factory=list) + prepopulated_relationship_types_without_instances: list[str] = Field( + default_factory=list + ) blocking_reasons: list[str] = Field(default_factory=list) @classmethod @@ -124,6 +127,9 @@ def from_domain(cls, readiness: WorkspaceReadinessStatus) -> "WorkspaceReadiness prepopulated_types_without_instances=list( readiness.prepopulated_types_without_instances ), + prepopulated_relationship_types_without_instances=list( + readiness.prepopulated_relationship_types_without_instances + ), blocking_reasons=list(readiness.blocking_reasons), ) @@ -307,6 +313,18 @@ class EdgeTypeDefinitionModel(BaseModel): default_factory=list, description="Properties this edge type may carry", ) + prepopulated: bool = Field( + default=False, + description=( + "Whether this relationship type must have instances before transition; " + "requires all source and target entity types to be prepopulated" + ), + ) + prepopulated_instance_count: int = Field( + default=0, + ge=0, + description="Current known instance count used for readiness evaluation", + ) def to_domain(self) -> EdgeTypeDefinition: """Convert to domain EdgeTypeDefinition value object.""" @@ -316,6 +334,8 @@ def to_domain(self) -> EdgeTypeDefinition: source_labels=tuple(self.source_labels), target_labels=tuple(self.target_labels), 
properties=tuple(self.properties), + prepopulated=self.prepopulated, + prepopulated_instance_count=self.prepopulated_instance_count, ) @classmethod @@ -327,6 +347,8 @@ def from_domain(cls, et: EdgeTypeDefinition) -> EdgeTypeDefinitionModel: source_labels=list(et.source_labels), target_labels=list(et.target_labels), properties=list(et.properties), + prepopulated=et.prepopulated, + prepopulated_instance_count=et.prepopulated_instance_count, ) diff --git a/src/api/management/presentation/knowledge_graphs/routes.py b/src/api/management/presentation/knowledge_graphs/routes.py index b384238c3..cf4ab596e 100644 --- a/src/api/management/presentation/knowledge_graphs/routes.py +++ b/src/api/management/presentation/knowledge_graphs/routes.py @@ -12,6 +12,7 @@ KnowledgeGraphService, ) from management.dependencies.knowledge_graph import get_knowledge_graph_service +from management.domain.ontology_prepopulation import PrepopulationValidationError from management.ports.exceptions import ( DuplicateKnowledgeGraphNameError, KnowledgeGraphNotFoundError, @@ -725,6 +726,11 @@ async def save_knowledge_graph_ontology( status_code=status.HTTP_404_NOT_FOUND, detail=str(e), ) + except PrepopulationValidationError as e: + raise HTTPException( + status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, + detail=str(e), + ) except HTTPException: raise except Exception: diff --git a/src/api/tests/fakes/canonical_schema.py b/src/api/tests/fakes/canonical_schema.py index d99be1c19..c5e9436fa 100644 --- a/src/api/tests/fakes/canonical_schema.py +++ b/src/api/tests/fakes/canonical_schema.py @@ -2,6 +2,7 @@ from __future__ import annotations +from management.domain.ontology_prepopulation import validate_ontology_prepopulation from management.domain.value_objects import OntologyConfig @@ -17,6 +18,7 @@ async def get_ontology(self, kg_id: str) -> OntologyConfig | None: return self._store.get(kg_id) async def replace_ontology(self, kg_id: str, config: OntologyConfig) -> None: + 
validate_ontology_prepopulation(config) self.replaced.append((kg_id, config)) self._store[kg_id] = config diff --git a/src/api/tests/unit/management/application/test_knowledge_graph_service.py b/src/api/tests/unit/management/application/test_knowledge_graph_service.py index 710a6cb0d..cc6c58abe 100644 --- a/src/api/tests/unit/management/application/test_knowledge_graph_service.py +++ b/src/api/tests/unit/management/application/test_knowledge_graph_service.py @@ -522,6 +522,43 @@ async def test_workspace_status_transition_not_eligible_without_schema_readiness ) assert result.transition_eligible is False + @pytest.mark.asyncio + async def test_workspace_status_fails_for_prepopulated_relationship_without_instances( + self, service, authz, kg_repo, canonical_schema_repo, user_id + ): + """Should block transition when prepopulated relationship has zero instances.""" + kg = _make_kg() + ontology_config = OntologyConfig( + node_types=( + NodeTypeDefinition(label="test", prepopulated=True, prepopulated_instance_count=1), + NodeTypeDefinition( + label="api_endpoint", + prepopulated=True, + prepopulated_instance_count=1, + ), + ), + edge_types=( + EdgeTypeDefinition( + label="contains", + source_labels=("test",), + target_labels=("api_endpoint",), + prepopulated=True, + prepopulated_instance_count=0, + ), + ), + ) + await _seed_stored_ontology(kg, kg_repo, canonical_schema_repo, ontology_config) + await _grant_kg_view(authz, kg.id.value, user_id) + + result = await service.get_workspace_status(user_id=user_id, kg_id=kg.id.value) + + assert result is not None + assert result.readiness.prepopulated_types_ready is False + assert result.readiness.prepopulated_relationship_types_without_instances == ( + "test|contains|api_endpoint", + ) + assert result.transition_eligible is False + @pytest.mark.asyncio async def test_workspace_status_fails_for_prepopulated_type_without_instances( self, service, authz, kg_repo, canonical_schema_repo, user_id diff --git 
a/src/api/tests/unit/management/test_ontology_prepopulation.py b/src/api/tests/unit/management/test_ontology_prepopulation.py new file mode 100644 index 000000000..e78f91d4d --- /dev/null +++ b/src/api/tests/unit/management/test_ontology_prepopulation.py @@ -0,0 +1,109 @@ +"""Unit tests for ontology prepopulation rules.""" + +from __future__ import annotations + +import pytest + +from management.domain.ontology_prepopulation import ( + PrepopulationValidationError, + relationship_readiness_key, + validate_ontology_prepopulation, +) +from management.domain.value_objects import ( + EdgeTypeDefinition, + NodeTypeDefinition, + OntologyConfig, +) + + +def _bootstrap_ontology( + *, + nodes: tuple[NodeTypeDefinition, ...] = (), + edges: tuple[EdgeTypeDefinition, ...] = (), +) -> OntologyConfig: + return OntologyConfig(node_types=nodes, edge_types=edges) + + +class TestValidateOntologyPrepopulation: + def test_allows_prepopulated_relationship_when_endpoints_are_prepopulated(self) -> None: + config = _bootstrap_ontology( + nodes=( + NodeTypeDefinition(label="test", prepopulated=True), + NodeTypeDefinition(label="api_endpoint", prepopulated=True), + ), + edges=( + EdgeTypeDefinition( + label="contains", + source_labels=("test",), + target_labels=("api_endpoint",), + prepopulated=True, + ), + ), + ) + + validate_ontology_prepopulation(config) + + def test_rejects_prepopulated_relationship_when_source_not_prepopulated(self) -> None: + config = _bootstrap_ontology( + nodes=( + NodeTypeDefinition(label="test", prepopulated=False), + NodeTypeDefinition(label="api_endpoint", prepopulated=True), + ), + edges=( + EdgeTypeDefinition( + label="contains", + source_labels=("test",), + target_labels=("api_endpoint",), + prepopulated=True, + ), + ), + ) + + with pytest.raises(PrepopulationValidationError, match="source entity type `test`"): + validate_ontology_prepopulation(config) + + def test_rejects_prepopulated_relationship_when_target_not_prepopulated(self) -> None: + config = 
_bootstrap_ontology( + nodes=( + NodeTypeDefinition(label="test", prepopulated=True), + NodeTypeDefinition(label="api_endpoint", prepopulated=False), + ), + edges=( + EdgeTypeDefinition( + label="contains", + source_labels=("test",), + target_labels=("api_endpoint",), + prepopulated=True, + ), + ), + ) + + with pytest.raises(PrepopulationValidationError, match="target entity type `api_endpoint`"): + validate_ontology_prepopulation(config) + + def test_rejects_prepopulated_relationship_when_endpoint_type_missing(self) -> None: + config = _bootstrap_ontology( + nodes=(NodeTypeDefinition(label="api_endpoint", prepopulated=True),), + edges=( + EdgeTypeDefinition( + label="contains", + source_labels=("test",), + target_labels=("api_endpoint",), + prepopulated=True, + ), + ), + ) + + with pytest.raises(PrepopulationValidationError, match="source entity type `test`"): + validate_ontology_prepopulation(config) + + +class TestRelationshipReadinessKey: + def test_builds_design_artifacts_style_key(self) -> None: + edge = EdgeTypeDefinition( + label="contains", + source_labels=("test",), + target_labels=("api_endpoint",), + ) + + assert relationship_readiness_key(edge) == "test|contains|api_endpoint" diff --git a/src/api/tests/unit/management/test_ontology_value_objects.py b/src/api/tests/unit/management/test_ontology_value_objects.py index ebf872a6e..310c73095 100644 --- a/src/api/tests/unit/management/test_ontology_value_objects.py +++ b/src/api/tests/unit/management/test_ontology_value_objects.py @@ -130,6 +130,8 @@ def test_valid_minimal_edge_type(self): assert et.source_labels == () assert et.target_labels == () assert et.properties == () + assert et.prepopulated is False + assert et.prepopulated_instance_count == 0 def test_source_labels_default_empty(self): """source_labels defaults to an empty tuple.""" @@ -191,6 +193,16 @@ def test_to_dict_contains_expected_keys(self): assert "source_labels" in d assert "target_labels" in d assert "properties" in d + assert 
"prepopulated" in d + assert "prepopulated_instance_count" in d + + def test_prepopulated_instance_count_must_be_non_negative_for_edges(self) -> None: + with pytest.raises(ValueError, match="prepopulated_instance_count"): + EdgeTypeDefinition( + label="contains", + prepopulated=True, + prepopulated_instance_count=-1, + ) class TestOntologyConfig: diff --git a/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue b/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue index 6e1d9b0da..12768f15b 100644 --- a/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue +++ b/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue @@ -111,6 +111,7 @@ interface WorkspaceReadinessStatus { has_minimum_relationship_types: boolean prepopulated_types_ready: boolean prepopulated_types_without_instances: string[] + prepopulated_relationship_types_without_instances: string[] blocking_reasons: string[] } @@ -368,7 +369,9 @@ const graphManagementRailItems = computed(() => { workspaceMode: statusProjection.value.workspace_mode, transitionEligible: statusProjection.value.transition_eligible, blockingReasonCount: statusProjection.value.readiness.blocking_reasons.length, - prepopulatedGapCount: statusProjection.value.readiness.prepopulated_types_without_instances.length, + prepopulatedGapCount: + statusProjection.value.readiness.prepopulated_types_without_instances.length + + statusProjection.value.readiness.prepopulated_relationship_types_without_instances.length, hasMinimumEntityTypes: statusProjection.value.readiness.has_minimum_entity_types, hasMinimumRelationshipTypes: statusProjection.value.readiness.has_minimum_relationship_types, sessionUpdatedAt: extractionSession.value?.updated_at ?? 
null, @@ -1986,7 +1989,7 @@ watch(selectedOpsDataSourceId, () => { class="rounded-lg border border-amber-400/60 bg-amber-50/60 p-3 text-xs dark:border-amber-800 dark:bg-amber-950/20" > <p class="font-medium text-amber-800 dark:text-amber-300"> - Prepopulated types missing instances + Prepopulated entity types missing instances </p> <ul class="mt-1 list-disc space-y-1 pl-4 text-muted-foreground"> <li @@ -1997,6 +2000,26 @@ watch(selectedOpsDataSourceId, () => { </li> </ul> </div> + <div + v-if=" + statusProjection.readiness.prepopulated_relationship_types_without_instances + .length > 0 + " + class="rounded-lg border border-amber-400/60 bg-amber-50/60 p-3 text-xs dark:border-amber-800 dark:bg-amber-950/20" + > + <p class="font-medium text-amber-800 dark:text-amber-300"> + Prepopulated relationship types missing instances + </p> + <ul class="mt-1 list-disc space-y-1 pl-4 text-muted-foreground"> + <li + v-for="relKey in statusProjection.readiness + .prepopulated_relationship_types_without_instances" + :key="relKey" + > + {{ relKey }} + </li> + </ul> + </div> <div v-if="statusProjection.readiness.blocking_reasons.length > 0" class="rounded-lg border border-destructive/50 bg-card p-3" @@ -2012,7 +2035,12 @@ watch(selectedOpsDataSourceId, () => { </ul> </div> <p - v-else-if="statusProjection.readiness.prepopulated_types_without_instances.length === 0" + v-else-if=" + statusProjection.readiness.prepopulated_types_without_instances.length === 0 + && statusProjection.readiness.prepopulated_relationship_types_without_instances + .length === 0 + && statusProjection.readiness.blocking_reasons.length === 0 + " class="text-xs text-muted-foreground" > No validation diagnostics are currently blocking transition. 
From 43f5e4e107739e235d4d3aa2db47c6079e08c97f Mon Sep 17 00:00:00 2001 From: aredenba-rh <aredenba@redhat.com> Date: Wed, 3 Jun 2026 16:43:01 -0400 Subject: [PATCH 098/153] fix(dev-ui): preserve scroll positions on graph management updates Keep chat, artifact panels, and page scroll stable during session refresh, readiness changes, and silent design-artifact reloads. Co-authored-by: Cursor <cursoragent@cursor.com> --- .../extraction/SharedConversationPanel.vue | 67 ++++++++-- .../GraphDesignEntitiesPanel.vue | 15 ++- .../GraphDesignRelationshipsPanel.vue | 15 ++- .../composables/useScrollPositionPreserve.ts | 44 +++++++ .../pages/knowledge-graphs/[kgId]/manage.vue | 117 ++++++++++++++++-- .../tests/scroll-position-preserve.test.ts | 22 ++++ 6 files changed, 245 insertions(+), 35 deletions(-) create mode 100644 src/dev-ui/app/composables/useScrollPositionPreserve.ts create mode 100644 src/dev-ui/app/tests/scroll-position-preserve.test.ts diff --git a/src/dev-ui/app/components/extraction/SharedConversationPanel.vue b/src/dev-ui/app/components/extraction/SharedConversationPanel.vue index f4749cdb1..91dd28a7d 100644 --- a/src/dev-ui/app/components/extraction/SharedConversationPanel.vue +++ b/src/dev-ui/app/components/extraction/SharedConversationPanel.vue @@ -1,5 +1,10 @@ <script setup lang="ts"> import { computed, nextTick, onMounted, ref, watch } from 'vue' +import { + captureScrollPositions, + isScrollNearBottom, + restoreScrollPositions, +} from '@/composables/useScrollPositionPreserve' import DOMPurify from 'isomorphic-dompurify' import { marked } from 'marked' import { Bot, Loader2, RefreshCw, RotateCcw, Send, Sparkles, User } from 'lucide-vue-next' @@ -80,6 +85,8 @@ const clearConfirmOpen = ref(false) const chatScrollRef = ref<HTMLElement | null>(null) const textareaRef = ref<HTMLTextAreaElement | null>(null) const composerInputId = 'graph-management-chat-input' +const stickToBottom = ref(true) +const trackedMessageCount = ref(0) marked.setOptions({ gfm: true, 
breaks: true }) @@ -130,17 +137,24 @@ function messageText(entry: ConversationEntry): string { return entry.content ?? entry.message ?? '(empty)' } -function scrollToBottom() { +function scrollToBottom(force = false) { + const el = chatScrollRef.value + if (!el) return + if (!force && !stickToBottom.value && !isScrollNearBottom(el)) return requestAnimationFrame(() => { requestAnimationFrame(() => { - const el = chatScrollRef.value - if (el) { - el.scrollTop = el.scrollHeight - } + const target = chatScrollRef.value + if (target) target.scrollTop = target.scrollHeight }) }) } +function onChatScroll() { + const el = chatScrollRef.value + if (!el) return + stickToBottom.value = isScrollNearBottom(el) +} + function adjustTextareaHeight() { const el = textareaRef.value if (!el) return @@ -183,25 +197,54 @@ function confirmClearChat() { function sendDraftMessage() { const trimmed = props.draftMessage.trim() if (!trimmed || chatSendDisabled.value) return + stickToBottom.value = true emit('sendMessage', trimmed) emit('update:draftMessage', '') - void nextTick(() => adjustTextareaHeight()) + void nextTick(() => { + adjustTextareaHeight() + scrollToBottom(true) + }) } +let sessionScrollSnapshot: ReturnType<typeof captureScrollPositions> | null = null + watch( () => props.session, - async () => { + () => { + sessionScrollSnapshot = captureScrollPositions([chatScrollRef.value]) + }, + { deep: true, flush: 'sync' }, +) + +watch( + () => props.session, + async (session) => { + const nextCount = session?.message_history?.length ?? 
0 + const grew = nextCount > trackedMessageCount.value + trackedMessageCount.value = nextCount await nextTick() - scrollToBottom() + if (grew && (stickToBottom.value || props.sending)) { + scrollToBottom(true) + return + } + if (sessionScrollSnapshot) { + restoreScrollPositions(sessionScrollSnapshot) + sessionScrollSnapshot = null + } }, { deep: true, flush: 'post' }, ) watch( - () => [props.activityLines, props.sending, props.loading, showRuntimeActivity.value], + () => [props.activityLines, props.sending, props.loading, showRuntimeActivity.value] as const, async () => { + const snapshot = captureScrollPositions([chatScrollRef.value]) await nextTick() - scrollToBottom() + if (props.sending || stickToBottom.value) { + scrollToBottom(props.sending) + return + } + restoreScrollPositions(snapshot) }, { deep: true, flush: 'post' }, ) @@ -221,9 +264,10 @@ watch( ) onMounted(() => { + trackedMessageCount.value = messageHistory.value.length void nextTick(() => { adjustTextareaHeight() - scrollToBottom() + scrollToBottom(true) }) }) </script> @@ -292,6 +336,7 @@ onMounted(() => { <div ref="chatScrollRef" class="min-h-[14rem] max-h-[min(32rem,60vh)] space-y-4 overflow-y-auto bg-muted/10 px-4 py-4 sm:px-6" + @scroll.passive="onChatScroll" > <div v-if="showInitialConversationLoading" diff --git a/src/dev-ui/app/components/graph-management/GraphDesignEntitiesPanel.vue b/src/dev-ui/app/components/graph-management/GraphDesignEntitiesPanel.vue index 8397f974f..0e4f72363 100644 --- a/src/dev-ui/app/components/graph-management/GraphDesignEntitiesPanel.vue +++ b/src/dev-ui/app/components/graph-management/GraphDesignEntitiesPanel.vue @@ -29,19 +29,22 @@ const loading = ref(true) const data = ref<DesignArtifactsResponse | null>(null) const instancePage = ref<Record<string, number>>({}) -async function fetchEntities() { +async function fetchEntities(options: { preserveUiState?: boolean } = {}) { if (!props.kgId) { data.value = null loading.value = false return } - loading.value = true 
+ const preserveUiState = options.preserveUiState === true && data.value !== null + if (!preserveUiState) { + loading.value = true + instancePage.value = {} + } try { data.value = await apiFetch<DesignArtifactsResponse>( `/management/knowledge-graphs/${props.kgId}/design-artifacts`, { query: { limit: 500 } }, ) - instancePage.value = {} } catch (err: unknown) { toast.error('Failed to load entity design artifacts', { description: err instanceof Error ? err.message : 'Request failed', @@ -66,8 +69,8 @@ function setInstancePage(typeKey: string, page: number) { watch( () => [props.kgId, props.reloadNonce] as const, - () => { - void fetchEntities() + ([, reloadNonce]) => { + void fetchEntities({ preserveUiState: reloadNonce > 0 }) }, { immediate: true }, ) @@ -96,7 +99,7 @@ defineExpose({ refresh: fetchEntities }) </div> </div> - <div v-if="loading" class="flex items-center justify-center py-16"> + <div v-if="loading && !data" class="flex items-center justify-center py-16"> <Loader2 class="size-8 animate-spin text-muted-foreground" /> </div> diff --git a/src/dev-ui/app/components/graph-management/GraphDesignRelationshipsPanel.vue b/src/dev-ui/app/components/graph-management/GraphDesignRelationshipsPanel.vue index 718300d8c..07c0c164c 100644 --- a/src/dev-ui/app/components/graph-management/GraphDesignRelationshipsPanel.vue +++ b/src/dev-ui/app/components/graph-management/GraphDesignRelationshipsPanel.vue @@ -28,19 +28,22 @@ const loading = ref(true) const data = ref<DesignArtifactsResponse | null>(null) const instancePage = ref<Record<string, number>>({}) -async function fetchRelationships() { +async function fetchRelationships(options: { preserveUiState?: boolean } = {}) { if (!props.kgId) { data.value = null loading.value = false return } - loading.value = true + const preserveUiState = options.preserveUiState === true && data.value !== null + if (!preserveUiState) { + loading.value = true + instancePage.value = {} + } try { data.value = await 
apiFetch<DesignArtifactsResponse>( `/management/knowledge-graphs/${props.kgId}/design-artifacts`, { query: { limit: 500 } }, ) - instancePage.value = {} } catch (err: unknown) { toast.error('Failed to load relationship design artifacts', { description: err instanceof Error ? err.message : 'Request failed', @@ -59,8 +62,8 @@ function setInstancePage(key: string, page: number) { watch( () => [props.kgId, props.reloadNonce] as const, - () => { - void fetchRelationships() + ([, reloadNonce]) => { + void fetchRelationships({ preserveUiState: reloadNonce > 0 }) }, { immediate: true }, ) @@ -89,7 +92,7 @@ defineExpose({ refresh: fetchRelationships }) </div> </div> - <div v-if="loading" class="flex items-center justify-center py-16"> + <div v-if="loading && !data" class="flex items-center justify-center py-16"> <Loader2 class="size-8 animate-spin text-muted-foreground" /> </div> diff --git a/src/dev-ui/app/composables/useScrollPositionPreserve.ts b/src/dev-ui/app/composables/useScrollPositionPreserve.ts new file mode 100644 index 000000000..6de60782a --- /dev/null +++ b/src/dev-ui/app/composables/useScrollPositionPreserve.ts @@ -0,0 +1,44 @@ +import { nextTick } from 'vue' + +export type ScrollSnapshot = Map<HTMLElement, number> + +/** Capture scrollTop for each non-null element. */ +export function captureScrollPositions(elements: Array<HTMLElement | null | undefined>): ScrollSnapshot { + const snapshot = new Map<HTMLElement, number>() + for (const el of elements) { + if (el) snapshot.set(el, el.scrollTop) + } + return snapshot +} + +/** Restore scrollTop from a prior capture (double rAF for layout-settled DOM). */ +export function restoreScrollPositions(snapshot: ScrollSnapshot): void { + void nextTick(() => { + requestAnimationFrame(() => { + requestAnimationFrame(() => { + for (const [el, top] of snapshot) { + if (el.isConnected) el.scrollTop = top + } + }) + }) + }) +} + +/** Run async work without changing scroll positions on the given elements. 
*/ +export async function withPreservedScrollPositions<T>( + elements: Array<HTMLElement | null | undefined>, + fn: () => Promise<T>, +): Promise<T> { + const snapshot = captureScrollPositions(elements) + try { + return await fn() + } finally { + restoreScrollPositions(snapshot) + } +} + +/** True when the user is within `thresholdPx` of the bottom of a scroll container. */ +export function isScrollNearBottom(element: HTMLElement, thresholdPx = 48): boolean { + const distance = element.scrollHeight - element.scrollTop - element.clientHeight + return distance <= thresholdPx +} diff --git a/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue b/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue index 12768f15b..3929244f9 100644 --- a/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue +++ b/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue @@ -1,5 +1,11 @@ <script setup lang="ts"> import { computed, nextTick, onMounted, ref, watch } from 'vue' +import { + captureScrollPositions, + restoreScrollPositions, + withPreservedScrollPositions, + type ScrollSnapshot, +} from '@/composables/useScrollPositionPreserve' import { toast } from 'vue-sonner' import { ArrowLeft, @@ -501,6 +507,27 @@ const nextSteps = computed(() => { }) const sessionActivityLines = ref<string[]>([]) +const graphManagementDetailRef = ref<HTMLElement | null>(null) +let graphManagementScrollSnapshot: ScrollSnapshot | null = null + +function graphManagementScrollTargets(): HTMLElement[] { + const elements: HTMLElement[] = [] + const main = document.querySelector('main') + if (main instanceof HTMLElement) elements.push(main) + if (graphManagementDetailRef.value) elements.push(graphManagementDetailRef.value) + return elements +} + +function captureGraphManagementScroll(): void { + if (activeStep.value !== 'graph-management') return + graphManagementScrollSnapshot = captureScrollPositions(graphManagementScrollTargets()) +} + +function restoreGraphManagementScroll(): void { + if 
(!graphManagementScrollSnapshot) return + restoreScrollPositions(graphManagementScrollSnapshot) + graphManagementScrollSnapshot = null +} async function refreshDesignArtifacts(options: { silent?: boolean } = {}) { if (!hasTenant.value || !kgId.value) return @@ -510,9 +537,19 @@ async function refreshDesignArtifacts(options: { silent?: boolean } = {}) { `/management/knowledge-graphs/${kgId.value}/design-artifacts`, { query: { limit: 500 } }, ) - entityTypeLabels.value = Object.keys(artifacts.entities ?? {}).sort() - relationshipTypeLabels.value = (artifacts.relationships ?? []).map((rel) => rel.relationship_type) - designArtifactsReloadNonce.value += 1 + const applyArtifactRefresh = () => { + entityTypeLabels.value = Object.keys(artifacts.entities ?? {}).sort() + relationshipTypeLabels.value = (artifacts.relationships ?? []).map((rel) => rel.relationship_type) + designArtifactsReloadNonce.value += 1 + } + if (activeStep.value === 'graph-management') { + await withPreservedScrollPositions(graphManagementScrollTargets(), async () => { + applyArtifactRefresh() + await nextTick() + }) + } else { + applyArtifactRefresh() + } if (!options.silent) { toast.success('Design artifacts refreshed') } @@ -733,10 +770,19 @@ async function loadWorkspaceStatus() { if (!hasTenant.value || !kgId.value) return loading.value = true workspaceLoadError.value = null - try { + const preserveScroll = + activeStep.value === 'graph-management' && statusProjection.value !== null + const fetchStatus = async () => { statusProjection.value = await apiFetch<WorkspaceStatusResponse>( `/management/knowledge-graphs/${kgId.value}/workspace-status`, ) + } + try { + if (preserveScroll) { + await withPreservedScrollPositions(graphManagementScrollTargets(), fetchStatus) + } else { + await fetchStatus() + } workspaceForbidden.value = false workspaceForbiddenReason.value = null } catch (err) { @@ -1126,10 +1172,17 @@ async function validateWorkspace() { if (!kgId.value || workspaceForbidden.value) return 
validating.value = true try { - statusProjection.value = await apiFetch<WorkspaceStatusResponse>( - `/management/knowledge-graphs/${kgId.value}/workspace/validate`, - { method: 'POST' }, - ) + const validate = async () => { + statusProjection.value = await apiFetch<WorkspaceStatusResponse>( + `/management/knowledge-graphs/${kgId.value}/workspace/validate`, + { method: 'POST' }, + ) + } + if (activeStep.value === 'graph-management') { + await withPreservedScrollPositions(graphManagementScrollTargets(), validate) + } else { + await validate() + } toast.success('Workspace validation complete') } catch (err) { if (isForbiddenHttpError(err)) { @@ -1153,10 +1206,17 @@ async function transitionToExtraction() { transitioning.value = true const previousStatus = statusProjection.value try { - statusProjection.value = await apiFetch<WorkspaceStatusResponse>( - `/management/knowledge-graphs/${kgId.value}/workspace/transition-to-extraction`, - { method: 'POST' }, - ) + const transition = async () => { + statusProjection.value = await apiFetch<WorkspaceStatusResponse>( + `/management/knowledge-graphs/${kgId.value}/workspace/transition-to-extraction`, + { method: 'POST' }, + ) + } + if (activeStep.value === 'graph-management') { + await withPreservedScrollPositions(graphManagementScrollTargets(), transition) + } else { + await transition() + } toast.success('Workspace transitioned to extraction operations') await loadExtractionSession() } catch (err) { @@ -1268,6 +1328,38 @@ watch(selectedOpsDataSourceId, () => { selectedInlineRunId.value = null loadInlineSyncRuns() }) + +watch( + () => { + if (activeStep.value !== 'graph-management') return null + return [ + statusProjection.value, + designArtifactsReloadNonce.value, + progressChecklist.value, + graphManagementRailItems.value, + ] as const + }, + () => { + captureGraphManagementScroll() + }, + { flush: 'sync', deep: true }, +) + +watch( + () => { + if (activeStep.value !== 'graph-management') return null + return [ + 
statusProjection.value, + designArtifactsReloadNonce.value, + progressChecklist.value, + graphManagementRailItems.value, + ] as const + }, + () => { + restoreGraphManagementScroll() + }, + { flush: 'post', deep: true }, +) </script> <template> @@ -1901,6 +1993,7 @@ watch(selectedOpsDataSourceId, () => { <div id="graph-management-artifact-detail" + ref="graphManagementDetailRef" class="graph-management-detail min-h-0 min-w-0 max-h-[min(70dvh,calc(100dvh-12rem))] space-y-6 overflow-y-auto overscroll-contain" > <div v-if="selectedRailItemId === 'schema-entities'" class="min-w-0 space-y-2"> diff --git a/src/dev-ui/app/tests/scroll-position-preserve.test.ts b/src/dev-ui/app/tests/scroll-position-preserve.test.ts new file mode 100644 index 000000000..98be078a2 --- /dev/null +++ b/src/dev-ui/app/tests/scroll-position-preserve.test.ts @@ -0,0 +1,22 @@ +import { describe, expect, it } from 'vitest' +import { isScrollNearBottom } from '@/composables/useScrollPositionPreserve' + +describe('isScrollNearBottom', () => { + it('returns true when scrolled to the bottom within threshold', () => { + const el = { + scrollHeight: 1000, + clientHeight: 200, + scrollTop: 760, + } as HTMLElement + expect(isScrollNearBottom(el, 48)).toBe(true) + }) + + it('returns false when scrolled away from the bottom', () => { + const el = { + scrollHeight: 1000, + clientHeight: 200, + scrollTop: 100, + } as HTMLElement + expect(isScrollNearBottom(el, 48)).toBe(false) + }) +}) From 67341825f6b60f68a696f4422ad73ad06c0b579d Mon Sep 17 00:00:00 2001 From: aredenba-rh <aredenba@redhat.com> Date: Wed, 3 Jun 2026 17:25:27 -0400 Subject: [PATCH 099/153] feat(extraction): equip GMA for instance prepopulation and longer turns Add workload tools for live readiness, bulk instance/relationship listing, repository scanning (Read/Grep/Glob), and auto-sync of prepopulated counts; raise sticky agent turn timeout to 1000s. 
Co-authored-by: Cursor <cursoragent@cursor.com> --- compose.dev.yaml | 2 +- .../kartograph_agent_runtime/agent_prompt.py | 109 +++++++++-- .../kartograph_agent_runtime/executor.py | 28 ++- .../kartograph_agent_runtime/schema_tools.py | 94 +++++++++ .../kartograph_agent_runtime/settings.py | 2 +- .../kartograph_agent_runtime/tools.py | 49 +++++ src/agent-runtime/tests/test_agent_prompt.py | 25 +++ src/agent-runtime/tests/test_executor.py | 35 ++++ src/agent-runtime/tests/test_schema_tools.py | 10 + .../application/schema_authoring_guide.py | 68 +++++-- .../application/skill_resolution_service.py | 32 ++- .../container_workload_runtime.py | 2 +- .../sticky_session_workdir_materializer.py | 18 ++ .../workload_runtime_settings.py | 2 +- src/api/extraction/ports/workload_graph.py | 63 ++++++ .../presentation/workload_routes.py | 159 +++++++++++++++ .../services/graph_query_service.py | 34 ++++ .../graph/infrastructure/graph_repository.py | 141 ++++++++++++++ src/api/graph/ports/repositories.py | 44 +++++ .../extraction_workload/dependencies.py | 1 + .../extraction_workload/graph_reader.py | 156 ++++++++++++++- .../extraction_workload/schema_service.py | 17 ++ .../workspace_readiness.py | 182 ++++++++++++++++++ .../services/knowledge_graph_service.py | 53 +---- .../application/workspace_readiness.py | 91 +++++++++ .../test_skill_resolution_service.py | 4 + ...est_sticky_session_workdir_materializer.py | 1 + .../presentation/test_workload_routes.py | 122 +++++++++++- .../test_workspace_readiness.py | 76 ++++++++ .../application/test_workspace_readiness.py | 59 ++++++ 30 files changed, 1576 insertions(+), 103 deletions(-) create mode 100644 src/api/infrastructure/extraction_workload/workspace_readiness.py create mode 100644 src/api/management/application/workspace_readiness.py create mode 100644 src/api/tests/unit/infrastructure/extraction_workload/test_workspace_readiness.py create mode 100644 src/api/tests/unit/management/application/test_workspace_readiness.py diff 
--git a/compose.dev.yaml b/compose.dev.yaml index d0823cc2a..8c77419d8 100644 --- a/compose.dev.yaml +++ b/compose.dev.yaml @@ -24,7 +24,7 @@ services: KARTOGRAPH_EXTRACTION_RUNTIME_SKILLS_DIR: ${PWD}/skills KARTOGRAPH_EXTRACTION_RUNTIME_CONTAINER_RUN_UID: ${HOST_UID} KARTOGRAPH_EXTRACTION_RUNTIME_CONTAINER_RUN_GID: ${HOST_GID} - KARTOGRAPH_EXTRACTION_RUNTIME_STICKY_TURN_TIMEOUT_SECONDS: "600" + KARTOGRAPH_EXTRACTION_RUNTIME_STICKY_TURN_TIMEOUT_SECONDS: "1000" KARTOGRAPH_EXTRACTION_RUNTIME_STICKY_MAX_TURNS: "500" # Vertex AI for Claude Agent SDK in sticky assistant containers CLAUDE_CODE_USE_VERTEX: "1" diff --git a/src/agent-runtime/kartograph_agent_runtime/agent_prompt.py b/src/agent-runtime/kartograph_agent_runtime/agent_prompt.py index 5c64bd37f..0f2699505 100644 --- a/src/agent-runtime/kartograph_agent_runtime/agent_prompt.py +++ b/src/agent-runtime/kartograph_agent_runtime/agent_prompt.py @@ -4,7 +4,10 @@ from typing import Any -from kartograph_agent_runtime.schema_tools import KARTOGRAPH_SCHEMA_TOOL_NAMES +from kartograph_agent_runtime.schema_tools import ( + KARTOGRAPH_SCHEMA_TOOL_NAMES, + WORKSPACE_FILE_TOOL_NAMES, +) from kartograph_agent_runtime.settings import AgentRuntimeSettings _TOOLS_QUICK_REFERENCE = """ @@ -12,30 +15,102 @@ | Tool | Purpose | |------|---------| -| `kartograph_get_schema_authoring_guide` | Full JSON shapes and mutation rules — call first on schema tasks | +| `kartograph_get_schema_authoring_guide` | Full JSON shapes, instance cookbook, mutation rules — call first | +| `kartograph_get_workspace_readiness` | Prepopulated gaps, live instance counts, blocking reasons | | `kartograph_get_schema_ontology` | Read current `node_types` and `edge_types` before every save | | `kartograph_save_schema_ontology` | Replace canonical ontology (read → merge edits → save full payload) | -| `kartograph_apply_graph_mutations` | Apply JSONL CREATE/UPDATE/DELETE instance lines to the official graph DB | +| `kartograph_apply_graph_mutations` | Apply 
JSONL CREATE/UPDATE/DELETE instance lines (batch 25–50 lines) | +| `kartograph_list_instances_by_type` | List/count entity instances for one type (verify prepopulation) | +| `kartograph_list_relationship_instances` | List relationship edges with source/target slugs and node IDs | | `kartograph_search_graph_by_slug` | Find existing nodes by slug to avoid duplicates | +## Workspace file tools (read-only) + +| Tool | Purpose | +|------|---------| +| `Read` | Read files under the session workspace mount | +| `Grep` | Search file contents in `repository-files/<data_source>/` | +| `Glob` | List files by pattern for instance generation | + ### Quick workflow 1. `kartograph_get_schema_authoring_guide` -2. `kartograph_get_schema_ontology` -3. Model entity/relationship types → `kartograph_save_schema_ontology` -4. Create instances → `kartograph_apply_graph_mutations` (one JSON object per line) -5. Verify → `kartograph_search_graph_by_slug` - -Writes persist to the platform database for the active knowledge graph. Use Read/Grep/Glob -only for repository files under the session workspace — not for API discovery. +2. `kartograph_get_workspace_readiness` +3. `kartograph_get_schema_ontology` +4. Glob/Grep/Read `repository-files/` to derive instances +5. Model types → `kartograph_save_schema_ontology` +6. Create entity instances in batches → `kartograph_apply_graph_mutations` +7. Create relationship edges (after entity IDs are known) +8. Verify → `kartograph_list_instances_by_type` and `kartograph_list_relationship_instances` + +Writes persist to the platform database for the active knowledge graph. 
""".strip() +def _format_workspace_readiness(readiness: dict[str, Any]) -> str: + lines = ["## Workspace readiness (live snapshot)"] + + entity_gaps = readiness.get("prepopulated_entity_types_without_instances_live") or [] + rel_gaps = readiness.get("prepopulated_relationship_types_without_instances_live") or [] + blocking = readiness.get("blocking_reasons") or [] + prepopulated_types = readiness.get("prepopulated_entity_types") or [] + prepopulated_relationships = readiness.get("prepopulated_relationship_types") or [] + + if entity_gaps: + lines.append( + "- Prepopulated entity types still needing instances: " + + ", ".join(f"`{label}`" for label in entity_gaps) + ) + else: + lines.append("- All prepopulated entity types have at least one live instance.") + + if rel_gaps: + lines.append( + "- Prepopulated relationship types still needing instances: " + + ", ".join(f"`{key}`" for key in rel_gaps) + ) + + if prepopulated_types: + lines.append("- Prepopulated entity coverage:") + for row in prepopulated_types: + if not isinstance(row, dict): + continue + label = str(row.get("label") or "?") + live = row.get("live_instance_count", 0) + metadata = row.get("metadata_instance_count", 0) + lines.append(f" - `{label}`: live={live}, metadata={metadata}") + + if prepopulated_relationships: + lines.append("- Prepopulated relationship coverage:") + for row in prepopulated_relationships: + if not isinstance(row, dict): + continue + key = str(row.get("key") or "?") + live = row.get("live_instance_count", 0) + metadata = row.get("metadata_instance_count", 0) + lines.append(f" - `{key}`: live={live}, metadata={metadata}") + + if blocking: + lines.append("- Blocking reasons:") + for reason in blocking: + lines.append(f" - {reason}") + + transition = readiness.get("transition_eligible") + live_ready = readiness.get("prepopulated_types_ready_live") + if transition is not None: + lines.append(f"- Transition eligible: `{transition}`") + if live_ready is not None: + lines.append(f"- 
Prepopulated coverage ready (live): `{live_ready}`") + + return "\n".join(lines) + + def build_agent_system_prompt( agent_configuration: dict[str, Any], *, settings: AgentRuntimeSettings | None = None, workspace_appendix: str = "", + workspace_readiness: dict[str, Any] | None = None, include_tools_manifest: bool = True, ) -> str: """Build the full system prompt with skills, guardrails, tools, and session scope.""" @@ -66,8 +141,13 @@ def build_agent_system_prompt( tools_block = "" if include_tools_manifest and settings is not None and settings.workload_token.strip(): - tool_list = ", ".join(f"`{name}`" for name in KARTOGRAPH_SCHEMA_TOOL_NAMES) - tools_block = f"{_TOOLS_QUICK_REFERENCE}\n\nRegistered tools: {tool_list}." + kartograph_tools = ", ".join(f"`{name}`" for name in KARTOGRAPH_SCHEMA_TOOL_NAMES) + file_tools = ", ".join(f"`{name}`" for name in WORKSPACE_FILE_TOOL_NAMES) + tools_block = ( + f"{_TOOLS_QUICK_REFERENCE}\n\n" + f"Registered Kartograph tools: {kartograph_tools}.\n" + f"Registered workspace file tools: {file_tools}." 
+ ) session_block = "" if settings is not None: @@ -84,6 +164,10 @@ def build_agent_system_prompt( ) session_block = "\n".join(lines) + readiness_block = "" + if workspace_readiness: + readiness_block = _format_workspace_readiness(workspace_readiness) + sections = [ section for section in ( @@ -91,6 +175,7 @@ def build_agent_system_prompt( guardrail_lines, skills_block, tools_block, + readiness_block, session_block, workspace_appendix.strip(), ) diff --git a/src/agent-runtime/kartograph_agent_runtime/executor.py b/src/agent-runtime/kartograph_agent_runtime/executor.py index fed15aa19..37547b0f6 100644 --- a/src/agent-runtime/kartograph_agent_runtime/executor.py +++ b/src/agent-runtime/kartograph_agent_runtime/executor.py @@ -20,7 +20,7 @@ from kartograph_agent_runtime.tools import RuntimeTooling from kartograph_agent_runtime.vertex import build_claude_agent_env -_DEFAULT_TURN_TIMEOUT_SECONDS = 600.0 +_DEFAULT_TURN_TIMEOUT_SECONDS = 1000.0 _SDK_HEARTBEAT_SECONDS = 8.0 @@ -29,11 +29,13 @@ def _build_system_prompt( *, settings: AgentRuntimeSettings | None = None, workspace_appendix: str = "", + workspace_readiness: dict[str, Any] | None = None, ) -> str: return build_agent_system_prompt( agent_configuration, settings=settings, workspace_appendix=workspace_appendix, + workspace_readiness=workspace_readiness, ) @@ -87,6 +89,14 @@ def _build_workspace_prompt_appendix(settings: AgentRuntimeSettings) -> str: for path in sample_paths[:6]: if path: lines.append(f" - `{path}`") + extension_counts = source.get("file_extension_counts") + if isinstance(extension_counts, dict) and extension_counts: + top_extensions = sorted( + extension_counts.items(), + key=lambda item: (-int(item[1]), str(item[0])), + )[:8] + summary = ", ".join(f"{ext}={count}" for ext, count in top_extensions) + lines.append(f" - extensions: {summary}") return "\n".join(lines) repo_root = root / "repository-files" @@ -363,10 +373,20 @@ async def _stream_with_claude_sdk( from claude_agent_sdk import 
ClaudeAgentOptions, query from claude_agent_sdk.types import ResultMessage, TaskNotificationMessage + workspace_dir = settings.workspace_dir.strip() or "/workspace" + tooling = RuntimeTooling(settings=settings) + workspace_readiness: dict[str, Any] | None = None + if settings.workload_token.strip(): + try: + workspace_readiness = await tooling.get_workspace_readiness() + except Exception: # noqa: BLE001 + workspace_readiness = None + system_prompt = _build_system_prompt( agent_configuration, settings=settings, workspace_appendix=_build_workspace_prompt_appendix(settings), + workspace_readiness=workspace_readiness, ) history_lines = [ f"{entry.get('role', 'unknown')}: {entry.get('content', '')}" @@ -381,19 +401,17 @@ async def _stream_with_claude_sdk( yield {"type": "thinking", "recent": list(recent)} sdk_env = _build_sdk_env(settings) - workspace_dir = settings.workspace_dir.strip() or "/workspace" - tooling = RuntimeTooling(settings=settings) options_kwargs: dict[str, Any] = {} if settings.workload_token.strip(): from kartograph_agent_runtime.schema_tools import ( - KARTOGRAPH_SCHEMA_TOOL_NAMES, + GMA_ALLOWED_TOOL_NAMES, build_kartograph_schema_mcp_server, ) options_kwargs["mcp_servers"] = { "kartograph": build_kartograph_schema_mcp_server(tooling), } - options_kwargs["allowed_tools"] = list(KARTOGRAPH_SCHEMA_TOOL_NAMES) + options_kwargs["allowed_tools"] = list(GMA_ALLOWED_TOOL_NAMES) options = ClaudeAgentOptions( system_prompt=system_prompt, env=sdk_env, diff --git a/src/agent-runtime/kartograph_agent_runtime/schema_tools.py b/src/agent-runtime/kartograph_agent_runtime/schema_tools.py index 212e78773..463cfc5e6 100644 --- a/src/agent-runtime/kartograph_agent_runtime/schema_tools.py +++ b/src/agent-runtime/kartograph_agent_runtime/schema_tools.py @@ -8,14 +8,21 @@ from kartograph_agent_runtime.tools import RuntimeTooling +WORKSPACE_FILE_TOOL_NAMES = ("Read", "Grep", "Glob") + KARTOGRAPH_SCHEMA_TOOL_NAMES = ( "kartograph_get_schema_authoring_guide", + 
"kartograph_get_workspace_readiness", "kartograph_get_schema_ontology", "kartograph_save_schema_ontology", "kartograph_apply_graph_mutations", + "kartograph_list_instances_by_type", + "kartograph_list_relationship_instances", "kartograph_search_graph_by_slug", ) +GMA_ALLOWED_TOOL_NAMES = KARTOGRAPH_SCHEMA_TOOL_NAMES + WORKSPACE_FILE_TOOL_NAMES + def build_kartograph_schema_mcp_server(tooling: RuntimeTooling): """Register Kartograph schema tools on an SDK MCP server.""" @@ -36,6 +43,20 @@ async def get_schema_authoring_guide(_args: dict[str, Any]) -> dict[str, Any]: "is_error": True, } + @tool( + "kartograph_get_workspace_readiness", + "Return bootstrap readiness: prepopulated gaps, live instance counts, and blocking reasons.", + {}, + ) + async def get_workspace_readiness(_args: dict[str, Any]) -> dict[str, Any]: + try: + return RuntimeTooling.format_tool_result(await tooling.get_workspace_readiness()) + except Exception as exc: # noqa: BLE001 + return { + "content": [{"type": "text", "text": f"Failed to load workspace readiness: {exc}"}], + "is_error": True, + } + @tool( "kartograph_get_schema_ontology", "Read the current canonical ontology (node_types and edge_types) for this knowledge graph.", @@ -99,6 +120,76 @@ async def apply_graph_mutations(args: dict[str, Any]) -> dict[str, Any]: "is_error": True, } + @tool( + "kartograph_list_instances_by_type", + "List entity instances for one type with pagination (use to verify prepopulation).", + {"entity_type": str, "limit": int, "offset": int}, + ) + async def list_instances_by_type(args: dict[str, Any]) -> dict[str, Any]: + entity_type = str(args.get("entity_type") or "").strip() + if not entity_type: + return { + "content": [{"type": "text", "text": "entity_type must not be empty."}], + "is_error": True, + } + limit = args.get("limit", 100) + offset = args.get("offset", 0) + try: + return RuntimeTooling.format_tool_result( + await tooling.list_instances_by_type( + entity_type=entity_type, + limit=int(limit) if 
isinstance(limit, int) else 100, + offset=int(offset) if isinstance(offset, int) else 0, + ), + ) + except Exception as exc: # noqa: BLE001 + return { + "content": [{"type": "text", "text": f"Failed to list instances: {exc}"}], + "is_error": True, + } + + @tool( + "kartograph_list_relationship_instances", + "List relationship instances with source/target slugs and IDs for edge prepopulation.", + { + "relationship_type": str, + "source_entity_type": str, + "target_entity_type": str, + "limit": int, + "offset": int, + }, + ) + async def list_relationship_instances(args: dict[str, Any]) -> dict[str, Any]: + relationship_type = str(args.get("relationship_type") or "").strip() + if not relationship_type: + return { + "content": [{"type": "text", "text": "relationship_type must not be empty."}], + "is_error": True, + } + source_entity_type = args.get("source_entity_type") + target_entity_type = args.get("target_entity_type") + limit = args.get("limit", 100) + offset = args.get("offset", 0) + try: + return RuntimeTooling.format_tool_result( + await tooling.list_relationship_instances( + relationship_type=relationship_type, + source_entity_type=str(source_entity_type).strip() + if source_entity_type + else None, + target_entity_type=str(target_entity_type).strip() + if target_entity_type + else None, + limit=int(limit) if isinstance(limit, int) else 100, + offset=int(offset) if isinstance(offset, int) else 0, + ), + ) + except Exception as exc: # noqa: BLE001 + return { + "content": [{"type": "text", "text": f"Failed to list relationships: {exc}"}], + "is_error": True, + } + @tool( "kartograph_search_graph_by_slug", "Search existing graph nodes by slug within the active knowledge graph.", @@ -130,9 +221,12 @@ async def search_graph_by_slug(args: dict[str, Any]) -> dict[str, Any]: version="1.0.0", tools=[ get_schema_authoring_guide, + get_workspace_readiness, get_schema_ontology, save_schema_ontology, apply_graph_mutations, + list_instances_by_type, + 
list_relationship_instances, search_graph_by_slug, ], ) diff --git a/src/agent-runtime/kartograph_agent_runtime/settings.py b/src/agent-runtime/kartograph_agent_runtime/settings.py index 02d899ce4..4936c8492 100644 --- a/src/agent-runtime/kartograph_agent_runtime/settings.py +++ b/src/agent-runtime/kartograph_agent_runtime/settings.py @@ -28,7 +28,7 @@ class AgentRuntimeSettings(BaseSettings): gcloud_config_dir: str = Field(default="", alias="CLOUDSDK_CONFIG") google_application_credentials: str = Field(default="", alias="GOOGLE_APPLICATION_CREDENTIALS") home_dir: str = Field(default="/tmp", alias="HOME") - turn_timeout_seconds: float = Field(default=600.0, ge=30.0, le=900.0, alias="KARTOGRAPH_AGENT_TURN_TIMEOUT_SECONDS") + turn_timeout_seconds: float = Field(default=1000.0, ge=30.0, le=1200.0, alias="KARTOGRAPH_AGENT_TURN_TIMEOUT_SECONDS") max_turns: int = Field(default=500, ge=1, le=1000, alias="KARTOGRAPH_AGENT_MAX_TURNS") def vertex_enabled(self) -> bool: diff --git a/src/agent-runtime/kartograph_agent_runtime/tools.py b/src/agent-runtime/kartograph_agent_runtime/tools.py index 8729aa9e2..7b8ab8f98 100644 --- a/src/agent-runtime/kartograph_agent_runtime/tools.py +++ b/src/agent-runtime/kartograph_agent_runtime/tools.py @@ -30,6 +30,13 @@ async def get_schema_authoring_guide(self) -> dict[str, Any]: response.raise_for_status() return response.json() + async def get_workspace_readiness(self) -> dict[str, Any]: + url = f"{self._base_url()}/extraction/workloads/schema/readiness" + async with httpx.AsyncClient(timeout=30.0) as client: + response = await client.get(url, headers=self._headers()) + response.raise_for_status() + return response.json() + async def get_schema_ontology(self) -> dict[str, Any]: url = f"{self._base_url()}/extraction/workloads/schema/ontology" async with httpx.AsyncClient(timeout=30.0) as client: @@ -55,6 +62,48 @@ async def apply_graph_mutations(self, *, jsonl: str) -> dict[str, Any]: response.raise_for_status() return response.json() + 
async def list_instances_by_type( + self, + *, + entity_type: str, + limit: int = 100, + offset: int = 0, + ) -> dict[str, Any]: + url = f"{self._base_url()}/extraction/workloads/graph/instances" + params = { + "entity_type": entity_type, + "limit": str(max(1, min(limit, 500))), + "offset": str(max(0, offset)), + } + async with httpx.AsyncClient(timeout=30.0) as client: + response = await client.get(url, headers=self._headers(), params=params) + response.raise_for_status() + return response.json() + + async def list_relationship_instances( + self, + *, + relationship_type: str, + source_entity_type: str | None = None, + target_entity_type: str | None = None, + limit: int = 100, + offset: int = 0, + ) -> dict[str, Any]: + url = f"{self._base_url()}/extraction/workloads/graph/relationships" + params: dict[str, str] = { + "relationship_type": relationship_type, + "limit": str(max(1, min(limit, 500))), + "offset": str(max(0, offset)), + } + if source_entity_type: + params["source_entity_type"] = source_entity_type + if target_entity_type: + params["target_entity_type"] = target_entity_type + async with httpx.AsyncClient(timeout=30.0) as client: + response = await client.get(url, headers=self._headers(), params=params) + response.raise_for_status() + return response.json() + async def search_graph_by_slug( self, *, slug: str, entity_type: str | None = None ) -> dict[str, Any]: diff --git a/src/agent-runtime/tests/test_agent_prompt.py b/src/agent-runtime/tests/test_agent_prompt.py index 9cdffef84..0415f2d87 100644 --- a/src/agent-runtime/tests/test_agent_prompt.py +++ b/src/agent-runtime/tests/test_agent_prompt.py @@ -36,6 +36,31 @@ def test_build_agent_system_prompt_includes_skills_tools_and_session_scope() -> assert "Files here" in prompt +def test_build_agent_system_prompt_includes_workspace_readiness() -> None: + prompt = build_agent_system_prompt( + {"system_prompt": "Base"}, + settings=AgentRuntimeSettings( + KARTOGRAPH_WORKLOAD_TOKEN="token", + 
KARTOGRAPH_KNOWLEDGE_GRAPH_ID="kg-123", + ), + workspace_readiness={ + "prepopulated_entity_types_without_instances_live": ["folder"], + "prepopulated_relationship_types_without_instances_live": [], + "prepopulated_entity_types": [ + {"label": "folder", "live_instance_count": 0, "metadata_instance_count": 0} + ], + "blocking_reasons": ["Prepopulated entity types require instances before transition: folder"], + "transition_eligible": False, + }, + ) + + assert "Workspace readiness" in prompt + assert "`folder`" in prompt + assert "kartograph_get_workspace_readiness" in prompt + assert "Read" in prompt + assert "Glob" in prompt + + def test_build_agent_system_prompt_omits_tools_without_workload_token() -> None: prompt = build_agent_system_prompt( {"system_prompt": "Base"}, diff --git a/src/agent-runtime/tests/test_executor.py b/src/agent-runtime/tests/test_executor.py index 93bade7e8..e718b131d 100644 --- a/src/agent-runtime/tests/test_executor.py +++ b/src/agent-runtime/tests/test_executor.py @@ -55,6 +55,41 @@ def test_build_workspace_prompt_appendix_prefers_sources_index(tmp_path: Path) - assert "pkg/api/adapter_status_types_test.go" in appendix +def test_build_workspace_prompt_appendix_includes_extension_counts(tmp_path: Path) -> None: + package_root = tmp_path / "repository-files" / "hyperfleet-api" / "pkg" / "api" + package_root.mkdir(parents=True) + (package_root / "adapter_status_types_test.go").write_text("package api\n", encoding="utf-8") + (package_root / "README.md").write_text("# docs\n", encoding="utf-8") + (tmp_path / "sources-index.json").write_text( + json.dumps( + { + "version": 1, + "knowledge_graph_id": "kg-1", + "sources": [ + { + "job_package_id": "pkg-1", + "data_source_id": "ds-hyperfleet-api", + "data_source_name": "Hyperfleet API", + "repository_folder": "hyperfleet-api", + "entry_count": 2, + "repository_root": "repository-files/hyperfleet-api", + "sample_paths": ["pkg/api/adapter_status_types_test.go"], + "file_extension_counts": {".go": 
1, ".md": 1}, + } + ], + } + ), + encoding="utf-8", + ) + + appendix = _build_workspace_prompt_appendix( + AgentRuntimeSettings(KARTOGRAPH_WORKSPACE_DIR=str(tmp_path)) + ) + + assert ".go=1" in appendix + assert ".md=1" in appendix + + def test_build_workspace_prompt_appendix_lists_materialized_repository_files( tmp_path: Path, ) -> None: diff --git a/src/agent-runtime/tests/test_schema_tools.py b/src/agent-runtime/tests/test_schema_tools.py index 18b19ebff..8e864f086 100644 --- a/src/agent-runtime/tests/test_schema_tools.py +++ b/src/agent-runtime/tests/test_schema_tools.py @@ -12,9 +12,19 @@ def test_schema_tool_names_cover_authoring_surface() -> None: assert "kartograph_get_schema_authoring_guide" in KARTOGRAPH_SCHEMA_TOOL_NAMES + assert "kartograph_get_workspace_readiness" in KARTOGRAPH_SCHEMA_TOOL_NAMES assert "kartograph_get_schema_ontology" in KARTOGRAPH_SCHEMA_TOOL_NAMES assert "kartograph_save_schema_ontology" in KARTOGRAPH_SCHEMA_TOOL_NAMES assert "kartograph_apply_graph_mutations" in KARTOGRAPH_SCHEMA_TOOL_NAMES + assert "kartograph_list_instances_by_type" in KARTOGRAPH_SCHEMA_TOOL_NAMES + assert "kartograph_list_relationship_instances" in KARTOGRAPH_SCHEMA_TOOL_NAMES + + +def test_gma_allowed_tools_include_workspace_file_tools() -> None: + from kartograph_agent_runtime.schema_tools import GMA_ALLOWED_TOOL_NAMES, WORKSPACE_FILE_TOOL_NAMES + + for tool_name in WORKSPACE_FILE_TOOL_NAMES: + assert tool_name in GMA_ALLOWED_TOOL_NAMES def test_build_kartograph_schema_mcp_server_registers_tools() -> None: diff --git a/src/api/extraction/application/schema_authoring_guide.py b/src/api/extraction/application/schema_authoring_guide.py index 6ed98f4fd..efeeec906 100644 --- a/src/api/extraction/application/schema_authoring_guide.py +++ b/src/api/extraction/application/schema_authoring_guide.py @@ -4,13 +4,17 @@ # Kartograph schema authoring (Graph Management Assistant) Use the Kartograph schema tools — never probe undocumented HTTP routes. 
+Use Read, Grep, and Glob against the session workspace mount to scan prepared repository files. ## Workflow 1. Call `kartograph_get_schema_authoring_guide` (this document). -2. Call `kartograph_get_schema_ontology` to read the current entity/relationship types. -3. Edit the ontology JSON (full replace) and call `kartograph_save_schema_ontology`. -4. For instances, call `kartograph_apply_graph_mutations` with JSONL lines. +2. Call `kartograph_get_workspace_readiness` to see prepopulated gaps and live instance counts. +3. Call `kartograph_get_schema_ontology` to read the current entity/relationship types. +4. Edit the ontology JSON (full replace) and call `kartograph_save_schema_ontology`. +5. Scan `repository-files/<data_source_name>/` with Read/Grep/Glob to derive instances. +6. Create entity instances in batches via `kartograph_apply_graph_mutations` (JSONL CREATE lines). +7. Verify with `kartograph_list_instances_by_type`, `kartograph_list_relationship_instances`, and `kartograph_search_graph_by_slug`. ## Entity type (node type) shape @@ -56,44 +60,66 @@ Apply after types exist. One JSON object per line. 
-Define-only line (usually handled by save_schema_ontology instead): - -```json -{"op":"DEFINE","type":"node","label":"service","description":"A service","required_properties":["name"]} -``` - Create entity instance: ```json {"op":"CREATE","type":"node","id":"service:0123456789abcdef","label":"service","set_properties":{"name":"api-gateway","slug":"api-gateway","data_source_id":"schema-bootstrap","source_path":"graph-management-assistant"}} ``` -Create relationship instance: +Create relationship instance (requires entity node IDs from prior CREATE or list tool): ```json {"op":"CREATE","type":"edge","id":"depends_on:0123456789abc001","label":"depends_on","start_id":"service:0123456789abcdef","end_id":"service:fedcba9876543210","set_properties":{"data_source_id":"schema-bootstrap","source_path":"graph-management-assistant"}} ``` Rules: -- `id` format: `{label}:{16 lowercase hex chars}`. +- `id` format: `{label}:{16 lowercase hex chars}` — generate with `secrets.token_hex(8)`. - CREATE requires `data_source_id` and `source_path` in `set_properties`. -- Node CREATE requires `slug` in `set_properties`. +- Node CREATE requires `slug` in `set_properties` (kebab-case, unique per type). - `knowledge_graph_id` is stamped by the platform — do not set it. +- Apply in batches of 25–50 CREATE lines; create all entity nodes before relationship edges. +- Sort instances deterministically (by slug or path) before emitting CREATE lines. + +## Instance generation cookbook + +Scan prepared files under `repository-files/<data_source_slug>/` (see session workspace appendix). 
+ +| Pattern | When to use | Scan strategy | Slug rule | Key properties | +|---------|-------------|---------------|-----------|----------------| +| **data_source** | One instance per connected repo | Top-level folders under `repository-files/` | folder name | `name`, `source_type`, `file_count` | +| **folder** | Directory hierarchy anchors | `Glob **/*` dirs per data source | `folder-{path-kebab}` | `folder_path`, `data_source`, child counts | +| **source_file** | File-level extraction jobs | `Glob **/*.{go,py,yaml,md,json,...}` | path → kebab (`pkg-api-foo-go`) | `file_path`, `source_path`, `name` | + +Workflow for bulk prepopulation: +1. Mark the entity type `prepopulated: true` and save ontology. +2. Use Glob to list candidate paths (exclude dot-directories). +3. Derive slugs deterministically from relative paths. +4. Call `kartograph_search_graph_by_slug` for a sample slug to avoid duplicates. +5. Emit JSONL CREATE batches via `kartograph_apply_graph_mutations`. +6. Confirm coverage with `kartograph_list_instances_by_type`. +7. For prepopulated relationships: use `kartograph_list_relationship_instances` or entity lists to resolve `start_id`/`end_id`, then CREATE edges. ## Readiness checklist Bootstrap transition needs: - At least one entity type and one relationship type. -- Every `prepopulated=true` entity type must have instances (use CREATE lines). -- Every `prepopulated=true` relationship type must have instances (use CREATE edge lines). -- A prepopulated relationship type may only reference entity types that are also - prepopulated (for example `contains` from `test` to `api_endpoint` when both are - prepopulated). +- Every `prepopulated=true` entity type must have at least one live instance. +- Every `prepopulated=true` relationship type must have at least one live edge instance. +- A prepopulated relationship type may only reference entity types that are also prepopulated. 
+ +Call `kartograph_get_workspace_readiness` for: +- `prepopulated_entity_types_without_instances_live` — entity types still needing CREATE lines. +- `prepopulated_relationship_types_without_instances_live` — relationship keys still needing edge CREATE lines. +- `prepopulated_entity_types` / `prepopulated_relationship_types` — metadata vs live counts. +- `blocking_reasons` — transition blockers. + +After applying instance mutations, ontology `prepopulated_instance_count` metadata is refreshed automatically from live graph totals. ## Repository context -Use Read/Grep/Glob on prepared JobPackage files under `repository-files/<data_source_name>/` -(one folder per connected data source for this knowledge graph; folder names are slugified -data source names such as `hyperfleet-api`, not other knowledge graphs) to infer domain -concepts — then model them as ontology types, not as ad-hoc API discoveries. +Prepared JobPackage files live under `repository-files/<data_source_name>/` relative to the +workspace mount (one folder per connected data source; names are slugified data source names +such as `hyperfleet-api`). Use Read, Grep, and Glob on those paths — not HTTP discovery. +The session workspace appendix lists data sources, file counts, sample paths, and extension +hints when available. """.strip() diff --git a/src/api/extraction/application/skill_resolution_service.py b/src/api/extraction/application/skill_resolution_service.py index 42b4c3c59..de963970b 100644 --- a/src/api/extraction/application/skill_resolution_service.py +++ b/src/api/extraction/application/skill_resolution_service.py @@ -88,7 +88,20 @@ class ResolvedExtractionSkillPack: "instance_authoring": ( "Create entity or relationship instances with kartograph_apply_graph_mutations " "JSONL CREATE lines after types exist. Nodes require slug, data_source_id, " - "and source_path in set_properties. Use ids like label:16hex." + "and source_path in set_properties. Use ids like label:16hex. 
Create entity " + "nodes before edges; batch 25–50 lines per apply call." + ), + "relationship_instance_authoring": ( + "Create relationship instances only after endpoint entity nodes exist. Use " + "kartograph_list_instances_by_type or kartograph_list_relationship_instances " + "to resolve start_id/end_id and source_slug/target_slug pairs. Edge CREATE " + "lines require start_id, end_id, data_source_id, and source_path." + ), + "instance_generation": ( + "For prepopulated types, scan repository-files with Read/Grep/Glob using the " + "data_source, folder, or source_file patterns from the authoring guide. " + "Derive kebab-case slugs from paths, apply CREATE batches, then verify with " + "kartograph_list_instances_by_type and kartograph_get_workspace_readiness." ), "prepopulation_validation": ( "Prioritize prepopulated entity and relationship type coverage. Entity types " @@ -98,14 +111,19 @@ class ResolvedExtractionSkillPack: ), "schema_tools": ( "Available MCP tools (call by exact name): kartograph_get_schema_authoring_guide, " - "kartograph_get_schema_ontology, kartograph_save_schema_ontology, " - "kartograph_apply_graph_mutations, kartograph_search_graph_by_slug. " - "Always start with get_schema_authoring_guide, then get_schema_ontology before saves." + "kartograph_get_workspace_readiness, kartograph_get_schema_ontology, " + "kartograph_save_schema_ontology, kartograph_apply_graph_mutations, " + "kartograph_list_instances_by_type, kartograph_list_relationship_instances, " + "kartograph_search_graph_by_slug. " + "Filesystem tools: Read, Grep, Glob (workspace mount only). " + "Always start with get_schema_authoring_guide, then get_workspace_readiness." ), "tools_quickstart": ( - "Workflow: (1) kartograph_get_schema_authoring_guide → (2) kartograph_get_schema_ontology " - "→ (3) kartograph_save_schema_ontology for types → (4) kartograph_apply_graph_mutations " - "for instances → (5) kartograph_search_graph_by_slug to verify. 
" + "Workflow: (1) kartograph_get_schema_authoring_guide → " + "(2) kartograph_get_workspace_readiness → (3) kartograph_get_schema_ontology " + "→ (4) Read/Grep/Glob repository-files → (5) kartograph_save_schema_ontology " + "for types → (6) kartograph_apply_graph_mutations for instances in batches " + "→ (7) kartograph_list_instances_by_type to verify. " "Never call /management or /graph HTTP routes." ), }, diff --git a/src/api/extraction/infrastructure/container_workload_runtime.py b/src/api/extraction/infrastructure/container_workload_runtime.py index 765f56617..e2ac4b789 100644 --- a/src/api/extraction/infrastructure/container_workload_runtime.py +++ b/src/api/extraction/infrastructure/container_workload_runtime.py @@ -62,7 +62,7 @@ def __init__( gcloud_config_container_path: str = "/gcloud/config", container_run_uid: int | None = None, container_run_gid: int | None = None, - agent_turn_timeout_seconds: float = 600.0, + agent_turn_timeout_seconds: float = 1000.0, agent_max_turns: int = 500, ) -> None: self._container_runtime = container_runtime diff --git a/src/api/extraction/infrastructure/sticky_session_workdir_materializer.py b/src/api/extraction/infrastructure/sticky_session_workdir_materializer.py index 8377098e4..237efa724 100644 --- a/src/api/extraction/infrastructure/sticky_session_workdir_materializer.py +++ b/src/api/extraction/infrastructure/sticky_session_workdir_materializer.py @@ -84,6 +84,9 @@ def prepare( "sync_mode": str(manifest.sync_mode), "repository_root": f"repository-files/{repository_folder}", "sample_paths": sample_paths, + "file_extension_counts": self._extension_counts( + repository_files_dir / repository_folder + ), } ) @@ -96,6 +99,21 @@ def prepare( ) return session_root + @staticmethod + def _extension_counts(root: Path) -> dict[str, int]: + """Summarize file extensions under one materialized repository folder.""" + counts: dict[str, int] = {} + if not root.is_dir(): + return counts + for file_path in root.rglob("*"): + if not 
file_path.is_file(): + continue + if any(part.startswith(".") for part in file_path.parts): + continue + suffix = file_path.suffix.lower() or "(no extension)" + counts[suffix] = counts.get(suffix, 0) + 1 + return dict(sorted(counts.items())) + def _write_workspace_index( self, *, diff --git a/src/api/extraction/infrastructure/workload_runtime_settings.py b/src/api/extraction/infrastructure/workload_runtime_settings.py index 045646ac9..6aa0bcb8b 100644 --- a/src/api/extraction/infrastructure/workload_runtime_settings.py +++ b/src/api/extraction/infrastructure/workload_runtime_settings.py @@ -50,7 +50,7 @@ class ExtractionWorkloadRuntimeSettings(BaseSettings): ), ) sticky_health_timeout_seconds: float = Field(default=90.0, ge=5.0, le=600.0) - sticky_turn_timeout_seconds: float = Field(default=600.0, ge=30.0, le=900.0) + sticky_turn_timeout_seconds: float = Field(default=1000.0, ge=30.0, le=1200.0) sticky_max_turns: int = Field(default=500, ge=1, le=1000) vertex_project_id: str = Field(default="") vertex_region: str = Field(default="us-east5") diff --git a/src/api/extraction/ports/workload_graph.py b/src/api/extraction/ports/workload_graph.py index 744a565e4..d24f808c1 100644 --- a/src/api/extraction/ports/workload_graph.py +++ b/src/api/extraction/ports/workload_graph.py @@ -16,6 +16,21 @@ class WorkloadGraphNode: properties: dict +@dataclass(frozen=True) +class WorkloadGraphRelationship: + """Graph relationship returned to sticky session agent tools.""" + + id: str + relationship_type: str + start_id: str + end_id: str + source_slug: str | None + target_slug: str | None + source_entity_type: str + target_entity_type: str + properties: dict + + class IWorkloadGraphReader(Protocol): """Read-only graph access scoped to a workload token context.""" @@ -29,3 +44,51 @@ async def search_by_slug( ) -> list[WorkloadGraphNode]: """Search nodes by slug within one knowledge graph.""" ... 
+ + async def list_instances_by_type( + self, + *, + tenant_id: str, + knowledge_graph_id: str, + entity_type: str, + limit: int = 100, + offset: int = 0, + ) -> tuple[list[WorkloadGraphNode], int]: + """List entity instances for one type; returns (page, total_count).""" + ... + + async def count_entity_instances_by_type( + self, + *, + tenant_id: str, + knowledge_graph_id: str, + entity_type: str, + ) -> int: + """Count live entity instances for one type.""" + ... + + async def list_relationship_instances( + self, + *, + tenant_id: str, + knowledge_graph_id: str, + relationship_type: str, + source_entity_type: str | None = None, + target_entity_type: str | None = None, + limit: int = 100, + offset: int = 0, + ) -> tuple[list[WorkloadGraphRelationship], int]: + """List relationship instances; returns (page, total_count).""" + ... + + async def count_relationship_instances( + self, + *, + tenant_id: str, + knowledge_graph_id: str, + relationship_type: str, + source_entity_type: str | None = None, + target_entity_type: str | None = None, + ) -> int: + """Count live relationship instances for one relationship type.""" + ... 
diff --git a/src/api/extraction/presentation/workload_routes.py b/src/api/extraction/presentation/workload_routes.py index 93d5953e7..b60dae24e 100644 --- a/src/api/extraction/presentation/workload_routes.py +++ b/src/api/extraction/presentation/workload_routes.py @@ -74,6 +74,52 @@ class WorkloadSchemaAuthoringGuideResponse(BaseModel): guide: str +class WorkloadInstanceListResponse(BaseModel): + """Paginated entity instances for one type.""" + + entity_type: str + nodes: list[dict] + count: int + total: int + limit: int + offset: int + + +class WorkloadReadinessResponse(BaseModel): + """Bootstrap readiness snapshot for schema prepopulation.""" + + knowledge_graph_id: str + has_minimum_entity_types: bool + has_minimum_relationship_types: bool + prepopulated_types_ready_metadata: bool + prepopulated_types_without_instances_metadata: list[str] = Field(default_factory=list) + prepopulated_relationship_types_without_instances_metadata: list[str] = Field( + default_factory=list + ) + prepopulated_entity_types_without_instances_live: list[str] = Field(default_factory=list) + prepopulated_relationship_types_without_instances_live: list[str] = Field( + default_factory=list + ) + prepopulated_types_ready_live: bool = False + prepopulated_entity_types: list[dict[str, object]] = Field(default_factory=list) + prepopulated_relationship_types: list[dict[str, object]] = Field(default_factory=list) + blocking_reasons: list[str] = Field(default_factory=list) + transition_eligible: bool + + +class WorkloadRelationshipListResponse(BaseModel): + """Paginated relationship instances for one type.""" + + relationship_type: str + source_entity_type: str | None = None + target_entity_type: str | None = None + relationships: list[dict] + count: int + total: int + limit: int + offset: int + + @router.get( "/schema/authoring-guide", response_model=WorkloadSchemaAuthoringGuideResponse, @@ -192,3 +238,116 @@ async def workload_search_graph_by_slug( for node in nodes ] return 
WorkloadGraphSearchResponse(nodes=serialized, count=len(serialized)) + + +@router.get( + "/graph/instances", + response_model=WorkloadInstanceListResponse, +) +async def workload_list_instances_by_type( + entity_type: Annotated[str, Query(min_length=1)], + limit: Annotated[int, Query(ge=1, le=500)] = 100, + offset: Annotated[int, Query(ge=0)] = 0, + auth: Annotated[WorkloadAuthContext, Depends(get_workload_auth_context)] = ..., + reader: Annotated[IWorkloadGraphReader, Depends(get_workload_graph_reader)] = ..., +) -> WorkloadInstanceListResponse: + _require_chat_scope(auth) + + nodes, total = await reader.list_instances_by_type( + tenant_id=auth.tenant_id, + knowledge_graph_id=auth.knowledge_graph_id, + entity_type=entity_type, + limit=limit, + offset=offset, + ) + serialized = [ + { + "id": node.id, + "entity_type": node.entity_type, + "slug": node.slug, + "properties": node.properties, + } + for node in nodes + ] + return WorkloadInstanceListResponse( + entity_type=entity_type, + nodes=serialized, + count=len(serialized), + total=total, + limit=limit, + offset=offset, + ) + + +@router.get( + "/graph/relationships", + response_model=WorkloadRelationshipListResponse, +) +async def workload_list_relationship_instances( + relationship_type: Annotated[str, Query(min_length=1)], + source_entity_type: Annotated[str | None, Query()] = None, + target_entity_type: Annotated[str | None, Query()] = None, + limit: Annotated[int, Query(ge=1, le=500)] = 100, + offset: Annotated[int, Query(ge=0)] = 0, + auth: Annotated[WorkloadAuthContext, Depends(get_workload_auth_context)] = ..., + reader: Annotated[IWorkloadGraphReader, Depends(get_workload_graph_reader)] = ..., +) -> WorkloadRelationshipListResponse: + _require_chat_scope(auth) + + relationships, total = await reader.list_relationship_instances( + tenant_id=auth.tenant_id, + knowledge_graph_id=auth.knowledge_graph_id, + relationship_type=relationship_type, + source_entity_type=source_entity_type, + 
target_entity_type=target_entity_type, + limit=limit, + offset=offset, + ) + serialized = [ + { + "id": rel.id, + "relationship_type": rel.relationship_type, + "start_id": rel.start_id, + "end_id": rel.end_id, + "source_slug": rel.source_slug, + "target_slug": rel.target_slug, + "source_entity_type": rel.source_entity_type, + "target_entity_type": rel.target_entity_type, + "properties": rel.properties, + } + for rel in relationships + ] + return WorkloadRelationshipListResponse( + relationship_type=relationship_type, + source_entity_type=source_entity_type, + target_entity_type=target_entity_type, + relationships=serialized, + count=len(serialized), + total=total, + limit=limit, + offset=offset, + ) + + +@router.get( + "/schema/readiness", + response_model=WorkloadReadinessResponse, +) +async def workload_get_workspace_readiness( + auth: Annotated[WorkloadAuthContext, Depends(get_workload_auth_context)] = ..., + schema_service: Annotated[IWorkloadSchemaService, Depends(get_workload_schema_service)] = ..., + reader: Annotated[IWorkloadGraphReader, Depends(get_workload_graph_reader)] = ..., +) -> WorkloadReadinessResponse: + _require_chat_scope(auth) + from infrastructure.extraction_workload.workspace_readiness import ( + build_workload_readiness_snapshot, + ) + + ontology = await schema_service.get_ontology(knowledge_graph_id=auth.knowledge_graph_id) + snapshot = await build_workload_readiness_snapshot( + ontology=ontology, + knowledge_graph_id=auth.knowledge_graph_id, + tenant_id=auth.tenant_id, + graph_reader=reader, + ) + return WorkloadReadinessResponse(**snapshot) diff --git a/src/api/graph/application/services/graph_query_service.py b/src/api/graph/application/services/graph_query_service.py index 9320b4aea..1e63cfc0c 100644 --- a/src/api/graph/application/services/graph_query_service.py +++ b/src/api/graph/application/services/graph_query_service.py @@ -69,6 +69,40 @@ def search_by_slug( ) return nodes + def list_by_label( + self, + node_type: str, + *, + 
knowledge_graph_id: str | None = None, + limit: int = 100, + offset: int = 0, + ) -> list[NodeRecord]: + """List nodes of one entity type.""" + nodes = self._repository.find_nodes_by_label( + node_type, + knowledge_graph_id=knowledge_graph_id, + limit=limit, + offset=offset, + ) + self._probe.slug_searched( + slug=f"type:{node_type}", + node_type=node_type, + result_count=len(nodes), + ) + return nodes + + def count_by_label( + self, + node_type: str, + *, + knowledge_graph_id: str | None = None, + ) -> int: + """Count nodes of one entity type.""" + return self._repository.count_nodes_by_label( + node_type, + knowledge_graph_id=knowledge_graph_id, + ) + def get_neighbors( self, node_id: str, diff --git a/src/api/graph/infrastructure/graph_repository.py b/src/api/graph/infrastructure/graph_repository.py index 17aea8926..e5802f3d7 100644 --- a/src/api/graph/infrastructure/graph_repository.py +++ b/src/api/graph/infrastructure/graph_repository.py @@ -137,6 +137,147 @@ def find_nodes_by_slug( return nodes + def find_nodes_by_label( + self, + node_type: str, + *, + knowledge_graph_id: str | None = None, + limit: int = 100, + offset: int = 0, + ) -> list[NodeRecord]: + """List nodes of one entity type, optionally scoped to a knowledge graph.""" + bounded_limit = max(1, min(limit, 500)) + bounded_offset = max(0, offset) + kg_filter = ( + f", knowledge_graph_id: '{knowledge_graph_id}'" + if knowledge_graph_id + else "" + ) + query = f""" + MATCH (n:{node_type} {{graph_id: '{self._graph_id}'{kg_filter}}}) + RETURN {{node: n}} + SKIP {bounded_offset} + LIMIT {bounded_limit} + """ + result = self._client.execute_cypher(query) + + nodes: list[NodeRecord] = [] + for row in result.rows: + if len(row) > 0 and isinstance(row[0], dict): + result_map = row[0] + if "node" in result_map and result_map["node"] is not None: + nodes.append(self._vertex_to_node_record(result_map["node"])) + return nodes + + def count_nodes_by_label( + self, + node_type: str, + *, + knowledge_graph_id: str 
| None = None, + ) -> int: + """Count nodes of one entity type within an optional knowledge graph scope.""" + kg_filter = ( + f", knowledge_graph_id: '{knowledge_graph_id}'" + if knowledge_graph_id + else "" + ) + query = f""" + MATCH (n:{node_type} {{graph_id: '{self._graph_id}'{kg_filter}}}) + RETURN count(n) AS total + """ + result = self._client.execute_cypher(query) + if not result.rows: + return 0 + row = result.rows[0] + if not row: + return 0 + value = row[0] + if isinstance(value, dict) and "total" in value: + return int(value["total"]) + return int(value) + + def find_relationship_instances( + self, + relationship_label: str, + *, + knowledge_graph_id: str | None = None, + source_entity_type: str | None = None, + target_entity_type: str | None = None, + limit: int = 100, + offset: int = 0, + ) -> list[tuple[EdgeRecord, NodeRecord, NodeRecord]]: + """List relationship instances with resolved source and target nodes.""" + bounded_limit = max(1, min(limit, 500)) + bounded_offset = max(0, offset) + source_type = f":{source_entity_type}" if source_entity_type else "" + target_type = f":{target_entity_type}" if target_entity_type else "" + kg_filter = ( + f", knowledge_graph_id: '{knowledge_graph_id}'" + if knowledge_graph_id + else "" + ) + query = f""" + MATCH (source{source_type})-[edge:{relationship_label} {{ + graph_id: '{self._graph_id}'{kg_filter} + }}]->(target{target_type}) + RETURN {{edge: edge, source: source, target: target}} + SKIP {bounded_offset} + LIMIT {bounded_limit} + """ + result = self._client.execute_cypher(query) + + instances: list[tuple[EdgeRecord, NodeRecord, NodeRecord]] = [] + for row in result.rows: + if len(row) == 0 or not isinstance(row[0], dict): + continue + result_map = row[0] + edge_vertex = result_map.get("edge") + source_vertex = result_map.get("source") + target_vertex = result_map.get("target") + if edge_vertex is None or source_vertex is None or target_vertex is None: + continue + instances.append( + ( + 
self._edge_to_edge_record(edge_vertex), + self._vertex_to_node_record(source_vertex), + self._vertex_to_node_record(target_vertex), + ) + ) + return instances + + def count_relationship_instances( + self, + relationship_label: str, + *, + knowledge_graph_id: str | None = None, + source_entity_type: str | None = None, + target_entity_type: str | None = None, + ) -> int: + """Count relationship instances matching optional endpoint type filters.""" + source_type = f":{source_entity_type}" if source_entity_type else "" + target_type = f":{target_entity_type}" if target_entity_type else "" + kg_filter = ( + f", knowledge_graph_id: '{knowledge_graph_id}'" + if knowledge_graph_id + else "" + ) + query = f""" + MATCH (source{source_type})-[edge:{relationship_label} {{ + graph_id: '{self._graph_id}'{kg_filter} + }}]->(target{target_type}) + RETURN count(edge) AS total + """ + result = self._client.execute_cypher(query) + if not result.rows: + return 0 + row = result.rows[0] + if not row: + return 0 + value = row[0] + if isinstance(value, dict) and "total" in value: + return int(value["total"]) + return int(value) + def get_neighbors( self, node_id: str, diff --git a/src/api/graph/ports/repositories.py b/src/api/graph/ports/repositories.py index a8dd5877a..f99a38b01 100644 --- a/src/api/graph/ports/repositories.py +++ b/src/api/graph/ports/repositories.py @@ -54,6 +54,50 @@ def find_nodes_by_slug( """ ... + def find_nodes_by_label( + self, + node_type: str, + *, + knowledge_graph_id: str | None = None, + limit: int = 100, + offset: int = 0, + ) -> list[NodeRecord]: + """List nodes of one entity type, optionally scoped to a knowledge graph.""" + ... + + def count_nodes_by_label( + self, + node_type: str, + *, + knowledge_graph_id: str | None = None, + ) -> int: + """Count nodes of one entity type within an optional knowledge graph scope.""" + ... 
+ + def find_relationship_instances( + self, + relationship_label: str, + *, + knowledge_graph_id: str | None = None, + source_entity_type: str | None = None, + target_entity_type: str | None = None, + limit: int = 100, + offset: int = 0, + ) -> list[tuple]: + """List relationship instances with source and target nodes.""" + ... + + def count_relationship_instances( + self, + relationship_label: str, + *, + knowledge_graph_id: str | None = None, + source_entity_type: str | None = None, + target_entity_type: str | None = None, + ) -> int: + """Count relationship instances matching optional endpoint filters.""" + ... + def get_neighbors( self, node_id: str, diff --git a/src/api/infrastructure/extraction_workload/dependencies.py b/src/api/infrastructure/extraction_workload/dependencies.py index 4f34f15fd..a3638c9ee 100644 --- a/src/api/infrastructure/extraction_workload/dependencies.py +++ b/src/api/infrastructure/extraction_workload/dependencies.py @@ -50,4 +50,5 @@ def get_workload_schema_service( settings=get_database_settings(), session=session, ), + graph_reader=GraphWorkloadGraphReader(pool=pool, settings=get_database_settings()), ) diff --git a/src/api/infrastructure/extraction_workload/graph_reader.py b/src/api/infrastructure/extraction_workload/graph_reader.py index 52ab33a90..791c6af02 100644 --- a/src/api/infrastructure/extraction_workload/graph_reader.py +++ b/src/api/infrastructure/extraction_workload/graph_reader.py @@ -10,7 +10,7 @@ from infrastructure.database.connection_pool import ConnectionPool from infrastructure.settings import DatabaseSettings -from extraction.ports.workload_graph import IWorkloadGraphReader, WorkloadGraphNode +from extraction.ports.workload_graph import IWorkloadGraphReader, WorkloadGraphNode, WorkloadGraphRelationship class GraphWorkloadGraphReader(IWorkloadGraphReader): @@ -59,3 +59,157 @@ async def search_by_slug( ] finally: client.disconnect() + + async def list_instances_by_type( + self, + *, + tenant_id: str, + 
knowledge_graph_id: str, + entity_type: str, + limit: int = 100, + offset: int = 0, + ) -> tuple[list[WorkloadGraphNode], int]: + graph_name = f"tenant_{tenant_id}" + factory = ConnectionFactory(self._settings, pool=self._pool) + client = AgeGraphClient(self._settings, connection_factory=factory, graph_name=graph_name) + client.connect() + try: + repository = GraphExtractionReadOnlyRepository( + client=client, + graph_id=graph_name, + ) + service = GraphQueryService(repository=repository, probe=DefaultGraphServiceProbe()) + bounded_limit = max(1, min(limit, 500)) + bounded_offset = max(0, offset) + total = service.count_by_label( + entity_type, + knowledge_graph_id=knowledge_graph_id, + ) + nodes = service.list_by_label( + entity_type, + knowledge_graph_id=knowledge_graph_id, + limit=bounded_limit, + offset=bounded_offset, + ) + serialized = [ + WorkloadGraphNode( + id=node.id, + entity_type=node.label, + slug=node.properties.get("slug"), + properties=node.properties, + ) + for node in nodes + ] + return serialized, total + finally: + client.disconnect() + + async def count_entity_instances_by_type( + self, + *, + tenant_id: str, + knowledge_graph_id: str, + entity_type: str, + ) -> int: + graph_name = f"tenant_{tenant_id}" + factory = ConnectionFactory(self._settings, pool=self._pool) + client = AgeGraphClient(self._settings, connection_factory=factory, graph_name=graph_name) + client.connect() + try: + repository = GraphExtractionReadOnlyRepository( + client=client, + graph_id=graph_name, + ) + service = GraphQueryService(repository=repository, probe=DefaultGraphServiceProbe()) + return service.count_by_label( + entity_type, + knowledge_graph_id=knowledge_graph_id, + ) + finally: + client.disconnect() + + @staticmethod + def _slug_from_node(node) -> str | None: + slug = node.properties.get("slug") + return str(slug) if slug else None + + async def list_relationship_instances( + self, + *, + tenant_id: str, + knowledge_graph_id: str, + relationship_type: str, + 
source_entity_type: str | None = None, + target_entity_type: str | None = None, + limit: int = 100, + offset: int = 0, + ) -> tuple[list[WorkloadGraphRelationship], int]: + graph_name = f"tenant_{tenant_id}" + factory = ConnectionFactory(self._settings, pool=self._pool) + client = AgeGraphClient(self._settings, connection_factory=factory, graph_name=graph_name) + client.connect() + try: + repository = GraphExtractionReadOnlyRepository( + client=client, + graph_id=graph_name, + ) + bounded_limit = max(1, min(limit, 500)) + bounded_offset = max(0, offset) + total = repository.count_relationship_instances( + relationship_type, + knowledge_graph_id=knowledge_graph_id, + source_entity_type=source_entity_type, + target_entity_type=target_entity_type, + ) + rows = repository.find_relationship_instances( + relationship_type, + knowledge_graph_id=knowledge_graph_id, + source_entity_type=source_entity_type, + target_entity_type=target_entity_type, + limit=bounded_limit, + offset=bounded_offset, + ) + relationships = [ + WorkloadGraphRelationship( + id=edge.id, + relationship_type=edge.label, + start_id=edge.start_id, + end_id=edge.end_id, + source_slug=self._slug_from_node(source), + target_slug=self._slug_from_node(target), + source_entity_type=source.label, + target_entity_type=target.label, + properties=edge.properties, + ) + for edge, source, target in rows + ] + return relationships, total + finally: + client.disconnect() + + async def count_relationship_instances( + self, + *, + tenant_id: str, + knowledge_graph_id: str, + relationship_type: str, + source_entity_type: str | None = None, + target_entity_type: str | None = None, + ) -> int: + graph_name = f"tenant_{tenant_id}" + factory = ConnectionFactory(self._settings, pool=self._pool) + client = AgeGraphClient(self._settings, connection_factory=factory, graph_name=graph_name) + client.connect() + try: + repository = GraphExtractionReadOnlyRepository( + client=client, + graph_id=graph_name, + ) + return 
repository.count_relationship_instances( + relationship_type, + knowledge_graph_id=knowledge_graph_id, + source_entity_type=source_entity_type, + target_entity_type=target_entity_type, + ) + finally: + client.disconnect() diff --git a/src/api/infrastructure/extraction_workload/schema_service.py b/src/api/infrastructure/extraction_workload/schema_service.py index 75e005577..94d3b716b 100644 --- a/src/api/infrastructure/extraction_workload/schema_service.py +++ b/src/api/infrastructure/extraction_workload/schema_service.py @@ -12,6 +12,9 @@ from infrastructure.extraction_workload.graph_mutation_writer import ( GraphWorkloadGraphMutationWriter, ) +from infrastructure.extraction_workload.workspace_readiness import ( + sync_prepopulated_instance_counts, +) from management.domain.value_objects import OntologyConfig from management.ports.exceptions import CanonicalSchemaMutationError @@ -24,10 +27,12 @@ def __init__( session: AsyncSession, *, mutation_writer: GraphWorkloadGraphMutationWriter, + graph_reader=None, ) -> None: self._session = session self._repository = GraphCanonicalSchemaRepository(session) self._mutation_writer = mutation_writer + self._graph_reader = graph_reader async def get_ontology(self, *, knowledge_graph_id: str) -> OntologyConfig | None: return await self._repository.get_ontology(knowledge_graph_id) @@ -87,5 +92,17 @@ async def apply_mutation_jsonl( await self._session.rollback() return {"applied": False, "errors": errors} + if instance_ops and self._graph_reader is not None: + ontology = await self.get_ontology(knowledge_graph_id=knowledge_graph_id) + if ontology is not None: + synced = await sync_prepopulated_instance_counts( + ontology=ontology, + knowledge_graph_id=knowledge_graph_id, + tenant_id=tenant_id, + graph_reader=self._graph_reader, + ) + if synced is not ontology: + await self._repository.replace_ontology(knowledge_graph_id, synced) + await self._session.commit() return {"applied": True, "errors": [], "operations_applied": 
operations_applied} diff --git a/src/api/infrastructure/extraction_workload/workspace_readiness.py b/src/api/infrastructure/extraction_workload/workspace_readiness.py new file mode 100644 index 000000000..734355b39 --- /dev/null +++ b/src/api/infrastructure/extraction_workload/workspace_readiness.py @@ -0,0 +1,182 @@ +"""Build workspace readiness snapshots for Graph Management Assistant tools.""" + +from __future__ import annotations + +from dataclasses import replace + +from management.application.workspace_readiness import ( + evaluate_workspace_readiness, + prepopulated_gaps_from_live_counts, +) +from management.domain.ontology_prepopulation import relationship_readiness_key +from management.domain.value_objects import EdgeTypeDefinition, NodeTypeDefinition, OntologyConfig + + +async def build_workload_readiness_snapshot( + *, + ontology: OntologyConfig | None, + knowledge_graph_id: str, + tenant_id: str, + graph_reader, +) -> dict[str, object]: + """Merge canonical readiness metadata with live graph instance counts.""" + metadata_readiness = evaluate_workspace_readiness(ontology) + + entity_instance_counts: dict[str, int] = {} + relationship_instance_counts: dict[str, int] = {} + + if ontology is not None: + for node_type in ontology.node_types: + if not node_type.prepopulated: + continue + entity_instance_counts[node_type.label] = await graph_reader.count_entity_instances_by_type( + tenant_id=tenant_id, + knowledge_graph_id=knowledge_graph_id, + entity_type=node_type.label, + ) + + for edge_type in ontology.edge_types: + if not edge_type.prepopulated: + continue + key = relationship_readiness_key(edge_type) + source_label = edge_type.source_labels[0] if edge_type.source_labels else None + target_label = edge_type.target_labels[0] if edge_type.target_labels else None + relationship_instance_counts[key] = await graph_reader.count_relationship_instances( + tenant_id=tenant_id, + knowledge_graph_id=knowledge_graph_id, + relationship_type=edge_type.label, + 
source_entity_type=source_label, + target_entity_type=target_label, + ) + + live_gaps = prepopulated_gaps_from_live_counts( + ontology, + entity_instance_counts=entity_instance_counts, + relationship_instance_counts=relationship_instance_counts, + ) + + prepopulated_entity_types = [ + { + "label": node_type.label, + "metadata_instance_count": node_type.prepopulated_instance_count, + "live_instance_count": entity_instance_counts.get(node_type.label, 0), + } + for node_type in (ontology.node_types if ontology else ()) + if node_type.prepopulated + ] + + prepopulated_relationship_types = [ + { + "key": relationship_readiness_key(edge_type), + "relationship_type": edge_type.label, + "source_entity_type": edge_type.source_labels[0] if edge_type.source_labels else "", + "target_entity_type": edge_type.target_labels[0] if edge_type.target_labels else "", + "metadata_instance_count": edge_type.prepopulated_instance_count, + "live_instance_count": relationship_instance_counts.get( + relationship_readiness_key(edge_type), + 0, + ), + } + for edge_type in (ontology.edge_types if ontology else ()) + if edge_type.prepopulated + ] + + live_entity_gaps = live_gaps["entity_types_without_instances"] + live_relationship_gaps = live_gaps["relationship_types_without_instances"] + live_prepopulated_ready = len(live_entity_gaps) == 0 and len(live_relationship_gaps) == 0 + + blocking_reasons = list(metadata_readiness.blocking_reasons) + if live_entity_gaps and not any("Prepopulated entity types" in reason for reason in blocking_reasons): + blocking_reasons.append( + "Live graph missing prepopulated entity instances: " + + ", ".join(live_entity_gaps) + ) + if live_relationship_gaps and not any( + "Prepopulated relationship types" in reason for reason in blocking_reasons + ): + blocking_reasons.append( + "Live graph missing prepopulated relationship instances: " + + ", ".join(live_relationship_gaps) + ) + + transition_eligible = ( + metadata_readiness.has_minimum_entity_types + and 
metadata_readiness.has_minimum_relationship_types + and live_prepopulated_ready + ) + + return { + "knowledge_graph_id": knowledge_graph_id, + "has_minimum_entity_types": metadata_readiness.has_minimum_entity_types, + "has_minimum_relationship_types": metadata_readiness.has_minimum_relationship_types, + "prepopulated_types_ready_metadata": metadata_readiness.prepopulated_types_ready, + "prepopulated_types_ready_live": live_prepopulated_ready, + "prepopulated_types_without_instances_metadata": list( + metadata_readiness.prepopulated_types_without_instances + ), + "prepopulated_relationship_types_without_instances_metadata": list( + metadata_readiness.prepopulated_relationship_types_without_instances + ), + "prepopulated_entity_types_without_instances_live": list(live_entity_gaps), + "prepopulated_relationship_types_without_instances_live": list(live_relationship_gaps), + "prepopulated_entity_types": prepopulated_entity_types, + "prepopulated_relationship_types": prepopulated_relationship_types, + "blocking_reasons": blocking_reasons, + "transition_eligible": transition_eligible, + } + + +async def sync_prepopulated_instance_counts( + *, + ontology: OntologyConfig, + knowledge_graph_id: str, + tenant_id: str, + graph_reader, +) -> OntologyConfig: + """Refresh ontology metadata counts from live graph instance totals.""" + updated_nodes: list[NodeTypeDefinition] = [] + nodes_changed = False + for node_type in ontology.node_types: + if not node_type.prepopulated: + updated_nodes.append(node_type) + continue + live_count = await graph_reader.count_entity_instances_by_type( + tenant_id=tenant_id, + knowledge_graph_id=knowledge_graph_id, + entity_type=node_type.label, + ) + if live_count != node_type.prepopulated_instance_count: + nodes_changed = True + updated_nodes.append( + replace(node_type, prepopulated_instance_count=live_count), + ) + + updated_edges: list[EdgeTypeDefinition] = [] + edges_changed = False + for edge_type in ontology.edge_types: + if not 
edge_type.prepopulated: + updated_edges.append(edge_type) + continue + source_label = edge_type.source_labels[0] if edge_type.source_labels else None + target_label = edge_type.target_labels[0] if edge_type.target_labels else None + live_count = await graph_reader.count_relationship_instances( + tenant_id=tenant_id, + knowledge_graph_id=knowledge_graph_id, + relationship_type=edge_type.label, + source_entity_type=source_label, + target_entity_type=target_label, + ) + if live_count != edge_type.prepopulated_instance_count: + edges_changed = True + updated_edges.append( + replace(edge_type, prepopulated_instance_count=live_count), + ) + + if not nodes_changed and not edges_changed: + return ontology + + return replace( + ontology, + node_types=tuple(updated_nodes), + edge_types=tuple(updated_edges), + ) diff --git a/src/api/management/application/services/knowledge_graph_service.py b/src/api/management/application/services/knowledge_graph_service.py index bcb7c0cf8..67f6a0f8b 100644 --- a/src/api/management/application/services/knowledge_graph_service.py +++ b/src/api/management/application/services/knowledge_graph_service.py @@ -830,58 +830,9 @@ def _evaluate_workspace_readiness( self, ontology: OntologyConfig | None ) -> WorkspaceReadinessStatus: """Evaluate transition readiness flags from canonical schema state.""" - node_type_count = len(ontology.node_types) if ontology else 0 - edge_type_count = len(ontology.edge_types) if ontology else 0 - from management.domain.ontology_prepopulation import relationship_readiness_key - - prepopulated_without_instances: tuple[str, ...] = () - prepopulated_relationships_without_instances: tuple[str, ...] 
= () - if ontology is not None: - prepopulated_without_instances = tuple( - node_type.label - for node_type in ontology.node_types - if node_type.prepopulated and node_type.prepopulated_instance_count <= 0 - ) - prepopulated_relationships_without_instances = tuple( - relationship_readiness_key(edge_type) - for edge_type in ontology.edge_types - if edge_type.prepopulated and edge_type.prepopulated_instance_count <= 0 - ) + from management.application.workspace_readiness import evaluate_workspace_readiness - has_min_entities = node_type_count >= 1 - has_min_relationships = edge_type_count >= 1 - prepopulated_ready = ( - len(prepopulated_without_instances) == 0 - and len(prepopulated_relationships_without_instances) == 0 - ) - - blocking_reasons: list[str] = [] - if not has_min_entities: - blocking_reasons.append("At least one entity type is required") - if not has_min_relationships: - blocking_reasons.append("At least one relationship type is required") - if prepopulated_without_instances: - labels = ", ".join(prepopulated_without_instances) - blocking_reasons.append( - f"Prepopulated entity types require instances before transition: {labels}" - ) - if prepopulated_relationships_without_instances: - labels = ", ".join(prepopulated_relationships_without_instances) - blocking_reasons.append( - "Prepopulated relationship types require instances before transition: " - f"{labels}" - ) - - return WorkspaceReadinessStatus( - has_minimum_entity_types=has_min_entities, - has_minimum_relationship_types=has_min_relationships, - prepopulated_types_ready=prepopulated_ready, - prepopulated_types_without_instances=prepopulated_without_instances, - prepopulated_relationship_types_without_instances=( - prepopulated_relationships_without_instances - ), - blocking_reasons=tuple(blocking_reasons), - ) + return evaluate_workspace_readiness(ontology) async def get_workspace_status( self, diff --git a/src/api/management/application/workspace_readiness.py 
b/src/api/management/application/workspace_readiness.py new file mode 100644 index 000000000..0ced02c13 --- /dev/null +++ b/src/api/management/application/workspace_readiness.py @@ -0,0 +1,91 @@ +"""Workspace bootstrap readiness evaluation for knowledge graphs.""" + +from __future__ import annotations + +from management.domain.ontology_prepopulation import relationship_readiness_key +from management.domain.value_objects import OntologyConfig, WorkspaceReadinessStatus + + +def evaluate_workspace_readiness(ontology: OntologyConfig | None) -> WorkspaceReadinessStatus: + """Evaluate transition readiness flags from canonical schema metadata.""" + node_type_count = len(ontology.node_types) if ontology else 0 + edge_type_count = len(ontology.edge_types) if ontology else 0 + + prepopulated_without_instances: tuple[str, ...] = () + prepopulated_relationships_without_instances: tuple[str, ...] = () + if ontology is not None: + prepopulated_without_instances = tuple( + node_type.label + for node_type in ontology.node_types + if node_type.prepopulated and node_type.prepopulated_instance_count <= 0 + ) + prepopulated_relationships_without_instances = tuple( + relationship_readiness_key(edge_type) + for edge_type in ontology.edge_types + if edge_type.prepopulated and edge_type.prepopulated_instance_count <= 0 + ) + + has_min_entities = node_type_count >= 1 + has_min_relationships = edge_type_count >= 1 + prepopulated_ready = ( + len(prepopulated_without_instances) == 0 + and len(prepopulated_relationships_without_instances) == 0 + ) + + blocking_reasons: list[str] = [] + if not has_min_entities: + blocking_reasons.append("At least one entity type is required") + if not has_min_relationships: + blocking_reasons.append("At least one relationship type is required") + if prepopulated_without_instances: + labels = ", ".join(prepopulated_without_instances) + blocking_reasons.append( + f"Prepopulated entity types require instances before transition: {labels}" + ) + if 
prepopulated_relationships_without_instances: + labels = ", ".join(prepopulated_relationships_without_instances) + blocking_reasons.append( + "Prepopulated relationship types require instances before transition: " + f"{labels}" + ) + + return WorkspaceReadinessStatus( + has_minimum_entity_types=has_min_entities, + has_minimum_relationship_types=has_min_relationships, + prepopulated_types_ready=prepopulated_ready, + prepopulated_types_without_instances=prepopulated_without_instances, + prepopulated_relationship_types_without_instances=( + prepopulated_relationships_without_instances + ), + blocking_reasons=tuple(blocking_reasons), + ) + + +def prepopulated_gaps_from_live_counts( + ontology: OntologyConfig | None, + *, + entity_instance_counts: dict[str, int], + relationship_instance_counts: dict[str, int], +) -> dict[str, tuple[str, ...]]: + """Return prepopulated type labels/keys with zero live graph instances.""" + if ontology is None: + return { + "entity_types_without_instances": (), + "relationship_types_without_instances": (), + } + + entity_gaps = tuple( + node_type.label + for node_type in ontology.node_types + if node_type.prepopulated and entity_instance_counts.get(node_type.label, 0) <= 0 + ) + relationship_gaps = tuple( + relationship_readiness_key(edge_type) + for edge_type in ontology.edge_types + if edge_type.prepopulated + and relationship_instance_counts.get(relationship_readiness_key(edge_type), 0) <= 0 + ) + return { + "entity_types_without_instances": entity_gaps, + "relationship_types_without_instances": relationship_gaps, + } diff --git a/src/api/tests/unit/extraction/application/test_skill_resolution_service.py b/src/api/tests/unit/extraction/application/test_skill_resolution_service.py index 0d8e06983..39990c3df 100644 --- a/src/api/tests/unit/extraction/application/test_skill_resolution_service.py +++ b/src/api/tests/unit/extraction/application/test_skill_resolution_service.py @@ -38,8 +38,12 @@ async def 
test_bootstrap_mode_uses_bootstrap_defaults(self): assert "entity_type_authoring" in resolved.skills assert "relationship_type_authoring" in resolved.skills assert "instance_authoring" in resolved.skills + assert "instance_generation" in resolved.skills + assert "relationship_instance_authoring" in resolved.skills assert "schema_tools" in resolved.skills assert "kartograph_get_schema_ontology" in resolved.skills["schema_tools"] + assert "kartograph_get_workspace_readiness" in resolved.skills["schema_tools"] + assert "Read" in resolved.skills["schema_tools"] assert "prepopulation_validation" in resolved.skills assert "capabilities_intake" in resolved.skills assert "goal" in resolved.system_prompt.lower() diff --git a/src/api/tests/unit/extraction/infrastructure/test_sticky_session_workdir_materializer.py b/src/api/tests/unit/extraction/infrastructure/test_sticky_session_workdir_materializer.py index a4fc852f0..58f814636 100644 --- a/src/api/tests/unit/extraction/infrastructure/test_sticky_session_workdir_materializer.py +++ b/src/api/tests/unit/extraction/infrastructure/test_sticky_session_workdir_materializer.py @@ -132,6 +132,7 @@ def test_materializer_writes_sources_index(tmp_path: Path) -> None: assert source["entry_count"] == 1 assert source["sample_paths"] == ["pkg/api/example.go"] assert source["repository_root"] == "repository-files/hyperfleet-e2e" + assert source["file_extension_counts"] == {".go": 1} def test_materializer_refresh_preserves_session_root_directory(tmp_path: Path) -> None: diff --git a/src/api/tests/unit/extraction/presentation/test_workload_routes.py b/src/api/tests/unit/extraction/presentation/test_workload_routes.py index 3a97182e8..e5ac7c897 100644 --- a/src/api/tests/unit/extraction/presentation/test_workload_routes.py +++ b/src/api/tests/unit/extraction/presentation/test_workload_routes.py @@ -9,8 +9,12 @@ from extraction.infrastructure.workload_runtime import ScopedWorkloadCredentialIssuer from extraction.presentation import 
workload_routes from extraction.presentation.workload_auth import WorkloadAuthContext, get_workload_auth_context -from infrastructure.extraction_workload.dependencies import get_workload_schema_service -from management.domain.value_objects import OntologyConfig +from extraction.ports.workload_graph import WorkloadGraphNode, WorkloadGraphRelationship +from infrastructure.extraction_workload.dependencies import ( + get_workload_graph_reader, + get_workload_schema_service, +) +from management.domain.value_objects import EdgeTypeDefinition, NodeTypeDefinition, OntologyConfig class _FakeSchemaService: @@ -41,9 +45,75 @@ async def apply_mutation_jsonl( return {"applied": True, "errors": []} +class _FakeGraphReader: + async def search_by_slug(self, **kwargs): + return [] + + async def list_instances_by_type(self, **kwargs): + return ( + [ + WorkloadGraphNode( + id="service:abc", + entity_type="service", + slug="api-gateway", + properties={"name": "api-gateway"}, + ) + ], + 1, + ) + + async def count_entity_instances_by_type(self, **kwargs): + entity_type = kwargs.get("entity_type") + if entity_type == "service": + return 1 + return 0 + + async def list_relationship_instances(self, **kwargs): + return ( + [ + WorkloadGraphRelationship( + id="contains:abc", + relationship_type="contains", + start_id="folder:aaa", + end_id="file:bbb", + source_slug="root-hyperfleet", + target_slug="pkg-api-example-go", + source_entity_type="folder", + target_entity_type="source_file", + properties={}, + ) + ], + 1, + ) + + async def count_relationship_instances(self, **kwargs): + relationship_type = kwargs.get("relationship_type") + if relationship_type == "contains": + return 1 + return 0 + + @pytest.fixture def workload_client() -> tuple[TestClient, _FakeSchemaService, str]: fake = _FakeSchemaService() + fake.saved = OntologyConfig( + node_types=( + NodeTypeDefinition(label="service", prepopulated=True, prepopulated_instance_count=0), + ), + edge_types=( + EdgeTypeDefinition( + 
label="contains", + source_labels=("folder",), + target_labels=("source_file",), + prepopulated=True, + ), + EdgeTypeDefinition( + label="depends_on", + source_labels=("service",), + target_labels=("service",), + ), + ), + ) issuer = ScopedWorkloadCredentialIssuer(default_ttl=__import__("datetime").timedelta(minutes=10)) credentials = issuer.issue_for_sticky_session( tenant_id="tenant-1", @@ -53,6 +123,7 @@ def workload_client() -> tuple[TestClient, _FakeSchemaService, str]: app = FastAPI() app.include_router(workload_routes.router, prefix="/extraction") app.dependency_overrides[get_workload_schema_service] = lambda: fake + app.dependency_overrides[get_workload_graph_reader] = lambda: _FakeGraphReader() app.dependency_overrides[get_workload_auth_context] = lambda: WorkloadAuthContext( credentials=credentials, tenant_id="tenant-1", @@ -71,6 +142,53 @@ def test_workload_get_schema_authoring_guide(workload_client: tuple[TestClient, ) assert response.status_code == 200 assert "kartograph_get_schema_ontology" in response.json()["guide"] + assert "Instance generation cookbook" in response.json()["guide"] + + +def test_workload_get_workspace_readiness(workload_client: tuple[TestClient, _FakeSchemaService, str]) -> None: + client, _fake, token = workload_client + response = client.get( + "/extraction/workloads/schema/readiness", + headers={"X-Workload-Token": token}, + ) + assert response.status_code == 200 + payload = response.json() + assert payload["knowledge_graph_id"] == "kg-1" + assert payload["prepopulated_entity_types_without_instances_live"] == [] + assert payload["prepopulated_entity_types"][0]["live_instance_count"] == 1 + assert payload["prepopulated_entity_types"][0]["label"] == "service" + + +def test_workload_list_instances_by_type(workload_client: tuple[TestClient, _FakeSchemaService, str]) -> None: + client, _fake, token = workload_client + response = client.get( + "/extraction/workloads/graph/instances", + headers={"X-Workload-Token": token}, + 
params={"entity_type": "service"}, + ) + assert response.status_code == 200 + payload = response.json() + assert payload["entity_type"] == "service" + assert payload["count"] == 1 + assert payload["nodes"][0]["slug"] == "api-gateway" + + +def test_workload_list_relationship_instances(workload_client: tuple[TestClient, _FakeSchemaService, str]) -> None: + client, _fake, token = workload_client + response = client.get( + "/extraction/workloads/graph/relationships", + headers={"X-Workload-Token": token}, + params={ + "relationship_type": "contains", + "source_entity_type": "folder", + "target_entity_type": "source_file", + }, + ) + assert response.status_code == 200 + payload = response.json() + assert payload["relationship_type"] == "contains" + assert payload["count"] == 1 + assert payload["relationships"][0]["source_slug"] == "root-hyperfleet" def test_workload_save_schema_ontology(workload_client: tuple[TestClient, _FakeSchemaService, str]) -> None: diff --git a/src/api/tests/unit/infrastructure/extraction_workload/test_workspace_readiness.py b/src/api/tests/unit/infrastructure/extraction_workload/test_workspace_readiness.py new file mode 100644 index 000000000..fb0c7c65a --- /dev/null +++ b/src/api/tests/unit/infrastructure/extraction_workload/test_workspace_readiness.py @@ -0,0 +1,76 @@ +"""Unit tests for workload readiness snapshot helpers.""" + +from __future__ import annotations + +import pytest + +from infrastructure.extraction_workload.workspace_readiness import ( + build_workload_readiness_snapshot, + sync_prepopulated_instance_counts, +) +from management.domain.value_objects import EdgeTypeDefinition, NodeTypeDefinition, OntologyConfig + + +class _FakeGraphReader: + async def count_entity_instances_by_type(self, **kwargs): + entity_type = kwargs.get("entity_type") + return {"service": 2, "folder": 0}.get(entity_type, 0) + + async def count_relationship_instances(self, **kwargs): + relationship_type = kwargs.get("relationship_type") + return 1 if 
relationship_type == "contains" else 0 + + +@pytest.mark.asyncio +async def test_build_workload_readiness_snapshot_reports_live_relationship_gaps() -> None: + ontology = OntologyConfig( + node_types=( + NodeTypeDefinition(label="folder", prepopulated=True), + NodeTypeDefinition(label="service", prepopulated=True, prepopulated_instance_count=0), + ), + edge_types=( + EdgeTypeDefinition( + label="contains", + source_labels=("folder",), + target_labels=("source_file",), + prepopulated=True, + ), + ), + ) + + snapshot = await build_workload_readiness_snapshot( + ontology=ontology, + knowledge_graph_id="kg-1", + tenant_id="tenant-1", + graph_reader=_FakeGraphReader(), + ) + + assert "folder" in snapshot["prepopulated_entity_types_without_instances_live"] + assert snapshot["prepopulated_types_ready_live"] is False + assert snapshot["prepopulated_relationship_types"][0]["live_instance_count"] == 1 + + +@pytest.mark.asyncio +async def test_sync_prepopulated_instance_counts_updates_metadata() -> None: + ontology = OntologyConfig( + node_types=(NodeTypeDefinition(label="service", prepopulated=True, prepopulated_instance_count=0),), + edge_types=( + EdgeTypeDefinition( + label="contains", + source_labels=("folder",), + target_labels=("source_file",), + prepopulated=True, + prepopulated_instance_count=0, + ), + ), + ) + + synced = await sync_prepopulated_instance_counts( + ontology=ontology, + knowledge_graph_id="kg-1", + tenant_id="tenant-1", + graph_reader=_FakeGraphReader(), + ) + + assert synced.node_types[0].prepopulated_instance_count == 2 + assert synced.edge_types[0].prepopulated_instance_count == 1 diff --git a/src/api/tests/unit/management/application/test_workspace_readiness.py b/src/api/tests/unit/management/application/test_workspace_readiness.py new file mode 100644 index 000000000..1ee5f2bfa --- /dev/null +++ b/src/api/tests/unit/management/application/test_workspace_readiness.py @@ -0,0 +1,59 @@ +"""Unit tests for workspace readiness evaluation.""" + +from 
__future__ import annotations + +from management.application.workspace_readiness import ( + evaluate_workspace_readiness, + prepopulated_gaps_from_live_counts, +) +from management.domain.value_objects import EdgeTypeDefinition, NodeTypeDefinition, OntologyConfig + + +def test_evaluate_workspace_readiness_flags_missing_prepopulated_entity_types() -> None: + config = OntologyConfig( + node_types=( + NodeTypeDefinition(label="service", prepopulated=True, prepopulated_instance_count=0), + NodeTypeDefinition(label="team"), + ), + edge_types=( + EdgeTypeDefinition( + label="owns", + source_labels=("team",), + target_labels=("service",), + ), + ), + ) + + readiness = evaluate_workspace_readiness(config) + + assert readiness.has_minimum_entity_types is True + assert readiness.has_minimum_relationship_types is True + assert readiness.prepopulated_types_ready is False + assert readiness.prepopulated_types_without_instances == ("service",) + assert any("service" in reason for reason in readiness.blocking_reasons) + + +def test_prepopulated_gaps_from_live_counts_uses_graph_counts() -> None: + config = OntologyConfig( + node_types=( + NodeTypeDefinition(label="folder", prepopulated=True), + NodeTypeDefinition(label="file", prepopulated=True), + ), + edge_types=( + EdgeTypeDefinition( + label="contains", + source_labels=("folder",), + target_labels=("file",), + prepopulated=True, + ), + ), + ) + + gaps = prepopulated_gaps_from_live_counts( + config, + entity_instance_counts={"folder": 3, "file": 0}, + relationship_instance_counts={"folder|contains|file": 0}, + ) + + assert gaps["entity_types_without_instances"] == ("file",) + assert gaps["relationship_types_without_instances"] == ("folder|contains|file",) From 2f041de6200a8796e6f40c3c606271f19156c115 Mon Sep 17 00:00:00 2001 From: aredenba-rh <aredenba@redhat.com> Date: Thu, 4 Jun 2026 16:50:04 -0400 Subject: [PATCH 100/153] feat(extraction): add GMA bulk prepopulation and strict mutation preflight Equip the Graph Management 
Assistant with Bash workspace generators, JSONL validate/apply-from-file tools, slug batch checks, and CREATE preflight so bootstrap prepopulation cannot duplicate existing instances. Co-authored-by: Cursor <cursoragent@cursor.com> --- specs/extraction/chat-turns.spec.md | 15 ++ specs/graph/schema-authoring.spec.md | 25 +++ .../kartograph_agent_runtime/agent_prompt.py | 49 ++++-- .../kartograph_agent_runtime/executor.py | 16 +- .../kartograph_agent_runtime/schema_tools.py | 119 ++++++++++++- .../kartograph_agent_runtime/tools.py | 40 +++++ .../workspace_paths.py | 21 +++ src/agent-runtime/tests/test_agent_prompt.py | 22 +++ src/agent-runtime/tests/test_schema_tools.py | 9 + .../tests/test_workspace_paths.py | 21 +++ .../application/schema_authoring_guide.py | 28 +++- .../application/skill_resolution_service.py | 67 +------- .../instance_generator_templates/README.md | 50 ++++++ .../instance_generator_templates/__init__.py | 14 ++ .../data_source.py | 32 ++++ .../instance_generator_templates/folder.py | 49 ++++++ .../json_instances_to_jsonl.py | 137 +++++++++++++++ .../json_relationships_to_jsonl.py | 156 ++++++++++++++++++ .../source_file.py | 59 +++++++ .../sticky_session_workdir_materializer.py | 15 ++ src/api/extraction/ports/workload_graph.py | 42 +++++ src/api/extraction/ports/workload_schema.py | 9 + .../presentation/workload_routes.py | 79 +++++++++ .../graph/infrastructure/graph_repository.py | 83 ++++++++++ .../ontology_mutation_builder.py | 10 +- .../canonical_schema/ontology_projection.py | 13 ++ .../extraction_workload/graph_reader.py | 117 +++++++++++++ .../extraction_workload/mutation_preflight.py | 124 ++++++++++++++ .../extraction_workload/schema_service.py | 53 +++++- src/api/management/domain/value_objects.py | 22 ++- .../presentation/knowledge_graphs/models.py | 12 ++ .../test_skill_resolution_service.py | 18 +- .../test_json_instances_to_jsonl.py | 71 ++++++++ .../test_json_relationships_to_jsonl.py | 48 ++++++ 
...est_sticky_session_workdir_materializer.py | 24 +++ .../presentation/test_workload_routes.py | 52 +++++- .../unit/graph/test_repository_protocol.py | 28 +++- .../test_ontology_projection.py | 40 +++++ .../test_mutation_preflight.py | 55 ++++++ .../test_schema_service.py | 39 +++++ 40 files changed, 1776 insertions(+), 107 deletions(-) create mode 100644 src/agent-runtime/kartograph_agent_runtime/workspace_paths.py create mode 100644 src/agent-runtime/tests/test_workspace_paths.py create mode 100644 src/api/extraction/infrastructure/instance_generator_templates/README.md create mode 100644 src/api/extraction/infrastructure/instance_generator_templates/__init__.py create mode 100644 src/api/extraction/infrastructure/instance_generator_templates/data_source.py create mode 100644 src/api/extraction/infrastructure/instance_generator_templates/folder.py create mode 100644 src/api/extraction/infrastructure/instance_generator_templates/json_instances_to_jsonl.py create mode 100644 src/api/extraction/infrastructure/instance_generator_templates/json_relationships_to_jsonl.py create mode 100644 src/api/extraction/infrastructure/instance_generator_templates/source_file.py create mode 100644 src/api/infrastructure/extraction_workload/mutation_preflight.py create mode 100644 src/api/tests/unit/extraction/infrastructure/test_json_instances_to_jsonl.py create mode 100644 src/api/tests/unit/extraction/infrastructure/test_json_relationships_to_jsonl.py create mode 100644 src/api/tests/unit/infrastructure/canonical_schema/test_ontology_projection.py create mode 100644 src/api/tests/unit/infrastructure/extraction_workload/test_mutation_preflight.py diff --git a/specs/extraction/chat-turns.spec.md b/specs/extraction/chat-turns.spec.md index f5679f9f9..6cb080f76 100644 --- a/specs/extraction/chat-turns.spec.md +++ b/specs/extraction/chat-turns.spec.md @@ -79,3 +79,18 @@ The system SHALL reset sticky session runtime when clear-chat is invoked. 
- WHEN the user clicks Clear chat - THEN the sticky container is reset - AND a new clean session is started for the same scope + +### Requirement: Graph Management Assistant Tooling +The system SHALL expose schema, mutation, and workspace tooling appropriate for bootstrap prepopulation workflows. + +#### Scenario: Bash and workspace generators +- GIVEN an active graph-management chat turn in schema bootstrap mode +- WHEN the agent runtime starts +- THEN Bash is an allowed tool scoped to the session workspace +- AND `instance_generators/` contains example scripts the agent may copy or extend + +#### Scenario: Compact follow-up prompts +- GIVEN a graph-management session with prior user messages in the turn history +- WHEN a follow-up chat message is processed +- THEN the system prompt omits the full skill prose block +- AND still includes live workspace readiness and a short tools summary diff --git a/specs/graph/schema-authoring.spec.md b/specs/graph/schema-authoring.spec.md index 6dece7a7d..9aba27707 100644 --- a/specs/graph/schema-authoring.spec.md +++ b/specs/graph/schema-authoring.spec.md @@ -68,3 +68,28 @@ The system SHALL enforce `prepopulated=true` as a transition-blocking readiness - WHEN readiness is evaluated - THEN validation fails and transition to extraction mode is blocked +### Requirement: Workload Bulk Instance Authoring +The system SHALL support bulk instance authoring for the Graph Management Assistant via workspace files and strict CREATE semantics. 
+ +#### Scenario: Dry-run mutation validation +- GIVEN a JSONL batch of mutation lines for one knowledge graph +- WHEN the assistant calls workload mutation validate +- THEN the system returns validation errors without writing to the graph +- AND CREATE lines that target existing instance ids or slugs are rejected + +#### Scenario: Apply mutations from workspace file +- GIVEN a JSONL file under the sticky session workspace mount +- WHEN the assistant applies mutations from that file path +- THEN the system reads the full file and applies all valid operations in one request + +#### Scenario: Optional instance generator metadata +- GIVEN an entity type with `instance_generator` set to a script name under `instance_generators/` +- WHEN the ontology is saved and read back +- THEN the script name is preserved as authoring metadata for the assistant + +#### Scenario: Session workspace generator templates +- GIVEN a sticky session work directory is prepared +- WHEN the assistant lists `instance_generators/` +- THEN example generator scripts and JSONL converter helpers are present +- AND the assistant may add custom generator scripts alongside them + diff --git a/src/agent-runtime/kartograph_agent_runtime/agent_prompt.py b/src/agent-runtime/kartograph_agent_runtime/agent_prompt.py index 0f2699505..92311a70c 100644 --- a/src/agent-runtime/kartograph_agent_runtime/agent_prompt.py +++ b/src/agent-runtime/kartograph_agent_runtime/agent_prompt.py @@ -2,7 +2,9 @@ from __future__ import annotations -from typing import Any +from typing import Any, Literal + +PromptDetail = Literal["full", "compact"] from kartograph_agent_runtime.schema_tools import ( KARTOGRAPH_SCHEMA_TOOL_NAMES, @@ -19,33 +21,44 @@ | `kartograph_get_workspace_readiness` | Prepopulated gaps, live instance counts, blocking reasons | | `kartograph_get_schema_ontology` | Read current `node_types` and `edge_types` before every save | | `kartograph_save_schema_ontology` | Replace canonical ontology (read → merge edits 
→ save full payload) | -| `kartograph_apply_graph_mutations` | Apply JSONL CREATE/UPDATE/DELETE instance lines (batch 25–50 lines) | +| `kartograph_validate_graph_mutations` | Dry-run JSONL (strict CREATE — no duplicates) | +| `kartograph_apply_graph_mutations` | Apply JSONL CREATE/UPDATE/DELETE (small batches) | +| `kartograph_validate_graph_mutations_from_file` | Dry-run a workspace `.jsonl` file | +| `kartograph_apply_graph_mutations_from_file` | Apply a workspace `.jsonl` file in one call | | `kartograph_list_instances_by_type` | List/count entity instances for one type (verify prepopulation) | | `kartograph_list_relationship_instances` | List relationship edges with source/target slugs and node IDs | | `kartograph_search_graph_by_slug` | Find existing nodes by slug to avoid duplicates | +| `kartograph_check_graph_slugs` | Batch check which slugs already exist for one entity type | -## Workspace file tools (read-only) +## Workspace tools | Tool | Purpose | |------|---------| | `Read` | Read files under the session workspace mount | | `Grep` | Search file contents in `repository-files/<data_source>/` | | `Glob` | List files by pattern for instance generation | +| `Bash` | Run `instance_generators/*.py` against `repository-files/` (workspace only) | ### Quick workflow 1. `kartograph_get_schema_authoring_guide` 2. `kartograph_get_workspace_readiness` 3. `kartograph_get_schema_ontology` -4. Glob/Grep/Read `repository-files/` to derive instances +4. For large prepopulation: Bash `python3 instance_generators/<template>.py repository-files` 5. Model types → `kartograph_save_schema_ontology` -6. Create entity instances in batches → `kartograph_apply_graph_mutations` -7. Create relationship edges (after entity IDs are known) -8. Verify → `kartograph_list_instances_by_type` and `kartograph_list_relationship_instances` +6. Apply CREATE mutations → `kartograph_apply_graph_mutations` (small fixes inline; bulk via generator output) +7. 
Create relationship edges after entity IDs are known +8. Verify with `kartograph_list_instances_by_type` and `kartograph_get_workspace_readiness` Writes persist to the platform database for the active knowledge graph. """.strip() +_TOOLS_COMPACT_REFERENCE = ( + "Tools: kartograph_* schema MCP tools, plus Read/Grep/Glob/Bash on the workspace. " + "Bulk prepopulation: Bash generator → `json_instances_to_jsonl.py` → validate-from-file → apply-from-file. " + "CREATE is strict (use UPDATE to edit existing instances)." +) + def _format_workspace_readiness(readiness: dict[str, Any]) -> str: lines = ["## Workspace readiness (live snapshot)"] @@ -112,8 +125,9 @@ def build_agent_system_prompt( workspace_appendix: str = "", workspace_readiness: dict[str, Any] | None = None, include_tools_manifest: bool = True, + prompt_detail: PromptDetail = "full", ) -> str: - """Build the full system prompt with skills, guardrails, tools, and session scope.""" + """Build the system prompt with guardrails, optional skills/tools, and session scope.""" system_prompt = str(agent_configuration.get("system_prompt") or "").strip() guardrails = agent_configuration.get("guardrails") or [] skills = agent_configuration.get("skills") or {} @@ -136,18 +150,21 @@ def build_agent_system_prompt( skill_sections.append(f"**{key}**: {text}") skills_block = "" - if skill_sections: + if prompt_detail == "full" and skill_sections: skills_block = "## Skills\n\n" + "\n\n".join(skill_sections) tools_block = "" if include_tools_manifest and settings is not None and settings.workload_token.strip(): - kartograph_tools = ", ".join(f"`{name}`" for name in KARTOGRAPH_SCHEMA_TOOL_NAMES) - file_tools = ", ".join(f"`{name}`" for name in WORKSPACE_FILE_TOOL_NAMES) - tools_block = ( - f"{_TOOLS_QUICK_REFERENCE}\n\n" - f"Registered Kartograph tools: {kartograph_tools}.\n" - f"Registered workspace file tools: {file_tools}." 
- ) + if prompt_detail == "compact": + tools_block = f"## Tools\n\n{_TOOLS_COMPACT_REFERENCE}" + else: + kartograph_tools = ", ".join(f"`{name}`" for name in KARTOGRAPH_SCHEMA_TOOL_NAMES) + file_tools = ", ".join(f"`{name}`" for name in WORKSPACE_FILE_TOOL_NAMES) + tools_block = ( + f"{_TOOLS_QUICK_REFERENCE}\n\n" + f"Registered Kartograph tools: {kartograph_tools}.\n" + f"Registered workspace tools: {file_tools}." + ) session_block = "" if settings is not None: diff --git a/src/agent-runtime/kartograph_agent_runtime/executor.py b/src/agent-runtime/kartograph_agent_runtime/executor.py index 37547b0f6..cb198e924 100644 --- a/src/agent-runtime/kartograph_agent_runtime/executor.py +++ b/src/agent-runtime/kartograph_agent_runtime/executor.py @@ -30,12 +30,14 @@ def _build_system_prompt( settings: AgentRuntimeSettings | None = None, workspace_appendix: str = "", workspace_readiness: dict[str, Any] | None = None, + prompt_detail: str = "full", ) -> str: return build_agent_system_prompt( agent_configuration, settings=settings, workspace_appendix=workspace_appendix, workspace_readiness=workspace_readiness, + prompt_detail="compact" if prompt_detail == "compact" else "full", ) @@ -57,10 +59,10 @@ def _build_workspace_prompt_appendix(settings: AgentRuntimeSettings) -> str: f"Workspace mount: `{settings.workspace_dir}`", ( "Prepared repository files live under " - "`repository-files/<data_source_name>/` relative to the workspace mount " - "(one folder per data source for this session's knowledge graph; folder " - "names are slugified data source names such as `hyperfleet-api`). " - "Use Read, Grep, and Glob tools against those paths." + "`repository-files/<data_source_name>/`. " + "Prebuilt instance generator scripts are in `instance_generators/` " + "(run with Bash: `python3 instance_generators/<script>.py repository-files`). " + "Use Read, Grep, Glob, and Bash against the workspace mount only." 
), ] for source in sources[:12]: @@ -382,11 +384,17 @@ async def _stream_with_claude_sdk( except Exception: # noqa: BLE001 workspace_readiness = None + prior_turns = sum( + 1 for entry in message_history if isinstance(entry, dict) and entry.get("role") == "user" + ) + prompt_detail = "full" if prior_turns <= 1 else "compact" + system_prompt = _build_system_prompt( agent_configuration, settings=settings, workspace_appendix=_build_workspace_prompt_appendix(settings), workspace_readiness=workspace_readiness, + prompt_detail=prompt_detail, ) history_lines = [ f"{entry.get('role', 'unknown')}: {entry.get('content', '')}" diff --git a/src/agent-runtime/kartograph_agent_runtime/schema_tools.py b/src/agent-runtime/kartograph_agent_runtime/schema_tools.py index 463cfc5e6..67da5b8a7 100644 --- a/src/agent-runtime/kartograph_agent_runtime/schema_tools.py +++ b/src/agent-runtime/kartograph_agent_runtime/schema_tools.py @@ -8,17 +8,21 @@ from kartograph_agent_runtime.tools import RuntimeTooling -WORKSPACE_FILE_TOOL_NAMES = ("Read", "Grep", "Glob") +WORKSPACE_FILE_TOOL_NAMES = ("Read", "Grep", "Glob", "Bash") KARTOGRAPH_SCHEMA_TOOL_NAMES = ( "kartograph_get_schema_authoring_guide", "kartograph_get_workspace_readiness", "kartograph_get_schema_ontology", "kartograph_save_schema_ontology", + "kartograph_validate_graph_mutations", "kartograph_apply_graph_mutations", + "kartograph_validate_graph_mutations_from_file", + "kartograph_apply_graph_mutations_from_file", "kartograph_list_instances_by_type", "kartograph_list_relationship_instances", "kartograph_search_graph_by_slug", + "kartograph_check_graph_slugs", ) GMA_ALLOWED_TOOL_NAMES = KARTOGRAPH_SCHEMA_TOOL_NAMES + WORKSPACE_FILE_TOOL_NAMES @@ -98,9 +102,31 @@ async def save_schema_ontology(args: dict[str, Any]) -> dict[str, Any]: "is_error": True, } + @tool( + "kartograph_validate_graph_mutations", + "Dry-run: validate JSONL mutations without writing (strict CREATE — no duplicate types/instances).", + {"jsonl": str}, + ) + async 
def validate_graph_mutations(args: dict[str, Any]) -> dict[str, Any]: + jsonl = str(args.get("jsonl") or "").strip() + if not jsonl: + return { + "content": [{"type": "text", "text": "jsonl must not be empty."}], + "is_error": True, + } + try: + return RuntimeTooling.format_tool_result( + await tooling.validate_graph_mutations(jsonl=jsonl), + ) + except Exception as exc: # noqa: BLE001 + return { + "content": [{"type": "text", "text": f"Failed to validate mutations: {exc}"}], + "is_error": True, + } + @tool( "kartograph_apply_graph_mutations", - "Apply JSONL mutation lines to create/update/delete entity or relationship instances.", + "Apply JSONL mutation lines. CREATE fails if type or instance already exists; use UPDATE to edit.", {"jsonl": str}, ) async def apply_graph_mutations(args: dict[str, Any]) -> dict[str, Any]: @@ -120,6 +146,60 @@ async def apply_graph_mutations(args: dict[str, Any]) -> dict[str, Any]: "is_error": True, } + @tool( + "kartograph_validate_graph_mutations_from_file", + "Dry-run validate a .jsonl file under the workspace (path relative to session root).", + {"path": str}, + ) + async def validate_graph_mutations_from_file(args: dict[str, Any]) -> dict[str, Any]: + path = str(args.get("path") or "").strip() + if not path: + return { + "content": [{"type": "text", "text": "path must not be empty."}], + "is_error": True, + } + try: + return RuntimeTooling.format_tool_result( + await tooling.validate_graph_mutations_from_file(path=path), + ) + except ValueError as exc: + return { + "content": [{"type": "text", "text": str(exc)}], + "is_error": True, + } + except Exception as exc: # noqa: BLE001 + return { + "content": [{"type": "text", "text": f"Failed to validate file: {exc}"}], + "is_error": True, + } + + @tool( + "kartograph_apply_graph_mutations_from_file", + "Apply a workspace .jsonl file in one call (strict CREATE semantics).", + {"path": str}, + ) + async def apply_graph_mutations_from_file(args: dict[str, Any]) -> dict[str, Any]: + path 
= str(args.get("path") or "").strip() + if not path: + return { + "content": [{"type": "text", "text": "path must not be empty."}], + "is_error": True, + } + try: + return RuntimeTooling.format_tool_result( + await tooling.apply_graph_mutations_from_file(path=path), + ) + except ValueError as exc: + return { + "content": [{"type": "text", "text": str(exc)}], + "is_error": True, + } + except Exception as exc: # noqa: BLE001 + return { + "content": [{"type": "text", "text": f"Failed to apply file: {exc}"}], + "is_error": True, + } + @tool( "kartograph_list_instances_by_type", "List entity instances for one type with pagination (use to verify prepopulation).", @@ -190,6 +270,37 @@ async def list_relationship_instances(args: dict[str, Any]) -> dict[str, Any]: "is_error": True, } + @tool( + "kartograph_check_graph_slugs", + "Check which slugs already exist for one entity type (before bulk CREATE).", + {"entity_type": str, "slugs": list}, + ) + async def check_graph_slugs(args: dict[str, Any]) -> dict[str, Any]: + entity_type = str(args.get("entity_type") or "").strip() + slugs = args.get("slugs") or [] + if not entity_type: + return { + "content": [{"type": "text", "text": "entity_type must not be empty."}], + "is_error": True, + } + if not isinstance(slugs, list) or not slugs: + return { + "content": [{"type": "text", "text": "slugs must be a non-empty list."}], + "is_error": True, + } + try: + return RuntimeTooling.format_tool_result( + await tooling.check_graph_slugs( + entity_type=entity_type, + slugs=[str(slug).strip() for slug in slugs if str(slug).strip()], + ), + ) + except Exception as exc: # noqa: BLE001 + return { + "content": [{"type": "text", "text": f"Slug check failed: {exc}"}], + "is_error": True, + } + @tool( "kartograph_search_graph_by_slug", "Search existing graph nodes by slug within the active knowledge graph.", @@ -224,9 +335,13 @@ async def search_graph_by_slug(args: dict[str, Any]) -> dict[str, Any]: get_workspace_readiness, get_schema_ontology, 
save_schema_ontology, + validate_graph_mutations, apply_graph_mutations, + validate_graph_mutations_from_file, + apply_graph_mutations_from_file, list_instances_by_type, list_relationship_instances, search_graph_by_slug, + check_graph_slugs, ], ) diff --git a/src/agent-runtime/kartograph_agent_runtime/tools.py b/src/agent-runtime/kartograph_agent_runtime/tools.py index 7b8ab8f98..478743dee 100644 --- a/src/agent-runtime/kartograph_agent_runtime/tools.py +++ b/src/agent-runtime/kartograph_agent_runtime/tools.py @@ -53,6 +53,17 @@ async def save_schema_ontology(self, *, ontology: dict[str, Any]) -> dict[str, A async def apply_graph_mutations(self, *, jsonl: str) -> dict[str, Any]: url = f"{self._base_url()}/extraction/workloads/mutations/apply" + async with httpx.AsyncClient(timeout=600.0) as client: + response = await client.post( + url, + headers=self._headers(), + json={"jsonl": jsonl}, + ) + response.raise_for_status() + return response.json() + + async def validate_graph_mutations(self, *, jsonl: str) -> dict[str, Any]: + url = f"{self._base_url()}/extraction/workloads/mutations/validate" async with httpx.AsyncClient(timeout=120.0) as client: response = await client.post( url, @@ -62,6 +73,19 @@ async def apply_graph_mutations(self, *, jsonl: str) -> dict[str, Any]: response.raise_for_status() return response.json() + def read_jsonl_from_workspace(self, *, relative_path: str) -> str: + from kartograph_agent_runtime.workspace_paths import read_workspace_text_file + + return read_workspace_text_file(self.settings.workspace_dir, relative_path) + + async def apply_graph_mutations_from_file(self, *, path: str) -> dict[str, Any]: + jsonl = self.read_jsonl_from_workspace(relative_path=path) + return await self.apply_graph_mutations(jsonl=jsonl) + + async def validate_graph_mutations_from_file(self, *, path: str) -> dict[str, Any]: + jsonl = self.read_jsonl_from_workspace(relative_path=path) + return await self.validate_graph_mutations(jsonl=jsonl) + async def 
list_instances_by_type( self, *, @@ -104,6 +128,22 @@ async def list_relationship_instances( response.raise_for_status() return response.json() + async def check_graph_slugs( + self, + *, + entity_type: str, + slugs: list[str], + ) -> dict[str, Any]: + url = f"{self._base_url()}/extraction/workloads/graph/check-slugs" + async with httpx.AsyncClient(timeout=60.0) as client: + response = await client.post( + url, + headers=self._headers(), + json={"entity_type": entity_type, "slugs": slugs}, + ) + response.raise_for_status() + return response.json() + async def search_graph_by_slug( self, *, slug: str, entity_type: str | None = None ) -> dict[str, Any]: diff --git a/src/agent-runtime/kartograph_agent_runtime/workspace_paths.py b/src/agent-runtime/kartograph_agent_runtime/workspace_paths.py new file mode 100644 index 000000000..0928f5e32 --- /dev/null +++ b/src/agent-runtime/kartograph_agent_runtime/workspace_paths.py @@ -0,0 +1,21 @@ +"""Safe path resolution under the sticky session workspace mount.""" + +from __future__ import annotations + +from pathlib import Path + + +def resolve_workspace_file(workspace_dir: str, relative_path: str) -> Path: + """Resolve a user-supplied path that must stay inside the workspace root.""" + root = Path(workspace_dir).resolve() + candidate = (root / relative_path.strip()).resolve() + if root != candidate and root not in candidate.parents: + raise ValueError(f"Path must stay within workspace: {relative_path}") + if not candidate.is_file(): + raise ValueError(f"Workspace file not found: {relative_path}") + return candidate + + +def read_workspace_text_file(workspace_dir: str, relative_path: str) -> str: + """Read a text file from the workspace using a relative path.""" + return resolve_workspace_file(workspace_dir, relative_path).read_text(encoding="utf-8") diff --git a/src/agent-runtime/tests/test_agent_prompt.py b/src/agent-runtime/tests/test_agent_prompt.py index 0415f2d87..b74b734c8 100644 --- 
a/src/agent-runtime/tests/test_agent_prompt.py +++ b/src/agent-runtime/tests/test_agent_prompt.py @@ -31,6 +31,8 @@ def test_build_agent_system_prompt_includes_skills_tools_and_session_scope() -> assert "**schema_modeling**" in prompt assert "kartograph_get_schema_ontology" in prompt assert "Quick workflow" in prompt + assert "Bash" in prompt + assert "instance_generators" in prompt assert "kg-123" in prompt assert "tenant-456" in prompt assert "Files here" in prompt @@ -69,3 +71,23 @@ def test_build_agent_system_prompt_omits_tools_without_workload_token() -> None: assert "Quick workflow" not in prompt assert "Base" in prompt + + +def test_build_agent_system_prompt_compact_omits_skills_and_full_tools_table() -> None: + prompt = build_agent_system_prompt( + { + "system_prompt": "You are the Graph Management Assistant.", + "skills": {"prepopulation": "Run instance_generators with Bash."}, + }, + settings=AgentRuntimeSettings( + KARTOGRAPH_WORKLOAD_TOKEN="token", + KARTOGRAPH_KNOWLEDGE_GRAPH_ID="kg-123", + ), + prompt_detail="compact", + ) + + assert "**prepopulation**" not in prompt + assert "Quick workflow" not in prompt + assert "json_instances_to_jsonl.py" in prompt + assert "validate-from-file" in prompt + assert "apply-from-file" in prompt diff --git a/src/agent-runtime/tests/test_schema_tools.py b/src/agent-runtime/tests/test_schema_tools.py index 8e864f086..8172666be 100644 --- a/src/agent-runtime/tests/test_schema_tools.py +++ b/src/agent-runtime/tests/test_schema_tools.py @@ -15,7 +15,10 @@ def test_schema_tool_names_cover_authoring_surface() -> None: assert "kartograph_get_workspace_readiness" in KARTOGRAPH_SCHEMA_TOOL_NAMES assert "kartograph_get_schema_ontology" in KARTOGRAPH_SCHEMA_TOOL_NAMES assert "kartograph_save_schema_ontology" in KARTOGRAPH_SCHEMA_TOOL_NAMES + assert "kartograph_validate_graph_mutations" in KARTOGRAPH_SCHEMA_TOOL_NAMES assert "kartograph_apply_graph_mutations" in KARTOGRAPH_SCHEMA_TOOL_NAMES + assert 
"kartograph_apply_graph_mutations_from_file" in KARTOGRAPH_SCHEMA_TOOL_NAMES + assert "kartograph_check_graph_slugs" in KARTOGRAPH_SCHEMA_TOOL_NAMES assert "kartograph_list_instances_by_type" in KARTOGRAPH_SCHEMA_TOOL_NAMES assert "kartograph_list_relationship_instances" in KARTOGRAPH_SCHEMA_TOOL_NAMES @@ -27,6 +30,12 @@ def test_gma_allowed_tools_include_workspace_file_tools() -> None: assert tool_name in GMA_ALLOWED_TOOL_NAMES +def test_gma_allowed_tools_include_bash() -> None: + from kartograph_agent_runtime.schema_tools import GMA_ALLOWED_TOOL_NAMES + + assert "Bash" in GMA_ALLOWED_TOOL_NAMES + + def test_build_kartograph_schema_mcp_server_registers_tools() -> None: tooling = RuntimeTooling( settings=AgentRuntimeSettings( diff --git a/src/agent-runtime/tests/test_workspace_paths.py b/src/agent-runtime/tests/test_workspace_paths.py new file mode 100644 index 000000000..f92c7291d --- /dev/null +++ b/src/agent-runtime/tests/test_workspace_paths.py @@ -0,0 +1,21 @@ +"""Unit tests for workspace path resolution.""" + +from __future__ import annotations + +from pathlib import Path + +import pytest + +from kartograph_agent_runtime.workspace_paths import read_workspace_text_file, resolve_workspace_file + + +def test_resolve_workspace_file_rejects_path_traversal(tmp_path: Path) -> None: + (tmp_path / "safe.jsonl").write_text("{}\n", encoding="utf-8") + with pytest.raises(ValueError, match="within workspace"): + resolve_workspace_file(str(tmp_path), "../outside.jsonl") + + +def test_read_workspace_text_file_reads_relative_path(tmp_path: Path) -> None: + (tmp_path / "batch.jsonl").write_text('{"op":"CREATE"}\n', encoding="utf-8") + content = read_workspace_text_file(str(tmp_path), "batch.jsonl") + assert "CREATE" in content diff --git a/src/api/extraction/application/schema_authoring_guide.py b/src/api/extraction/application/schema_authoring_guide.py index efeeec906..9728ce39e 100644 --- a/src/api/extraction/application/schema_authoring_guide.py +++ 
b/src/api/extraction/application/schema_authoring_guide.py @@ -4,7 +4,8 @@ # Kartograph schema authoring (Graph Management Assistant) Use the Kartograph schema tools — never probe undocumented HTTP routes. -Use Read, Grep, and Glob against the session workspace mount to scan prepared repository files. +Use Read, Grep, Glob, and Bash against the session workspace mount. Prebuilt generator scripts +live under `instance_generators/` (see README there). ## Workflow @@ -12,9 +13,15 @@ 2. Call `kartograph_get_workspace_readiness` to see prepopulated gaps and live instance counts. 3. Call `kartograph_get_schema_ontology` to read the current entity/relationship types. 4. Edit the ontology JSON (full replace) and call `kartograph_save_schema_ontology`. -5. Scan `repository-files/<data_source_name>/` with Read/Grep/Glob to derive instances. -6. Create entity instances in batches via `kartograph_apply_graph_mutations` (JSONL CREATE lines). -7. Verify with `kartograph_list_instances_by_type`, `kartograph_list_relationship_instances`, and `kartograph_search_graph_by_slug`. +5. For prepopulated types at scale: run a script under `instance_generators/` (examples: + `data_source.py`, `folder.py`, `source_file.py`, or your own), then + `python3 instance_generators/json_instances_to_jsonl.py <entity_label> out/instances.json`. +6. After entity nodes exist, convert relationship JSON with + `json_relationships_to_jsonl.py <edge_label> <source_entity> <target_entity> out/relationships.json`. +7. Optional: `kartograph_check_graph_slugs` to batch-check which slugs already exist before CREATE. +8. Dry-run with `kartograph_validate_graph_mutations_from_file`, then apply with + `kartograph_apply_graph_mutations_from_file` (or inline tools for small fixes). +9. Verify with `kartograph_list_instances_by_type` and `kartograph_get_workspace_readiness`. 
## Entity type (node type) shape @@ -27,12 +34,14 @@ "required_properties": ["name"], "optional_properties": ["team"], "prepopulated": false, - "prepopulated_instance_count": 0 + "prepopulated_instance_count": 0, + "instance_generator": "source_file.py" } ``` - `label`: lowercase snake_case type name (required). - `prepopulated`: when true, bootstrap transition requires at least one instance. +- `instance_generator`: optional script name under `instance_generators/` (example templates or your own). - Saving replaces the entire ontology — read first, merge your edits, then save. ## Relationship type (edge type) shape @@ -47,11 +56,13 @@ "target_labels": ["api_endpoint"], "properties": [], "prepopulated": true, - "prepopulated_instance_count": 0 + "prepopulated_instance_count": 0, + "instance_generator": "my_edges.py" } ``` - `source_labels` / `target_labels`: allowed node type labels for edge endpoints. +- `instance_generator`: optional script under `instance_generators/` for relationship prepopulation. - `prepopulated`: when true, bootstrap transition requires at least one instance of this relationship type. Every listed source and target entity type must also have `prepopulated: true`. @@ -77,7 +88,10 @@ - CREATE requires `data_source_id` and `source_path` in `set_properties`. - Node CREATE requires `slug` in `set_properties` (kebab-case, unique per type). - `knowledge_graph_id` is stamped by the platform — do not set it. -- Apply in batches of 25–50 CREATE lines; create all entity nodes before relationship edges. +- For large sets: Bash + custom script under `instance_generators/` → JSONL file → apply-from-file tool. +- CREATE is strict: existing types/instances must be changed with UPDATE, not CREATE again. +- Dry-run before apply: `kartograph_validate_graph_mutations` or `kartograph_validate_graph_mutations_from_file`. +- Create all entity nodes before relationship edges. - Sort instances deterministically (by slug or path) before emitting CREATE lines. 
## Instance generation cookbook diff --git a/src/api/extraction/application/skill_resolution_service.py b/src/api/extraction/application/skill_resolution_service.py index de963970b..b093ce459 100644 --- a/src/api/extraction/application/skill_resolution_service.py +++ b/src/api/extraction/application/skill_resolution_service.py @@ -64,67 +64,16 @@ class ResolvedExtractionSkillPack: _GLOBAL_SKILL_TEMPLATES: dict[ExtractionSessionMode, dict[str, str]] = { ExtractionSessionMode.SCHEMA_BOOTSTRAP: { "capabilities_intake": ( - "Begin by asking for user capabilities/goals and confirm whether they " - "want a first-pass schema attempt or guided co-design." + "Ask for goals once, then co-design or propose a first-pass schema." ), - "schema_modeling": ( - "Author entity types (node_types) and relationship types (edge_types) via " - "kartograph_get_schema_ontology → edit → kartograph_save_schema_ontology. " - "Each entity type needs label, description, required_properties, optional " - "prepopulated flag. Each relationship type needs source_labels and " - "target_labels. Call kartograph_get_schema_authoring_guide before the first save." + "schema_workflow": ( + "Call kartograph_get_schema_authoring_guide when you need shapes or mutation rules. " + "Read/save ontology via kartograph_get_schema_ontology and kartograph_save_schema_ontology." ), - "entity_type_authoring": ( - "Create or edit entity types by updating node_types entries: label " - "(lowercase snake_case), description, required_properties, optional_properties, " - "prepopulated, prepopulated_instance_count. Always read the current ontology first." - ), - "relationship_type_authoring": ( - "Create or edit relationship types by updating edge_types entries: label, " - "description, source_labels, target_labels, properties, optional prepopulated " - "and prepopulated_instance_count. When prepopulated is true, every source and " - "target entity type label must already be prepopulated on node_types." 
- ), - "instance_authoring": ( - "Create entity or relationship instances with kartograph_apply_graph_mutations " - "JSONL CREATE lines after types exist. Nodes require slug, data_source_id, " - "and source_path in set_properties. Use ids like label:16hex. Create entity " - "nodes before edges; batch 25–50 lines per apply call." - ), - "relationship_instance_authoring": ( - "Create relationship instances only after endpoint entity nodes exist. Use " - "kartograph_list_instances_by_type or kartograph_list_relationship_instances " - "to resolve start_id/end_id and source_slug/target_slug pairs. Edge CREATE " - "lines require start_id, end_id, data_source_id, and source_path." - ), - "instance_generation": ( - "For prepopulated types, scan repository-files with Read/Grep/Glob using the " - "data_source, folder, or source_file patterns from the authoring guide. " - "Derive kebab-case slugs from paths, apply CREATE batches, then verify with " - "kartograph_list_instances_by_type and kartograph_get_workspace_readiness." - ), - "prepopulation_validation": ( - "Prioritize prepopulated entity and relationship type coverage. Entity types " - "and relationship types marked prepopulated must have instances before " - "extraction-mode transition. Relationship types may only be prepopulated when " - "their source and target entity types are prepopulated too." - ), - "schema_tools": ( - "Available MCP tools (call by exact name): kartograph_get_schema_authoring_guide, " - "kartograph_get_workspace_readiness, kartograph_get_schema_ontology, " - "kartograph_save_schema_ontology, kartograph_apply_graph_mutations, " - "kartograph_list_instances_by_type, kartograph_list_relationship_instances, " - "kartograph_search_graph_by_slug. " - "Filesystem tools: Read, Grep, Glob (workspace mount only). " - "Always start with get_schema_authoring_guide, then get_workspace_readiness." 
- ), - "tools_quickstart": ( - "Workflow: (1) kartograph_get_schema_authoring_guide → " - "(2) kartograph_get_workspace_readiness → (3) kartograph_get_schema_ontology " - "→ (4) Read/Grep/Glob repository-files → (5) kartograph_save_schema_ontology " - "for types → (6) kartograph_apply_graph_mutations for instances in batches " - "→ (7) kartograph_list_instances_by_type to verify. " - "Never call /management or /graph HTTP routes." + "prepopulation": ( + "For prepopulated types: set instance_generator on the type when helpful, run script " + "under instance_generators/ with Bash, convert with json_*_to_jsonl helpers, validate " + "then apply-from-file. CREATE cannot duplicate existing instances — use UPDATE to edit." ), }, ExtractionSessionMode.EXTRACTION_OPERATIONS: { diff --git a/src/api/extraction/infrastructure/instance_generator_templates/README.md b/src/api/extraction/infrastructure/instance_generator_templates/README.md new file mode 100644 index 000000000..da33aefd4 --- /dev/null +++ b/src/api/extraction/infrastructure/instance_generator_templates/README.md @@ -0,0 +1,50 @@ +# Instance generators (examples) + +These scripts are **starting examples**, not fixed entity types. Copy or author your own +`instance_generators/<your_script>.py` for each prepopulated entity type you define in the ontology. 
+ +## Usage + +From the session workspace root (`/workspace` in the agent container): + +```bash +python3 instance_generators/data_source.py repository-files +python3 instance_generators/folder.py repository-files +python3 instance_generators/source_file.py repository-files +``` + +Bulk pipeline (generator → JSONL → validate → apply): + +```bash +mkdir -p instance_generators/out +python3 instance_generators/source_file.py repository-files \ + > instance_generators/out/source_file.json +python3 instance_generators/json_instances_to_jsonl.py source_file \ + --data-source-id schema-bootstrap \ + --source-path graph-management-assistant \ + instance_generators/out/source_file.json \ + > instance_generators/out/source_file.jsonl +# kartograph_validate_graph_mutations_from_file → kartograph_apply_graph_mutations_from_file +``` + +## Contract + +- **Input:** path to `repository-files/` (one folder per connected data source). +- **Output:** JSON array on stdout: `[{"slug": "...", "properties": {...}}, ...]` +- **Deterministic:** sorted iteration, no timestamps in output. +- **Customize:** copy a template script for your entity type label, adjust property names to match your ontology, then run and convert output to graph CREATE mutations. + +## Templates + +| Script | Use when | +|--------|----------| +| `data_source.py` | One instance per top-level folder under `repository-files/` | +| `folder.py` | Directory hierarchy anchors per data source | +| `source_file.py` | One instance per source file (common code/doc extensions) | +| `json_instances_to_jsonl.py` | Convert any generator JSON array to CREATE JSONL for one entity label | +| `json_relationships_to_jsonl.py` | Convert relationship JSON (`source_slug`/`target_slug`) to edge CREATE JSONL | + +Set `instance_generator` on the entity or relationship type in the ontology (e.g. `"source_file.py"` or +`"my_custom_tests.py"`) to document which script the assistant should run. 
+ +After generating slugs, convert to JSONL, dry-run validate, then apply from file. diff --git a/src/api/extraction/infrastructure/instance_generator_templates/__init__.py b/src/api/extraction/infrastructure/instance_generator_templates/__init__.py new file mode 100644 index 000000000..b4aea78b2 --- /dev/null +++ b/src/api/extraction/infrastructure/instance_generator_templates/__init__.py @@ -0,0 +1,14 @@ +"""Bundled deterministic instance generator scripts for sticky session workspaces.""" + +from pathlib import Path + +TEMPLATES_DIR = Path(__file__).resolve().parent + +TEMPLATE_SCRIPT_NAMES = ( + "data_source.py", + "folder.py", + "source_file.py", + "json_instances_to_jsonl.py", + "json_relationships_to_jsonl.py", + "README.md", +) diff --git a/src/api/extraction/infrastructure/instance_generator_templates/data_source.py b/src/api/extraction/infrastructure/instance_generator_templates/data_source.py new file mode 100644 index 000000000..2803aee60 --- /dev/null +++ b/src/api/extraction/infrastructure/instance_generator_templates/data_source.py @@ -0,0 +1,32 @@ +#!/usr/bin/env python3 +"""Generate one entity instance per data-source folder under repository-files.""" + +from __future__ import annotations + +import json +import sys +from pathlib import Path + + +def generate_instances(data_dir: Path) -> list[dict]: + instances: list[dict] = [] + for source_dir in sorted(data_dir.iterdir()): + if not source_dir.is_dir() or source_dir.name.startswith("."): + continue + file_count = sum(1 for path in source_dir.rglob("*") if path.is_file()) + instances.append( + { + "slug": source_dir.name, + "properties": { + "name": source_dir.name, + "source_type": "repository", + "file_count": file_count, + }, + } + ) + return instances + + +if __name__ == "__main__": + root = Path(sys.argv[1]) if len(sys.argv) > 1 else Path("repository-files") + print(json.dumps(generate_instances(root), indent=2)) diff --git 
a/src/api/extraction/infrastructure/instance_generator_templates/folder.py b/src/api/extraction/infrastructure/instance_generator_templates/folder.py new file mode 100644 index 000000000..b576a3c56 --- /dev/null +++ b/src/api/extraction/infrastructure/instance_generator_templates/folder.py @@ -0,0 +1,49 @@ +#!/usr/bin/env python3 +"""Generate folder instances from directory structure under repository-files.""" + +from __future__ import annotations + +import json +import sys +from pathlib import Path + + +def _folder_instance(folder: Path, data_dir: Path, source_name: str, *, is_root: bool) -> dict: + rel_path = folder.relative_to(data_dir) + if is_root: + slug = f"root-{source_name}" + else: + slug = f"folder-{str(rel_path).replace('/', '-').replace('_', '-').lower()}" + child_folders = sum( + 1 for entry in folder.iterdir() if entry.is_dir() and not entry.name.startswith(".") + ) + child_files = sum( + 1 for entry in folder.iterdir() if entry.is_file() and not entry.name.startswith(".") + ) + return { + "slug": slug, + "properties": { + "folder_path": str(rel_path), + "data_source": source_name, + "child_folder_count": child_folders, + "child_file_count": child_files, + }, + } + + +def generate_instances(data_dir: Path) -> list[dict]: + instances: list[dict] = [] + for source_dir in sorted(data_dir.iterdir()): + if not source_dir.is_dir() or source_dir.name.startswith("."): + continue + source_name = source_dir.name + instances.append(_folder_instance(source_dir, data_dir, source_name, is_root=True)) + for subdir in sorted(source_dir.rglob("*")): + if subdir.is_dir() and not any(part.startswith(".") for part in subdir.parts): + instances.append(_folder_instance(subdir, data_dir, source_name, is_root=False)) + return instances + + +if __name__ == "__main__": + root = Path(sys.argv[1]) if len(sys.argv) > 1 else Path("repository-files") + print(json.dumps(generate_instances(root), indent=2)) diff --git 
a/src/api/extraction/infrastructure/instance_generator_templates/json_instances_to_jsonl.py b/src/api/extraction/infrastructure/instance_generator_templates/json_instances_to_jsonl.py new file mode 100644 index 000000000..0e37cee8b --- /dev/null +++ b/src/api/extraction/infrastructure/instance_generator_templates/json_instances_to_jsonl.py @@ -0,0 +1,137 @@ +#!/usr/bin/env python3 +"""Convert generator JSON output to Kartograph CREATE JSONL (entity nodes). + +Reads a JSON array from a file or stdin: + + [{"slug": "my-entity", "properties": {"name": "My Entity", ...}}, ...] + +Writes one CREATE line per instance, sorted by slug. Node ids are deterministic from +entity label + slug (SHA256, same algorithm as the platform EntityIdGenerator with +an empty tenant scope unless --tenant-id is passed). + +Example: + + python3 instance_generators/source_file.py repository-files \\ + > instance_generators/out/files.json + + python3 instance_generators/json_instances_to_jsonl.py source_file \\ + --data-source-id schema-bootstrap \\ + --source-path graph-management-assistant \\ + instance_generators/out/files.json \\ + > instance_generators/out/files.jsonl + + # Then validate and apply via Kartograph schema tools (from-file). 
+""" + +from __future__ import annotations + +import argparse +import hashlib +import json +import sys +from pathlib import Path +from typing import Any + + +def deterministic_node_id(*, entity_label: str, slug: str, tenant_id: str = "") -> str: + normalized_type = entity_label.strip().lower() + combined = f"{tenant_id}:{normalized_type}:{slug.strip()}" + digest = hashlib.sha256(combined.encode()).hexdigest()[:16] + return f"{normalized_type}:{digest}" + + +def instance_to_create_line( + *, + entity_label: str, + slug: str, + properties: dict[str, Any], + data_source_id: str, + source_path: str, + tenant_id: str, +) -> dict[str, Any]: + set_properties = dict(properties) + set_properties.setdefault("slug", slug) + set_properties.setdefault("name", slug) + set_properties["data_source_id"] = data_source_id + set_properties["source_path"] = source_path + return { + "op": "CREATE", + "type": "node", + "id": deterministic_node_id( + entity_label=entity_label, + slug=slug, + tenant_id=tenant_id, + ), + "label": entity_label.strip().lower(), + "set_properties": set_properties, + } + + +def load_instances(payload: Any) -> list[dict[str, Any]]: + if not isinstance(payload, list): + raise ValueError("Generator output must be a JSON array") + instances: list[dict[str, Any]] = [] + for index, row in enumerate(payload): + if not isinstance(row, dict): + raise ValueError(f"Instance at index {index} must be an object") + slug = row.get("slug") + if not slug or not str(slug).strip(): + raise ValueError(f"Instance at index {index} is missing slug") + properties = row.get("properties") or {} + if not isinstance(properties, dict): + raise ValueError(f"Instance at index {index} properties must be an object") + instances.append({"slug": str(slug).strip(), "properties": properties}) + return sorted(instances, key=lambda item: item["slug"]) + + +def main() -> int: + parser = argparse.ArgumentParser( + description="Convert generator JSON array to Kartograph node CREATE JSONL.", + ) + 
parser.add_argument( + "entity_label", + help="Entity type label in the ontology (e.g. source_file, folder).", + ) + parser.add_argument( + "input", + nargs="?", + help="Path to JSON file; omit to read stdin.", + ) + parser.add_argument( + "--tenant-id", + default="", + help="Tenant id for deterministic node ids (optional).", + ) + parser.add_argument( + "--data-source-id", + default="schema-bootstrap", + help="data_source_id stamped on each CREATE line.", + ) + parser.add_argument( + "--source-path", + default="graph-management-assistant", + help="source_path stamped on each CREATE line.", + ) + args = parser.parse_args() + + if args.input: + raw = Path(args.input).read_text(encoding="utf-8") + else: + raw = sys.stdin.read() + + instances = load_instances(json.loads(raw)) + for row in instances: + line = instance_to_create_line( + entity_label=args.entity_label, + slug=row["slug"], + properties=row["properties"], + data_source_id=args.data_source_id, + source_path=args.source_path, + tenant_id=args.tenant_id, + ) + sys.stdout.write(json.dumps(line, separators=(",", ":")) + "\n") + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/src/api/extraction/infrastructure/instance_generator_templates/json_relationships_to_jsonl.py b/src/api/extraction/infrastructure/instance_generator_templates/json_relationships_to_jsonl.py new file mode 100644 index 000000000..8eaf26d0f --- /dev/null +++ b/src/api/extraction/infrastructure/instance_generator_templates/json_relationships_to_jsonl.py @@ -0,0 +1,156 @@ +#!/usr/bin/env python3 +"""Convert relationship generator JSON to Kartograph edge CREATE JSONL. + +Input JSON array: + + [ + { + "source_slug": "entity-a", + "target_slug": "entity-b", + "properties": {"confidence": 0.9} + } + ] + +Endpoint node ids are derived deterministically from source/target entity type labels +and slugs (same hashing rules as the platform). Run after entity nodes exist. 
+ +Example: + + python3 instance_generators/json_relationships_to_jsonl.py depends_on service service \\ + instance_generators/out/depends_on.json \\ + > instance_generators/out/depends_on.jsonl +""" + +from __future__ import annotations + +import argparse +import hashlib +import json +import sys +from pathlib import Path +from typing import Any + + +def deterministic_node_id(*, entity_label: str, slug: str, tenant_id: str = "") -> str: + normalized_type = entity_label.strip().lower() + combined = f"{tenant_id}:{normalized_type}:{slug.strip()}" + digest = hashlib.sha256(combined.encode()).hexdigest()[:16] + return f"{normalized_type}:{digest}" + + +def deterministic_edge_id( + *, + relationship_label: str, + start_id: str, + end_id: str, + tenant_id: str = "", +) -> str: + normalized_label = relationship_label.strip().lower() + combined = f"{tenant_id}:{start_id.strip()}:{normalized_label}:{end_id.strip()}" + digest = hashlib.sha256(combined.encode()).hexdigest()[:16] + return f"{normalized_label}:{digest}" + + +def relationship_to_create_line( + *, + relationship_label: str, + source_entity_type: str, + target_entity_type: str, + source_slug: str, + target_slug: str, + properties: dict[str, Any], + data_source_id: str, + source_path: str, + tenant_id: str, +) -> dict[str, Any]: + start_id = deterministic_node_id( + entity_label=source_entity_type, + slug=source_slug, + tenant_id=tenant_id, + ) + end_id = deterministic_node_id( + entity_label=target_entity_type, + slug=target_slug, + tenant_id=tenant_id, + ) + set_properties = dict(properties) + set_properties["data_source_id"] = data_source_id + set_properties["source_path"] = source_path + return { + "op": "CREATE", + "type": "edge", + "id": deterministic_edge_id( + relationship_label=relationship_label, + start_id=start_id, + end_id=end_id, + tenant_id=tenant_id, + ), + "label": relationship_label.strip().lower(), + "start_id": start_id, + "end_id": end_id, + "set_properties": set_properties, + } + + +def 
load_relationships(payload: Any) -> list[dict[str, Any]]: + if not isinstance(payload, list): + raise ValueError("Generator output must be a JSON array") + relationships: list[dict[str, Any]] = [] + for index, row in enumerate(payload): + if not isinstance(row, dict): + raise ValueError(f"Relationship at index {index} must be an object") + source_slug = row.get("source_slug") + target_slug = row.get("target_slug") + if not source_slug or not str(source_slug).strip(): + raise ValueError(f"Relationship at index {index} is missing source_slug") + if not target_slug or not str(target_slug).strip(): + raise ValueError(f"Relationship at index {index} is missing target_slug") + properties = row.get("properties") or {} + if not isinstance(properties, dict): + raise ValueError(f"Relationship at index {index} properties must be an object") + relationships.append( + { + "source_slug": str(source_slug).strip(), + "target_slug": str(target_slug).strip(), + "properties": properties, + } + ) + return sorted( + relationships, + key=lambda item: (item["source_slug"], item["target_slug"]), + ) + + +def main() -> int: + parser = argparse.ArgumentParser( + description="Convert relationship generator JSON to Kartograph edge CREATE JSONL.", + ) + parser.add_argument("relationship_label", help="Relationship type label in the ontology.") + parser.add_argument("source_entity_type", help="Source endpoint entity type label.") + parser.add_argument("target_entity_type", help="Target endpoint entity type label.") + parser.add_argument("input", nargs="?", help="Path to JSON file; omit to read stdin.") + parser.add_argument("--tenant-id", default="", help="Tenant id for deterministic ids.") + parser.add_argument("--data-source-id", default="schema-bootstrap") + parser.add_argument("--source-path", default="graph-management-assistant") + args = parser.parse_args() + + raw = Path(args.input).read_text(encoding="utf-8") if args.input else sys.stdin.read() + relationships = 
load_relationships(json.loads(raw)) + for row in relationships: + line = relationship_to_create_line( + relationship_label=args.relationship_label, + source_entity_type=args.source_entity_type, + target_entity_type=args.target_entity_type, + source_slug=row["source_slug"], + target_slug=row["target_slug"], + properties=row["properties"], + data_source_id=args.data_source_id, + source_path=args.source_path, + tenant_id=args.tenant_id, + ) + sys.stdout.write(json.dumps(line, separators=(",", ":")) + "\n") + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/src/api/extraction/infrastructure/instance_generator_templates/source_file.py b/src/api/extraction/infrastructure/instance_generator_templates/source_file.py new file mode 100644 index 000000000..34b845d11 --- /dev/null +++ b/src/api/extraction/infrastructure/instance_generator_templates/source_file.py @@ -0,0 +1,59 @@ +#!/usr/bin/env python3 +"""Generate one entity instance per source file under repository-files.""" + +from __future__ import annotations + +import json +import sys +from pathlib import Path + +FILE_EXTENSIONS = ( + ".md", + ".go", + ".py", + ".yaml", + ".yml", + ".json", + ".ts", + ".tsx", + ".js", + ".java", + ".rs", + ".rb", + ".sh", +) + + +def _path_to_slug(rel_path: Path) -> str: + return str(rel_path).replace("/", "-").replace("_", "-").replace(".", "-").lower() + + +def generate_instances(data_dir: Path) -> list[dict]: + instances: list[dict] = [] + for source_dir in sorted(data_dir.iterdir()): + if not source_dir.is_dir() or source_dir.name.startswith("."): + continue + for file_path in sorted(source_dir.rglob("*")): + if not file_path.is_file(): + continue + if file_path.suffix.lower() not in FILE_EXTENSIONS: + continue + if any(part.startswith(".") for part in file_path.parts): + continue + rel_path = file_path.relative_to(data_dir) + instances.append( + { + "slug": _path_to_slug(rel_path), + "properties": { + "file_path": str(rel_path), + "name": 
file_path.name, + "source_path": str(rel_path), + }, + } + ) + return instances + + +if __name__ == "__main__": + root = Path(sys.argv[1]) if len(sys.argv) > 1 else Path("repository-files") + print(json.dumps(generate_instances(root), indent=2)) diff --git a/src/api/extraction/infrastructure/sticky_session_workdir_materializer.py b/src/api/extraction/infrastructure/sticky_session_workdir_materializer.py index 237efa724..73097db26 100644 --- a/src/api/extraction/infrastructure/sticky_session_workdir_materializer.py +++ b/src/api/extraction/infrastructure/sticky_session_workdir_materializer.py @@ -8,6 +8,10 @@ import zipfile from extraction.domain.prepared_job_package_source import PreparedJobPackageSource +from extraction.infrastructure.instance_generator_templates import ( + TEMPLATES_DIR, + TEMPLATE_SCRIPT_NAMES, +) from shared_kernel.job_package.path_safety import validate_zip_entry_name from shared_kernel.job_package.reader import JobPackageReader from shared_kernel.job_package.value_objects import JobPackageId @@ -92,6 +96,7 @@ def prepare( marker = session_root / "knowledge-graph-id" marker.write_text(knowledge_graph_id, encoding="utf-8") + self._materialize_instance_generators(session_root) self._write_workspace_index( session_root=session_root, knowledge_graph_id=knowledge_graph_id, @@ -99,6 +104,16 @@ def prepare( ) return session_root + @staticmethod + def _materialize_instance_generators(session_root: Path) -> None: + """Copy bundled generator templates into the session workspace.""" + target_dir = session_root / "instance_generators" + _replace_directory(target_dir) + for name in TEMPLATE_SCRIPT_NAMES: + source = TEMPLATES_DIR / name + if source.is_file(): + shutil.copy2(source, target_dir / name) + @staticmethod def _extension_counts(root: Path) -> dict[str, int]: """Summarize file extensions under one materialized repository folder.""" diff --git a/src/api/extraction/ports/workload_graph.py b/src/api/extraction/ports/workload_graph.py index 
d24f808c1..64272913e 100644 --- a/src/api/extraction/ports/workload_graph.py +++ b/src/api/extraction/ports/workload_graph.py @@ -92,3 +92,45 @@ async def count_relationship_instances( ) -> int: """Count live relationship instances for one relationship type.""" ... + + async def find_existing_node_ids( + self, + *, + tenant_id: str, + knowledge_graph_id: str, + node_ids: tuple[str, ...], + ) -> frozenset[str]: + """Return node IDs that already exist in the knowledge graph.""" + ... + + async def find_existing_edge_ids( + self, + *, + tenant_id: str, + knowledge_graph_id: str, + edge_ids: tuple[str, ...], + ) -> frozenset[str]: + """Return edge IDs that already exist in the knowledge graph.""" + ... + + async def find_existing_slugs_for_entity_type( + self, + *, + tenant_id: str, + knowledge_graph_id: str, + entity_type: str, + slugs: tuple[str, ...], + ) -> frozenset[str]: + """Return slugs that already exist for one entity type.""" + ... + + async def partition_slugs_by_existence( + self, + *, + tenant_id: str, + knowledge_graph_id: str, + entity_type: str, + slugs: tuple[str, ...], + ) -> tuple[list[str], list[str]]: + """Return (existing_slugs, missing_slugs) sorted for one entity type.""" + ... 
diff --git a/src/api/extraction/ports/workload_schema.py b/src/api/extraction/ports/workload_schema.py index 10b19a84f..ca32512d3 100644 --- a/src/api/extraction/ports/workload_schema.py +++ b/src/api/extraction/ports/workload_schema.py @@ -21,6 +21,15 @@ async def replace_ontology( ) -> OntologyConfig: """Replace the canonical ontology via DEFINE mutation-log operations.""" + async def validate_mutation_jsonl( + self, + *, + tenant_id: str, + knowledge_graph_id: str, + jsonl: str, + ) -> dict[str, object]: + """Dry-run validation for JSONL mutations without writing to the graph.""" + async def apply_mutation_jsonl( self, *, diff --git a/src/api/extraction/presentation/workload_routes.py b/src/api/extraction/presentation/workload_routes.py index b60dae24e..b65e582f1 100644 --- a/src/api/extraction/presentation/workload_routes.py +++ b/src/api/extraction/presentation/workload_routes.py @@ -61,11 +61,26 @@ class WorkloadMutationApplyRequest(BaseModel): jsonl: str = Field(min_length=1) +class WorkloadMutationValidateRequest(BaseModel): + """JSONL mutation batch to validate without applying.""" + + jsonl: str = Field(min_length=1) + + class WorkloadMutationApplyResponse(BaseModel): """Result of applying a JSONL mutation batch.""" applied: bool errors: list[str] = Field(default_factory=list) + operations_applied: int = 0 + + +class WorkloadMutationValidateResponse(BaseModel): + """Dry-run validation result for a JSONL mutation batch.""" + + valid: bool + errors: list[str] = Field(default_factory=list) + operation_count: int = 0 class WorkloadSchemaAuthoringGuideResponse(BaseModel): @@ -189,6 +204,28 @@ async def workload_save_schema_ontology( ) +@router.post( + "/mutations/validate", + response_model=WorkloadMutationValidateResponse, +) +async def workload_validate_mutations( + request: WorkloadMutationValidateRequest, + auth: Annotated[WorkloadAuthContext, Depends(get_workload_auth_context)] = ..., + schema_service: Annotated[IWorkloadSchemaService, 
Depends(get_workload_schema_service)] = ..., +) -> WorkloadMutationValidateResponse: + _require_chat_scope(auth) + result = await schema_service.validate_mutation_jsonl( + tenant_id=auth.tenant_id, + knowledge_graph_id=auth.knowledge_graph_id, + jsonl=request.jsonl, + ) + return WorkloadMutationValidateResponse( + valid=bool(result.get("valid")), + errors=[str(item) for item in result.get("errors", [])], + operation_count=int(result.get("operation_count", 0)), + ) + + @router.post( "/mutations/apply", response_model=WorkloadMutationApplyResponse, @@ -207,6 +244,48 @@ async def workload_apply_mutations( return WorkloadMutationApplyResponse( applied=bool(result.get("applied")), errors=[str(item) for item in result.get("errors", [])], + operations_applied=int(result.get("operations_applied", 0)), + ) + + +class WorkloadCheckSlugsRequest(BaseModel): + """Batch slug existence check for one entity type.""" + + entity_type: str = Field(min_length=1) + slugs: list[str] = Field(min_length=1) + + +class WorkloadCheckSlugsResponse(BaseModel): + """Partition of requested slugs into existing and missing.""" + + entity_type: str + existing_slugs: list[str] = Field(default_factory=list) + missing_slugs: list[str] = Field(default_factory=list) + + +@router.post( + "/graph/check-slugs", + response_model=WorkloadCheckSlugsResponse, +) +async def workload_check_slugs( + request: WorkloadCheckSlugsRequest, + auth: Annotated[WorkloadAuthContext, Depends(get_workload_auth_context)] = ..., + reader: Annotated[IWorkloadGraphReader, Depends(get_workload_graph_reader)] = ..., +) -> WorkloadCheckSlugsResponse: + _require_chat_scope(auth) + normalized = tuple( + sorted({str(slug).strip() for slug in request.slugs if str(slug).strip()}) + ) + existing, missing = await reader.partition_slugs_by_existence( + tenant_id=auth.tenant_id, + knowledge_graph_id=auth.knowledge_graph_id, + entity_type=request.entity_type.strip(), + slugs=normalized, + ) + return WorkloadCheckSlugsResponse( + 
entity_type=request.entity_type.strip(), + existing_slugs=existing, + missing_slugs=missing, ) diff --git a/src/api/graph/infrastructure/graph_repository.py b/src/api/graph/infrastructure/graph_repository.py index e5802f3d7..eb8bb1bbf 100644 --- a/src/api/graph/infrastructure/graph_repository.py +++ b/src/api/graph/infrastructure/graph_repository.py @@ -13,6 +13,7 @@ from age.models import Edge as AgeEdge # type: ignore from age.models import Vertex as AgeVertex +from graph.infrastructure.age_bulk_loading.utils import validate_label_name from graph.ports.repositories import IGraphReadOnlyRepository from graph.domain.value_objects import EdgeRecord, NodeRecord, QueryResultRow from graph.ports.protocols import GraphClientProtocol, NodeNeighborsResult @@ -23,6 +24,10 @@ pass +def _escape_cypher_string(value: str) -> str: + return value.replace("\\", "\\\\").replace("'", "\\'") + + class GraphExtractionReadOnlyRepository(IGraphReadOnlyRepository): """Read-only repository for the Extraction bounded context. 
@@ -169,6 +174,84 @@ def find_nodes_by_label( nodes.append(self._vertex_to_node_record(result_map["node"])) return nodes + def find_existing_node_ids( + self, + node_ids: list[str], + *, + knowledge_graph_id: str, + chunk_size: int = 200, + ) -> set[str]: + """Return node IDs from ``node_ids`` that already exist in the knowledge graph.""" + if not node_ids: + return set() + existing: set[str] = set() + for offset in range(0, len(node_ids), chunk_size): + chunk = node_ids[offset : offset + chunk_size] + literals = ", ".join(f"'{_escape_cypher_string(node_id)}'" for node_id in chunk) + query = f""" + MATCH (n {{graph_id: '{self._graph_id}', knowledge_graph_id: '{_escape_cypher_string(knowledge_graph_id)}'}}) + WHERE n.id IN [{literals}] + RETURN n.id AS id + """ + result = self._client.execute_cypher(query) + for row in result.rows: + if row and row[0] is not None: + existing.add(str(row[0])) + return existing + + def find_existing_edge_ids( + self, + edge_ids: list[str], + *, + knowledge_graph_id: str, + chunk_size: int = 200, + ) -> set[str]: + """Return edge IDs from ``edge_ids`` that already exist in the knowledge graph.""" + if not edge_ids: + return set() + existing: set[str] = set() + for offset in range(0, len(edge_ids), chunk_size): + chunk = edge_ids[offset : offset + chunk_size] + literals = ", ".join(f"'{_escape_cypher_string(edge_id)}'" for edge_id in chunk) + query = f""" + MATCH ()-[r {{graph_id: '{self._graph_id}', knowledge_graph_id: '{_escape_cypher_string(knowledge_graph_id)}'}}]->() + WHERE r.id IN [{literals}] + RETURN r.id AS id + """ + result = self._client.execute_cypher(query) + for row in result.rows: + if row and row[0] is not None: + existing.add(str(row[0])) + return existing + + def find_existing_slugs_for_entity_type( + self, + entity_type: str, + slugs: list[str], + *, + knowledge_graph_id: str, + chunk_size: int = 200, + ) -> set[str]: + """Return slugs that already exist for one entity type within a knowledge graph.""" + if not 
slugs: + return set() + validate_label_name(entity_type) + existing: set[str] = set() + kg = _escape_cypher_string(knowledge_graph_id) + for offset in range(0, len(slugs), chunk_size): + chunk = slugs[offset : offset + chunk_size] + literals = ", ".join(f"'{_escape_cypher_string(slug)}'" for slug in chunk) + query = f""" + MATCH (n:{entity_type} {{graph_id: '{self._graph_id}', knowledge_graph_id: '{kg}'}}) + WHERE n.slug IN [{literals}] + RETURN n.slug AS slug + """ + result = self._client.execute_cypher(query) + for row in result.rows: + if row and row[0] is not None: + existing.add(str(row[0])) + return existing + def count_nodes_by_label( self, node_type: str, diff --git a/src/api/infrastructure/canonical_schema/ontology_mutation_builder.py b/src/api/infrastructure/canonical_schema/ontology_mutation_builder.py index 4d797ee47..592697249 100644 --- a/src/api/infrastructure/canonical_schema/ontology_mutation_builder.py +++ b/src/api/infrastructure/canonical_schema/ontology_mutation_builder.py @@ -41,18 +41,24 @@ def ontology_config_to_define_operations( def node_type_metadata(node_type) -> dict: """Serialize node-type authoring metadata for canonical storage.""" - return { + metadata = { "prepopulated": node_type.prepopulated, "prepopulated_instance_count": node_type.prepopulated_instance_count, } + if node_type.instance_generator: + metadata["instance_generator"] = node_type.instance_generator + return metadata def edge_type_metadata(edge_type) -> dict: """Serialize edge-type authoring metadata for canonical storage.""" - return { + metadata = { "source_labels": list(edge_type.source_labels), "target_labels": list(edge_type.target_labels), "properties": list(edge_type.properties), "prepopulated": edge_type.prepopulated, "prepopulated_instance_count": edge_type.prepopulated_instance_count, } + if edge_type.instance_generator: + metadata["instance_generator"] = edge_type.instance_generator + return metadata diff --git 
a/src/api/infrastructure/canonical_schema/ontology_projection.py b/src/api/infrastructure/canonical_schema/ontology_projection.py index 7f00c5e39..52879022d 100644 --- a/src/api/infrastructure/canonical_schema/ontology_projection.py +++ b/src/api/infrastructure/canonical_schema/ontology_projection.py @@ -12,6 +12,13 @@ ) +def _optional_metadata_str(value: object) -> str | None: + if value is None: + return None + text = str(value).strip() + return text or None + + def stored_definitions_to_ontology_config( stored_definitions: list[StoredKnowledgeGraphTypeDefinition], ) -> OntologyConfig: @@ -31,6 +38,9 @@ def stored_definitions_to_ontology_config( prepopulated_instance_count=int( stored.metadata.get("prepopulated_instance_count", 0) ), + instance_generator=_optional_metadata_str( + stored.metadata.get("instance_generator") + ), ) ) elif stored.entity_type == "edge": @@ -45,6 +55,9 @@ def stored_definitions_to_ontology_config( prepopulated_instance_count=int( stored.metadata.get("prepopulated_instance_count", 0) ), + instance_generator=_optional_metadata_str( + stored.metadata.get("instance_generator") + ), ) ) diff --git a/src/api/infrastructure/extraction_workload/graph_reader.py b/src/api/infrastructure/extraction_workload/graph_reader.py index 791c6af02..2f15c81eb 100644 --- a/src/api/infrastructure/extraction_workload/graph_reader.py +++ b/src/api/infrastructure/extraction_workload/graph_reader.py @@ -2,6 +2,8 @@ from __future__ import annotations +import asyncio + from graph.application.observability import DefaultGraphServiceProbe from graph.application.services import GraphQueryService from graph.infrastructure.age_client import AgeGraphClient @@ -213,3 +215,118 @@ async def count_relationship_instances( ) finally: client.disconnect() + + async def find_existing_node_ids( + self, + *, + tenant_id: str, + knowledge_graph_id: str, + node_ids: tuple[str, ...], + ) -> frozenset[str]: + if not node_ids: + return frozenset() + graph_name = f"tenant_{tenant_id}" + 
+ def _query() -> set[str]: + factory = ConnectionFactory(self._settings, pool=self._pool) + client = AgeGraphClient( + self._settings, connection_factory=factory, graph_name=graph_name + ) + client.connect() + try: + repository = GraphExtractionReadOnlyRepository( + client=client, + graph_id=graph_name, + ) + return repository.find_existing_node_ids( + list(node_ids), + knowledge_graph_id=knowledge_graph_id, + ) + finally: + client.disconnect() + + return frozenset(await asyncio.to_thread(_query)) + + async def find_existing_edge_ids( + self, + *, + tenant_id: str, + knowledge_graph_id: str, + edge_ids: tuple[str, ...], + ) -> frozenset[str]: + if not edge_ids: + return frozenset() + graph_name = f"tenant_{tenant_id}" + + def _query() -> set[str]: + factory = ConnectionFactory(self._settings, pool=self._pool) + client = AgeGraphClient( + self._settings, connection_factory=factory, graph_name=graph_name + ) + client.connect() + try: + repository = GraphExtractionReadOnlyRepository( + client=client, + graph_id=graph_name, + ) + return repository.find_existing_edge_ids( + list(edge_ids), + knowledge_graph_id=knowledge_graph_id, + ) + finally: + client.disconnect() + + return frozenset(await asyncio.to_thread(_query)) + + async def find_existing_slugs_for_entity_type( + self, + *, + tenant_id: str, + knowledge_graph_id: str, + entity_type: str, + slugs: tuple[str, ...], + ) -> frozenset[str]: + if not slugs: + return frozenset() + graph_name = f"tenant_{tenant_id}" + + def _query() -> set[str]: + factory = ConnectionFactory(self._settings, pool=self._pool) + client = AgeGraphClient( + self._settings, connection_factory=factory, graph_name=graph_name + ) + client.connect() + try: + repository = GraphExtractionReadOnlyRepository( + client=client, + graph_id=graph_name, + ) + return repository.find_existing_slugs_for_entity_type( + entity_type, + list(slugs), + knowledge_graph_id=knowledge_graph_id, + ) + finally: + client.disconnect() + + return frozenset(await 
asyncio.to_thread(_query)) + + async def partition_slugs_by_existence( + self, + *, + tenant_id: str, + knowledge_graph_id: str, + entity_type: str, + slugs: tuple[str, ...], + ) -> tuple[list[str], list[str]]: + if not slugs: + return [], [] + existing = await self.find_existing_slugs_for_entity_type( + tenant_id=tenant_id, + knowledge_graph_id=knowledge_graph_id, + entity_type=entity_type, + slugs=slugs, + ) + existing_sorted = sorted(existing) + missing_sorted = sorted(slug for slug in slugs if slug not in existing) + return existing_sorted, missing_sorted diff --git a/src/api/infrastructure/extraction_workload/mutation_preflight.py b/src/api/infrastructure/extraction_workload/mutation_preflight.py new file mode 100644 index 000000000..2687cb39f --- /dev/null +++ b/src/api/infrastructure/extraction_workload/mutation_preflight.py @@ -0,0 +1,124 @@ +"""Pre-apply validation for workload JSONL mutations (strict CREATE semantics).""" + +from __future__ import annotations + +from graph.domain.value_objects import EntityType, MutationOperation, MutationOperationType +from management.ports.exceptions import CanonicalSchemaMutationError + +from extraction.ports.workload_graph import IWorkloadGraphReader + + +def parse_mutation_jsonl(jsonl_content: str) -> list[MutationOperation]: + from infrastructure.extraction_workload.graph_mutation_writer import ( + GraphWorkloadGraphMutationWriter, + ) + + return GraphWorkloadGraphMutationWriter.parse_jsonl(jsonl_content) + + +async def validate_mutation_jsonl( + *, + jsonl_content: str, + tenant_id: str, + knowledge_graph_id: str, + graph_reader: IWorkloadGraphReader | None, + existing_type_keys: frozenset[tuple[str, str]], +) -> list[str]: + """Return validation errors; empty list means the batch may be applied.""" + try: + operations = parse_mutation_jsonl(jsonl_content) + except CanonicalSchemaMutationError as exc: + return [str(exc)] + + errors: list[str] = [] + seen_create_ids: dict[str, int] = {} + + create_node_ids: 
list[str] = [] + create_edge_ids: list[str] = [] + slug_checks: dict[str, set[str]] = {} + + for line_num, operation in enumerate(operations, start=1): + if operation.op == MutationOperationType.DEFINE and operation.label: + key = (operation.label, operation.type) + if key in existing_type_keys: + errors.append( + f"Line {line_num}: DEFINE for {operation.type} `{operation.label}` " + "already exists; update the ontology via kartograph_save_schema_ontology " + "instead of DEFINE." + ) + + if operation.op == MutationOperationType.CREATE and operation.id: + if operation.id in seen_create_ids: + errors.append( + f"Line {line_num}: duplicate CREATE id `{operation.id}` " + f"(first seen on line {seen_create_ids[operation.id]})." + ) + else: + seen_create_ids[operation.id] = line_num + + if operation.type == EntityType.NODE.value: + create_node_ids.append(operation.id) + slug = (operation.set_properties or {}).get("slug") + label = operation.label + if slug and label: + slug_checks.setdefault(label, set()).add(str(slug)) + elif operation.type == EntityType.EDGE.value: + create_edge_ids.append(operation.id) + + if graph_reader is not None and not errors: + if create_node_ids: + existing_node_ids = await graph_reader.find_existing_node_ids( + tenant_id=tenant_id, + knowledge_graph_id=knowledge_graph_id, + node_ids=tuple(create_node_ids), + ) + for line_num, operation in enumerate(operations, start=1): + if ( + operation.op == MutationOperationType.CREATE + and operation.type == EntityType.NODE.value + and operation.id in existing_node_ids + ): + errors.append( + f"Line {line_num}: node id `{operation.id}` already exists; " + "use UPDATE to change it." 
+ ) + + if create_edge_ids: + existing_edge_ids = await graph_reader.find_existing_edge_ids( + tenant_id=tenant_id, + knowledge_graph_id=knowledge_graph_id, + edge_ids=tuple(create_edge_ids), + ) + for line_num, operation in enumerate(operations, start=1): + if ( + operation.op == MutationOperationType.CREATE + and operation.type == EntityType.EDGE.value + and operation.id in existing_edge_ids + ): + errors.append( + f"Line {line_num}: edge id `{operation.id}` already exists; " + "use UPDATE to change it." + ) + + for label, slugs in slug_checks.items(): + existing_slugs = await graph_reader.find_existing_slugs_for_entity_type( + tenant_id=tenant_id, + knowledge_graph_id=knowledge_graph_id, + entity_type=label, + slugs=tuple(slugs), + ) + if not existing_slugs: + continue + for line_num, operation in enumerate(operations, start=1): + if operation.op != MutationOperationType.CREATE: + continue + if operation.type != EntityType.NODE.value or operation.label != label: + continue + slug = str((operation.set_properties or {}).get("slug") or "") + if slug in existing_slugs: + errors.append( + f"Line {line_num}: {label} slug `{slug}` already exists; " + "use UPDATE to change it." 
+ ) + + return errors diff --git a/src/api/infrastructure/extraction_workload/schema_service.py b/src/api/infrastructure/extraction_workload/schema_service.py index 94d3b716b..e73806089 100644 --- a/src/api/infrastructure/extraction_workload/schema_service.py +++ b/src/api/infrastructure/extraction_workload/schema_service.py @@ -12,9 +12,14 @@ from infrastructure.extraction_workload.graph_mutation_writer import ( GraphWorkloadGraphMutationWriter, ) +from infrastructure.extraction_workload.mutation_preflight import ( + parse_mutation_jsonl, + validate_mutation_jsonl, +) from infrastructure.extraction_workload.workspace_readiness import ( sync_prepopulated_instance_counts, ) +from graph.domain.value_objects import EntityType from management.domain.value_objects import OntologyConfig from management.ports.exceptions import CanonicalSchemaMutationError @@ -47,6 +52,42 @@ async def replace_ontology( await self._session.commit() return config + async def _existing_type_keys(self, knowledge_graph_id: str) -> frozenset[tuple[str, str]]: + ontology = await self.get_ontology(knowledge_graph_id=knowledge_graph_id) + if ontology is None: + return frozenset() + keys: set[tuple[str, str]] = set() + for node_type in ontology.node_types: + keys.add((node_type.label, EntityType.NODE.value)) + for edge_type in ontology.edge_types: + keys.add((edge_type.label, EntityType.EDGE.value)) + return frozenset(keys) + + async def validate_mutation_jsonl( + self, + *, + tenant_id: str, + knowledge_graph_id: str, + jsonl: str, + ) -> dict[str, object]: + errors = await validate_mutation_jsonl( + jsonl_content=jsonl, + tenant_id=tenant_id, + knowledge_graph_id=knowledge_graph_id, + graph_reader=self._graph_reader, + existing_type_keys=await self._existing_type_keys(knowledge_graph_id), + ) + operation_count = 0 + try: + operation_count = len(parse_mutation_jsonl(jsonl)) + except CanonicalSchemaMutationError: + operation_count = 0 + return { + "valid": not errors, + "errors": errors, + 
"operation_count": operation_count, + } + async def apply_mutation_jsonl( self, *, @@ -54,8 +95,18 @@ async def apply_mutation_jsonl( knowledge_graph_id: str, jsonl: str, ) -> dict[str, object]: + preflight_errors = await validate_mutation_jsonl( + jsonl_content=jsonl, + tenant_id=tenant_id, + knowledge_graph_id=knowledge_graph_id, + graph_reader=self._graph_reader, + existing_type_keys=await self._existing_type_keys(knowledge_graph_id), + ) + if preflight_errors: + return {"applied": False, "errors": preflight_errors} + try: - operations = GraphWorkloadGraphMutationWriter.parse_jsonl(jsonl) + operations = parse_mutation_jsonl(jsonl) define_ops, instance_ops = GraphWorkloadGraphMutationWriter.split_operations( operations ) diff --git a/src/api/management/domain/value_objects.py b/src/api/management/domain/value_objects.py index 5306f0cc8..caeff03f2 100644 --- a/src/api/management/domain/value_objects.py +++ b/src/api/management/domain/value_objects.py @@ -410,6 +410,7 @@ class NodeTypeDefinition: optional_properties: tuple[str, ...] 
= field(default_factory=tuple) prepopulated: bool = False prepopulated_instance_count: int = 0 + instance_generator: str | None = None def __post_init__(self) -> None: """Validate that label is non-empty.""" @@ -417,10 +418,12 @@ def __post_init__(self) -> None: raise ValueError("NodeTypeDefinition label must not be empty") if self.prepopulated_instance_count < 0: raise ValueError("prepopulated_instance_count must be >= 0") + if self.instance_generator is not None and not self.instance_generator.strip(): + raise ValueError("instance_generator must not be empty or whitespace-only") def to_dict(self) -> dict[str, Any]: """Serialize to a plain dict suitable for JSON persistence.""" - return { + payload = { "label": self.label, "description": self.description, "required_properties": list(self.required_properties), @@ -428,10 +431,15 @@ def to_dict(self) -> dict[str, Any]: "prepopulated": self.prepopulated, "prepopulated_instance_count": self.prepopulated_instance_count, } + if self.instance_generator: + payload["instance_generator"] = self.instance_generator + return payload @classmethod def from_dict(cls, data: dict[str, Any]) -> NodeTypeDefinition: """Deserialize from a plain dict.""" + raw_generator = data.get("instance_generator") + instance_generator = str(raw_generator).strip() if raw_generator else None return cls( label=data["label"], description=data.get("description", ""), @@ -439,6 +447,7 @@ def from_dict(cls, data: dict[str, Any]) -> NodeTypeDefinition: optional_properties=tuple(data.get("optional_properties", [])), prepopulated=bool(data.get("prepopulated", False)), prepopulated_instance_count=int(data.get("prepopulated_instance_count", 0)), + instance_generator=instance_generator or None, ) @@ -465,6 +474,7 @@ class EdgeTypeDefinition: properties: tuple[str, ...] 
= field(default_factory=tuple) prepopulated: bool = False prepopulated_instance_count: int = 0 + instance_generator: str | None = None def __post_init__(self) -> None: """Validate that label is non-empty.""" @@ -472,10 +482,12 @@ def __post_init__(self) -> None: raise ValueError("EdgeTypeDefinition label must not be empty") if self.prepopulated_instance_count < 0: raise ValueError("prepopulated_instance_count must be >= 0") + if self.instance_generator is not None and not self.instance_generator.strip(): + raise ValueError("instance_generator must not be empty or whitespace-only") def to_dict(self) -> dict[str, Any]: """Serialize to a plain dict suitable for JSON persistence.""" - return { + payload = { "label": self.label, "description": self.description, "source_labels": list(self.source_labels), @@ -484,10 +496,15 @@ def to_dict(self) -> dict[str, Any]: "prepopulated": self.prepopulated, "prepopulated_instance_count": self.prepopulated_instance_count, } + if self.instance_generator: + payload["instance_generator"] = self.instance_generator + return payload @classmethod def from_dict(cls, data: dict[str, Any]) -> EdgeTypeDefinition: """Deserialize from a plain dict.""" + raw_generator = data.get("instance_generator") + instance_generator = str(raw_generator).strip() if raw_generator else None return cls( label=data["label"], description=data.get("description", ""), @@ -496,6 +513,7 @@ def from_dict(cls, data: dict[str, Any]) -> EdgeTypeDefinition: properties=tuple(data.get("properties", [])), prepopulated=bool(data.get("prepopulated", False)), prepopulated_instance_count=int(data.get("prepopulated_instance_count", 0)), + instance_generator=instance_generator or None, ) diff --git a/src/api/management/presentation/knowledge_graphs/models.py b/src/api/management/presentation/knowledge_graphs/models.py index 91fb735a6..b0a15b935 100644 --- a/src/api/management/presentation/knowledge_graphs/models.py +++ b/src/api/management/presentation/knowledge_graphs/models.py @@ 
-271,6 +271,10 @@ class NodeTypeDefinitionModel(BaseModel): ge=0, description="Current known instance count used for readiness evaluation", ) + instance_generator: str | None = Field( + default=None, + description="Optional workspace-relative script under instance_generators/ for prepopulation", + ) def to_domain(self) -> NodeTypeDefinition: """Convert to domain NodeTypeDefinition value object.""" @@ -281,6 +285,7 @@ def to_domain(self) -> NodeTypeDefinition: optional_properties=tuple(self.optional_properties), prepopulated=self.prepopulated, prepopulated_instance_count=self.prepopulated_instance_count, + instance_generator=self.instance_generator, ) @classmethod @@ -293,6 +298,7 @@ def from_domain(cls, nt: NodeTypeDefinition) -> NodeTypeDefinitionModel: optional_properties=list(nt.optional_properties), prepopulated=nt.prepopulated, prepopulated_instance_count=nt.prepopulated_instance_count, + instance_generator=nt.instance_generator, ) @@ -325,6 +331,10 @@ class EdgeTypeDefinitionModel(BaseModel): ge=0, description="Current known instance count used for readiness evaluation", ) + instance_generator: str | None = Field( + default=None, + description="Optional workspace-relative script under instance_generators/ for prepopulation", + ) def to_domain(self) -> EdgeTypeDefinition: """Convert to domain EdgeTypeDefinition value object.""" @@ -336,6 +346,7 @@ def to_domain(self) -> EdgeTypeDefinition: properties=tuple(self.properties), prepopulated=self.prepopulated, prepopulated_instance_count=self.prepopulated_instance_count, + instance_generator=self.instance_generator, ) @classmethod @@ -349,6 +360,7 @@ def from_domain(cls, et: EdgeTypeDefinition) -> EdgeTypeDefinitionModel: properties=list(et.properties), prepopulated=et.prepopulated, prepopulated_instance_count=et.prepopulated_instance_count, + instance_generator=et.instance_generator, ) diff --git a/src/api/tests/unit/extraction/application/test_skill_resolution_service.py 
b/src/api/tests/unit/extraction/application/test_skill_resolution_service.py index 39990c3df..5e5b79643 100644 --- a/src/api/tests/unit/extraction/application/test_skill_resolution_service.py +++ b/src/api/tests/unit/extraction/application/test_skill_resolution_service.py @@ -34,17 +34,13 @@ async def test_bootstrap_mode_uses_bootstrap_defaults(self): mode=ExtractionSessionMode.SCHEMA_BOOTSTRAP, ) - assert "schema_modeling" in resolved.skills - assert "entity_type_authoring" in resolved.skills - assert "relationship_type_authoring" in resolved.skills - assert "instance_authoring" in resolved.skills - assert "instance_generation" in resolved.skills - assert "relationship_instance_authoring" in resolved.skills - assert "schema_tools" in resolved.skills - assert "kartograph_get_schema_ontology" in resolved.skills["schema_tools"] - assert "kartograph_get_workspace_readiness" in resolved.skills["schema_tools"] - assert "Read" in resolved.skills["schema_tools"] - assert "prepopulation_validation" in resolved.skills + assert set(resolved.skills.keys()) >= { + "capabilities_intake", + "schema_workflow", + "prepopulation", + } + assert "instance_generators" in resolved.skills["prepopulation"] + assert "kartograph_get_schema_authoring_guide" in resolved.skills["schema_workflow"] assert "capabilities_intake" in resolved.skills assert "goal" in resolved.system_prompt.lower() assert len(resolved.prompt_hierarchy) > 0 diff --git a/src/api/tests/unit/extraction/infrastructure/test_json_instances_to_jsonl.py b/src/api/tests/unit/extraction/infrastructure/test_json_instances_to_jsonl.py new file mode 100644 index 000000000..7a18376db --- /dev/null +++ b/src/api/tests/unit/extraction/infrastructure/test_json_instances_to_jsonl.py @@ -0,0 +1,71 @@ +"""Unit tests for the json_instances_to_jsonl helper script.""" + +from __future__ import annotations + +import json +import subprocess +import sys +from pathlib import Path + +SCRIPT = ( + Path(__file__).resolve().parents[4] + / 
"extraction/infrastructure/instance_generator_templates/json_instances_to_jsonl.py" +) + + +def test_json_instances_to_jsonl_emits_sorted_create_lines(tmp_path: Path) -> None: + instances_path = tmp_path / "instances.json" + instances_path.write_text( + json.dumps( + [ + {"slug": "b-entity", "properties": {"name": "B"}}, + {"slug": "a-entity", "properties": {"name": "A", "file_path": "pkg/a.go"}}, + ] + ), + encoding="utf-8", + ) + output_path = tmp_path / "out.jsonl" + + proc = subprocess.run( + [ + sys.executable, + str(SCRIPT), + "source_file", + "--data-source-id", + "schema-bootstrap", + "--source-path", + "graph-management-assistant", + str(instances_path), + ], + check=True, + capture_output=True, + text=True, + ) + output_path.write_text(proc.stdout, encoding="utf-8") + + lines = [line for line in proc.stdout.splitlines() if line.strip()] + assert len(lines) == 2 + + first = json.loads(lines[0]) + second = json.loads(lines[1]) + assert first["set_properties"]["slug"] == "a-entity" + assert second["set_properties"]["slug"] == "b-entity" + assert first["op"] == "CREATE" + assert first["type"] == "node" + assert first["label"] == "source_file" + assert first["set_properties"]["data_source_id"] == "schema-bootstrap" + assert first["set_properties"]["source_path"] == "graph-management-assistant" + assert first["id"] == second["id"] or first["set_properties"]["slug"] != second["set_properties"]["slug"] + + rerun = subprocess.run( + [ + sys.executable, + str(SCRIPT), + "source_file", + str(instances_path), + ], + check=True, + capture_output=True, + text=True, + ) + assert rerun.stdout == proc.stdout diff --git a/src/api/tests/unit/extraction/infrastructure/test_json_relationships_to_jsonl.py b/src/api/tests/unit/extraction/infrastructure/test_json_relationships_to_jsonl.py new file mode 100644 index 000000000..3ed9413ac --- /dev/null +++ b/src/api/tests/unit/extraction/infrastructure/test_json_relationships_to_jsonl.py @@ -0,0 +1,48 @@ +"""Unit tests for 
json_relationships_to_jsonl helper.""" + +from __future__ import annotations + +import json +import subprocess +import sys +from pathlib import Path + +SCRIPT = ( + Path(__file__).resolve().parents[4] + / "extraction/infrastructure/instance_generator_templates/json_relationships_to_jsonl.py" +) + + +def test_json_relationships_to_jsonl_emits_edge_create_lines(tmp_path: Path) -> None: + input_path = tmp_path / "relationships.json" + input_path.write_text( + json.dumps( + [ + { + "source_slug": "service-b", + "target_slug": "service-a", + "properties": {"weight": 1}, + } + ] + ), + encoding="utf-8", + ) + proc = subprocess.run( + [ + sys.executable, + str(SCRIPT), + "depends_on", + "service", + "service", + str(input_path), + ], + check=True, + capture_output=True, + text=True, + ) + line = json.loads(proc.stdout.strip()) + assert line["op"] == "CREATE" + assert line["type"] == "edge" + assert line["label"] == "depends_on" + assert line["start_id"].startswith("service:") + assert line["end_id"].startswith("service:") diff --git a/src/api/tests/unit/extraction/infrastructure/test_sticky_session_workdir_materializer.py b/src/api/tests/unit/extraction/infrastructure/test_sticky_session_workdir_materializer.py index 58f814636..01bc280f9 100644 --- a/src/api/tests/unit/extraction/infrastructure/test_sticky_session_workdir_materializer.py +++ b/src/api/tests/unit/extraction/infrastructure/test_sticky_session_workdir_materializer.py @@ -154,3 +154,27 @@ def test_materializer_refresh_preserves_session_root_directory(tmp_path: Path) - assert first_root == second_root assert (second_root / "repository-files" / "hyperfleet-api" / "pkg/api/example.go").exists() + + +def test_materializer_copies_instance_generator_templates(tmp_path: Path) -> None: + materializer = StickySessionWorkdirMaterializer(job_package_work_dir=tmp_path) + + session_root = materializer.prepare( + session_id="session-generators", + knowledge_graph_id="kg-1", + job_packages=(), + ) + + generators_dir = 
session_root / "instance_generators" + assert generators_dir.is_dir() + for name in ( + "data_source.py", + "folder.py", + "source_file.py", + "json_instances_to_jsonl.py", + "json_relationships_to_jsonl.py", + "README.md", + ): + assert (generators_dir / name).is_file() + readme = (generators_dir / "README.md").read_text(encoding="utf-8") + assert "repository-files" in readme diff --git a/src/api/tests/unit/extraction/presentation/test_workload_routes.py b/src/api/tests/unit/extraction/presentation/test_workload_routes.py index e5ac7c897..58a60f9d1 100644 --- a/src/api/tests/unit/extraction/presentation/test_workload_routes.py +++ b/src/api/tests/unit/extraction/presentation/test_workload_routes.py @@ -34,6 +34,15 @@ async def replace_ontology( self.saved = config return config + async def validate_mutation_jsonl( + self, + *, + tenant_id: str, + knowledge_graph_id: str, + jsonl: str, + ) -> dict[str, object]: + return {"valid": True, "errors": [], "operation_count": 1} + async def apply_mutation_jsonl( self, *, @@ -42,7 +51,7 @@ async def apply_mutation_jsonl( jsonl: str, ) -> dict[str, object]: self.applied_jsonl = jsonl - return {"applied": True, "errors": []} + return {"applied": True, "errors": [], "operations_applied": 1} class _FakeGraphReader: @@ -92,6 +101,21 @@ async def count_relationship_instances(self, **kwargs): return 1 return 0 + async def find_existing_node_ids(self, **kwargs): + return frozenset() + + async def find_existing_edge_ids(self, **kwargs): + return frozenset() + + async def find_existing_slugs_for_entity_type(self, **kwargs): + return frozenset({"api-gateway"}) + + async def partition_slugs_by_existence(self, **kwargs): + slugs = tuple(kwargs.get("slugs") or ()) + existing = sorted(slug for slug in slugs if slug == "api-gateway") + missing = sorted(slug for slug in slugs if slug != "api-gateway") + return existing, missing + @pytest.fixture def workload_client() -> tuple[TestClient, _FakeSchemaService, str]: @@ -224,6 +248,32 @@ def 
test_workload_save_schema_ontology(workload_client: tuple[TestClient, _FakeS assert fake.saved.edge_types[0].label == "depends_on" +def test_workload_check_graph_slugs(workload_client: tuple[TestClient, _FakeSchemaService, str]) -> None: + client, _fake, token = workload_client + response = client.post( + "/extraction/workloads/graph/check-slugs", + headers={"X-Workload-Token": token}, + json={"entity_type": "service", "slugs": ["api-gateway", "new-service"]}, + ) + assert response.status_code == 200 + payload = response.json() + assert payload["existing_slugs"] == ["api-gateway"] + assert payload["missing_slugs"] == ["new-service"] + + +def test_workload_validate_graph_mutations(workload_client: tuple[TestClient, _FakeSchemaService, str]) -> None: + client, _fake, token = workload_client + response = client.post( + "/extraction/workloads/mutations/validate", + headers={"X-Workload-Token": token}, + json={"jsonl": '{"op":"CREATE","type":"node","id":"service:0123456789abcdef","label":"service","set_properties":{"name":"api","slug":"api","data_source_id":"bootstrap","source_path":"assistant"}}'}, + ) + assert response.status_code == 200 + payload = response.json() + assert payload["valid"] is True + assert payload["operation_count"] == 1 + + def test_workload_apply_graph_mutations(workload_client: tuple[TestClient, _FakeSchemaService, str]) -> None: client, fake, token = workload_client response = client.post( diff --git a/src/api/tests/unit/graph/test_repository_protocol.py b/src/api/tests/unit/graph/test_repository_protocol.py index 289e9024d..d196ba8f4 100644 --- a/src/api/tests/unit/graph/test_repository_protocol.py +++ b/src/api/tests/unit/graph/test_repository_protocol.py @@ -14,10 +14,22 @@ def test_protocol_is_runtime_checkable(self): # Create a minimal implementation to verify protocol class MinimalRepo: def find_nodes_by_slug( - self, slug: str, node_type: str | None = None + self, slug: str, node_type: str | None = None, **kwargs ) -> list[NodeRecord]: 
return [] + def find_nodes_by_label(self, node_type: str, **kwargs) -> list[NodeRecord]: + return [] + + def count_nodes_by_label(self, node_type: str, **kwargs) -> int: + return 0 + + def find_relationship_instances(self, relationship_label: str, **kwargs) -> list: + return [] + + def count_relationship_instances(self, relationship_label: str, **kwargs) -> int: + return 0 + def get_neighbors(self, node_id: str) -> NodeNeighborsResult: return NodeNeighborsResult( central_node=NodeRecord(id=node_id, label="Node", properties={}), @@ -50,10 +62,22 @@ def test_protocol_requires_generate_id(self): class MissingGenerateId: def find_nodes_by_slug( - self, slug: str, node_type: str | None = None + self, slug: str, node_type: str | None = None, **kwargs ) -> list[NodeRecord]: return [] + def find_nodes_by_label(self, node_type: str, **kwargs) -> list[NodeRecord]: + return [] + + def count_nodes_by_label(self, node_type: str, **kwargs) -> int: + return 0 + + def find_relationship_instances(self, relationship_label: str, **kwargs) -> list: + return [] + + def count_relationship_instances(self, relationship_label: str, **kwargs) -> int: + return 0 + def get_neighbors(self, node_id: str) -> NodeNeighborsResult: return NodeNeighborsResult( central_node=NodeRecord(id=node_id, label="Node", properties={}), diff --git a/src/api/tests/unit/infrastructure/canonical_schema/test_ontology_projection.py b/src/api/tests/unit/infrastructure/canonical_schema/test_ontology_projection.py new file mode 100644 index 000000000..5ed96004a --- /dev/null +++ b/src/api/tests/unit/infrastructure/canonical_schema/test_ontology_projection.py @@ -0,0 +1,40 @@ +"""Unit tests for canonical schema ontology projection.""" + +from __future__ import annotations + +from graph.infrastructure.postgres_kg_type_definition_store import ( + StoredKnowledgeGraphTypeDefinition, +) +from infrastructure.canonical_schema.ontology_projection import ( + stored_definitions_to_ontology_config, +) +from 
management.domain.value_objects import NodeTypeDefinition + + +def test_stored_definitions_restore_instance_generator_metadata() -> None: + config = stored_definitions_to_ontology_config( + [ + StoredKnowledgeGraphTypeDefinition( + label="service", + entity_type="node", + description="Service", + required_properties=("name",), + optional_properties=(), + metadata={ + "prepopulated": True, + "prepopulated_instance_count": 0, + "instance_generator": "my_service.py", + }, + ) + ] + ) + + assert config.node_types == ( + NodeTypeDefinition( + label="service", + description="Service", + required_properties=("name",), + prepopulated=True, + instance_generator="my_service.py", + ), + ) diff --git a/src/api/tests/unit/infrastructure/extraction_workload/test_mutation_preflight.py b/src/api/tests/unit/infrastructure/extraction_workload/test_mutation_preflight.py new file mode 100644 index 000000000..e13d48f88 --- /dev/null +++ b/src/api/tests/unit/infrastructure/extraction_workload/test_mutation_preflight.py @@ -0,0 +1,55 @@ +"""Unit tests for workload mutation preflight validation.""" + +from __future__ import annotations + +import pytest + +from graph.domain.value_objects import EntityType, MutationOperationType +from infrastructure.extraction_workload.mutation_preflight import validate_mutation_jsonl + + +class _FakeGraphReader: + def __init__(self, *, existing_node_ids: frozenset[str] = frozenset()) -> None: + self._existing_node_ids = existing_node_ids + + async def find_existing_node_ids(self, **kwargs) -> frozenset[str]: + return self._existing_node_ids + + async def find_existing_edge_ids(self, **kwargs) -> frozenset[str]: + return frozenset() + + async def find_existing_slugs_for_entity_type(self, **kwargs) -> frozenset[str]: + return frozenset() + + +@pytest.mark.asyncio +async def test_validate_rejects_define_for_existing_type() -> None: + jsonl = ( + '{"op":"DEFINE","type":"node","label":"service","description":"x",' + '"required_properties":["name"]}' + ) + 
errors = await validate_mutation_jsonl( + jsonl_content=jsonl, + tenant_id="tenant-1", + knowledge_graph_id="kg-1", + graph_reader=None, + existing_type_keys=frozenset({("service", EntityType.NODE.value)}), + ) + assert any("DEFINE" in error for error in errors) + + +@pytest.mark.asyncio +async def test_validate_rejects_create_for_existing_node_id() -> None: + jsonl = ( + '{"op":"CREATE","type":"node","id":"service:0123456789abcdef","label":"service",' + '"set_properties":{"name":"api","slug":"api","data_source_id":"bootstrap","source_path":"assistant"}}' + ) + reader = _FakeGraphReader(existing_node_ids=frozenset({"service:0123456789abcdef"})) + errors = await validate_mutation_jsonl( + jsonl_content=jsonl, + tenant_id="tenant-1", + knowledge_graph_id="kg-1", + graph_reader=reader, + existing_type_keys=frozenset(), + ) + assert any("already exists" in error and "UPDATE" in error for error in errors) diff --git a/src/api/tests/unit/infrastructure/extraction_workload/test_schema_service.py b/src/api/tests/unit/infrastructure/extraction_workload/test_schema_service.py index acf6b0593..80eeb2c25 100644 --- a/src/api/tests/unit/infrastructure/extraction_workload/test_schema_service.py +++ b/src/api/tests/unit/infrastructure/extraction_workload/test_schema_service.py @@ -21,6 +21,7 @@ async def test_apply_mutation_jsonl_routes_instance_ops_to_graph_writer() -> Non ) service = GraphWorkloadSchemaService(session=session, mutation_writer=mutation_writer) service._repository = MagicMock() + service._repository.get_ontology = AsyncMock(return_value=None) service._repository.apply_mutation_log = AsyncMock() jsonl = ( @@ -42,6 +43,43 @@ async def test_apply_mutation_jsonl_routes_instance_ops_to_graph_writer() -> Non assert await_args.kwargs["operations"][0].op == MutationOperationType.CREATE +@pytest.mark.asyncio +async def test_apply_mutation_jsonl_rejects_duplicate_create_when_reader_reports_existing() -> None: + session = MagicMock() + session.commit = AsyncMock() + 
session.rollback = AsyncMock() + mutation_writer = MagicMock() + mutation_writer.apply_instance_operations = AsyncMock() + graph_reader = MagicMock() + graph_reader.find_existing_node_ids = AsyncMock( + return_value=frozenset({"service:0123456789abcdef"}) + ) + graph_reader.find_existing_edge_ids = AsyncMock(return_value=frozenset()) + graph_reader.find_existing_slugs_for_entity_type = AsyncMock(return_value=frozenset()) + + service = GraphWorkloadSchemaService( + session=session, + mutation_writer=mutation_writer, + graph_reader=graph_reader, + ) + service._repository = MagicMock() + service._repository.get_ontology = AsyncMock(return_value=None) + + jsonl = ( + '{"op":"CREATE","type":"node","id":"service:0123456789abcdef","label":"service",' + '"set_properties":{"name":"api","slug":"api","data_source_id":"bootstrap","source_path":"assistant"}}' + ) + result = await service.apply_mutation_jsonl( + tenant_id="tenant-1", + knowledge_graph_id="kg-1", + jsonl=jsonl, + ) + + assert result["applied"] is False + assert result["errors"] + mutation_writer.apply_instance_operations.assert_not_called() + + @pytest.mark.asyncio async def test_apply_mutation_jsonl_routes_define_ops_to_canonical_repo() -> None: session = MagicMock() @@ -51,6 +89,7 @@ async def test_apply_mutation_jsonl_routes_define_ops_to_canonical_repo() -> Non mutation_writer.apply_instance_operations = AsyncMock() service = GraphWorkloadSchemaService(session=session, mutation_writer=mutation_writer) service._repository = MagicMock() + service._repository.get_ontology = AsyncMock(return_value=None) service._repository.apply_mutation_log = AsyncMock() jsonl = ( From 1ee4415eb203029f39b2547b3c43b26874413a6a Mon Sep 17 00:00:00 2001 From: Austin Redenbaugh <aredenba@redhat.com> Date: Thu, 4 Jun 2026 23:08:06 -0400 Subject: [PATCH 101/153] feat(graph): bidirectional relationship pairs with twin edge instances (#764) * feat(graph): add bidirectional relationship pairs with twin edge instances Auto-generate 
inverse relationship types on ontology save, expand primary edge CREATE mutations into twin inverse CREATEs, validate instance parity in workspace readiness, and surface reverse labels in design artifacts. Closes #763 Co-authored-by: Cursor <cursoragent@cursor.com> * docs(spec): link schema authoring to bidirectional relationship pairing Co-authored-by: Cursor <cursoragent@cursor.com> --------- Co-authored-by: Cursor <cursoragent@cursor.com> --- .../graph/bidirectional-relationships.spec.md | 158 +++++++++++ specs/graph/schema-authoring.spec.md | 13 + specs/index.spec.md | 1 + .../application/schema_authoring_guide.py | 8 +- .../application/skill_resolution_service.py | 4 +- .../presentation/workload_routes.py | 6 +- .../graph_canonical_schema_repository.py | 8 + .../ontology_mutation_builder.py | 10 + .../canonical_schema/ontology_projection.py | 7 + .../extraction_workload/mutation_preflight.py | 38 ++- .../extraction_workload/schema_service.py | 25 +- .../twin_edge_expansion.py | 27 ++ .../workspace_readiness.py | 46 +++ .../application/design_artifacts.py | 18 +- .../management/domain/relationship_pairing.py | 267 ++++++++++++++++++ src/api/management/domain/value_objects.py | 28 ++ .../presentation/knowledge_graphs/models.py | 24 ++ .../test_twin_edge_expansion.py | 45 +++ .../test_design_artifacts_pairing.py | 34 +++ .../domain/test_relationship_pairing.py | 179 ++++++++++++ 20 files changed, 930 insertions(+), 16 deletions(-) create mode 100644 specs/graph/bidirectional-relationships.spec.md create mode 100644 src/api/infrastructure/extraction_workload/twin_edge_expansion.py create mode 100644 src/api/management/domain/relationship_pairing.py create mode 100644 src/api/tests/unit/infrastructure/extraction_workload/test_twin_edge_expansion.py create mode 100644 src/api/tests/unit/management/application/test_design_artifacts_pairing.py create mode 100644 src/api/tests/unit/management/domain/test_relationship_pairing.py diff --git 
a/specs/graph/bidirectional-relationships.spec.md b/specs/graph/bidirectional-relationships.spec.md new file mode 100644 index 000000000..4e63881d9 --- /dev/null +++ b/specs/graph/bidirectional-relationships.spec.md @@ -0,0 +1,158 @@ +# Bidirectional Relationships + +## Purpose +Relationship types in Kartograph are directed edges. Many bootstrap and query use cases need traversal from either endpoint without debating arrow direction at authoring time. This spec defines **paired relationship types** (primary + inverse) and **twin edge instances** so that every bidirectional relationship is materialized as two explicit graph edges with distinct labels, validated for parity, and visible in schema design artifacts. + +This complements schema authoring: the Graph Management Assistant authors the primary direction; the platform materializes the inverse type and twin instances by default. + +## Design principles + +- **Explicit over implicit:** Twin edges are separate mutation log lines and separate AGE edges — auditable and idempotent. +- **Opt-out, not opt-in:** New relationship types default to bidirectional pairing. Causal or asymmetric relationships (e.g. `depends_on`, `created_by`) set `bidirectional: false`. +- **Distinct inverse labels:** Primary and inverse use different edge labels (e.g. `contains` / `contained_in`), not the same label reversed. Semantics and UI already assume this (`reverse_relationship_type` in design artifacts). +- **No hyperedge shortcut:** Pairing is always between two node types with one primary direction declared in ontology. + +## Requirements + +### Requirement: Bidirectional pairing metadata on relationship types +The system SHALL store bidirectional pairing metadata on canonical relationship type definitions. 
+ +#### Scenario: Default bidirectional on new relationship type +- GIVEN a new relationship type `Repository → contains → Test` is added to the ontology +- AND `bidirectional` is omitted +- WHEN the ontology is saved +- THEN the relationship type is stored with `bidirectional=true` +- AND an inverse relationship type is created or linked: `Test → contained_in → Repository` (inverse label derived or explicit) +- AND design artifacts expose `reverse_relationship_type` and `reverse_relationship_description` for the primary row + +#### Scenario: Opt out of bidirectional pairing +- GIVEN a relationship type `Service → depends_on → Service` with `bidirectional=false` +- WHEN the ontology is saved +- THEN no inverse relationship type is auto-generated +- AND instance twin validation does not apply to that label + +#### Scenario: Explicit inverse label +- GIVEN a primary relationship type with `bidirectional=true` and `inverse_label="housed_in"` +- WHEN the ontology is saved +- THEN the inverse type uses label `housed_in` with swapped `source_labels` and `target_labels` +- AND metadata links `inverse_of` on the inverse type back to the primary label + +### Requirement: Inverse type materialization on ontology save +The system SHALL ensure every bidirectional primary relationship type has a corresponding inverse type definition before instances are created. 
+ +#### Scenario: Auto-generate missing inverse type +- GIVEN ontology save includes `repository|contains|test` as a bidirectional primary +- AND no inverse type exists yet +- WHEN save completes +- THEN canonical schema includes `test|contained_in|repository` (or explicit `inverse_label`) +- AND both types share pairing metadata (`bidirectional_pair_key`) + +#### Scenario: Reject invalid inverse pairing +- GIVEN a relationship type references `inverse_label` that already exists with incompatible endpoints +- WHEN ontology save is attempted +- THEN validation fails with a clear error + +### Requirement: Twin edge instances on CREATE +The system SHALL create paired edge instances for bidirectional relationship types when a primary edge instance is created. + +#### Scenario: Primary CREATE expands to twin CREATE +- GIVEN bidirectional relationship `contains` from Repository node R to Test node T +- WHEN a CREATE edge mutation is applied for `R -[contains]-> T` +- THEN the mutation batch also CREATEs `T -[contained_in]-> R` in the same atomic apply +- AND both edges receive distinct deterministic ids +- AND inverse edge properties copy non-directional fields from the primary; directional fields may be omitted on the inverse + +#### Scenario: Bulk JSONL primary-only input +- GIVEN a JSONL file with only primary-direction edge CREATE lines for bidirectional types +- WHEN mutations are validated or applied via workload tools +- THEN the preflight/expansion layer adds inverse CREATE lines before apply +- AND validate reports the expanded operation count + +#### Scenario: Idempotent re-apply +- GIVEN twin edges for pair (R, T) already exist +- WHEN the same primary CREATE is submitted again under strict CREATE semantics +- THEN validation rejects the duplicate primary CREATE +- AND no orphan inverse edge is created + +### Requirement: Twin instance validation +The system SHALL validate that bidirectional relationship instances exist in pairs. 
+ +#### Scenario: Readiness reports missing inverse instance +- GIVEN a bidirectional primary edge instance exists without its inverse twin +- WHEN workspace readiness or design artifacts are evaluated +- THEN a blocking or warning reason identifies the orphan primary edge (source slug, target slug, label) +- AND transition eligibility may be blocked when strict pairing mode is enabled for bootstrap + +#### Scenario: Balanced pairing passes validation +- GIVEN every primary `contains` edge has a matching `contained_in` edge between the same node ids (reversed) +- WHEN twin validation runs +- THEN no pairing defects are reported + +### Requirement: Authoring guidance +The system SHALL instruct the Graph Management Assistant to author primary direction only for bidirectional types. + +#### Scenario: GMA authors one direction +- GIVEN schema bootstrap with bidirectional relationship types +- WHEN the assistant plans prepopulation +- THEN it emits generator output for the primary label only +- AND relies on platform twin expansion for inverse instances +- AND does not ask the user to confirm arrow direction for types with `bidirectional=true` (direction matters only when `bidirectional=false` is set) + +## Data model (canonical type metadata) + +Primary relationship type (`edge`, entity_type=edge): + +| Field | Default | Meaning | +|-------|---------|---------| +| `bidirectional` | `true` | Whether twin inverse type + instances are required | +| `inverse_label` | derived | Label of inverse edge type; default taken from the built-in verb map, otherwise `{primary}_inverse` | +| `bidirectional_pair_key` | derived | Stable key `source\|primary\|target` linking primary and inverse rows | + +Inverse relationship type (auto-generated): + +| Field | Meaning | +|-------|---------| +| `inverse_of` | Primary label this type mirrors | +| `bidirectional` | `true` | +| `auto_generated` | `true` — hide from GMA authoring prompts or show as read-only twin | + +## Inverse label derivation (default) + +When `inverse_label` is not provided and 
`bidirectional=true`: + +1. Use a small built-in map for common verbs (`contains` → `contained_in`, `defines` → `defined_by`, `implements` → `implemented_by`). +2. Otherwise default to `{primary_label}_inverse` (snake_case). + +Authors MAY override `inverse_label` in ontology JSON. + +## Write path summary + +``` +Ontology save (Management → canonical schema) + → pairing expander adds/updates inverse type definitions + +Edge CREATE (Graph / Extraction workload) + → twin expander adds inverse CREATE to batch + → mutation applier executes both in one transaction + +Readiness (Management / Extraction) + → twin validator checks primary/inverse instance parity +``` + +## Read path summary + +- **Design artifacts:** populate `reverse_relationship_type` from pairing metadata (UI already renders it). +- **Relationship listing:** workload list tools may group primary + inverse counts or report twin balance. +- **Queries:** agents traverse using the label appropriate to start node type; both directions always exist when bidirectional. + +## Out of scope (initial tracer) + +- Automatic linguistic inference beyond the small verb map. +- Symmetric edges with the **same** label in both directions (conflicts with distinct-semantics principle). +- Retroactive twin backfill job for graphs authored before this feature (separate migration spec). +- Graph query MCP auto-expanding undirected traversals (clients use explicit labels). + +## Migration notes + +- Existing ontologies without pairing metadata: treat as `bidirectional=false` until re-saved or migrated. +- Existing orphan edge instances: report in readiness; optional backfill command in a follow-up. 
diff --git a/specs/graph/schema-authoring.spec.md b/specs/graph/schema-authoring.spec.md index 9aba27707..06d1ba4ce 100644 --- a/specs/graph/schema-authoring.spec.md +++ b/specs/graph/schema-authoring.spec.md @@ -93,3 +93,16 @@ The system SHALL support bulk instance authoring for the Graph Management Assist - THEN example generator scripts and JSONL converter helpers are present - AND the assistant may add custom generator scripts alongside them +### Requirement: Bidirectional Relationship Pairing +The system SHALL default new relationship types to bidirectional pairing. See [Bidirectional Relationships](bidirectional-relationships.spec.md). + +#### Scenario: Ontology save creates inverse type +- GIVEN a primary relationship type with `bidirectional=true` +- WHEN the ontology is saved +- THEN the inverse relationship type is stored with swapped endpoints + +#### Scenario: Primary edge CREATE expands to twin +- GIVEN a bidirectional relationship type exists in the ontology +- WHEN a primary-direction edge CREATE mutation is applied via workload tools +- THEN an inverse edge CREATE is applied in the same batch + diff --git a/specs/index.spec.md b/specs/index.spec.md index cee23c82a..44dcce3c9 100644 --- a/specs/index.spec.md +++ b/specs/index.spec.md @@ -30,6 +30,7 @@ The persistence and query engine for property graph data. 
| [Queries](graph/queries.spec.md) | Reading nodes, edges, and subgraphs | | [Schema](graph/schema.spec.md) | Type definitions and schema management | | [Schema Authoring](graph/schema-authoring.spec.md) | Bootstrap and ongoing schema authoring lifecycle | +| [Bidirectional Relationships](graph/bidirectional-relationships.spec.md) | Paired inverse relationship types and twin edge instances | | [Bulk Loading](graph/bulk-loading.spec.md) | High-throughput graph ingestion | ### [Management](management/) — Control Plane diff --git a/src/api/extraction/application/schema_authoring_guide.py b/src/api/extraction/application/schema_authoring_guide.py index 9728ce39e..315e88638 100644 --- a/src/api/extraction/application/schema_authoring_guide.py +++ b/src/api/extraction/application/schema_authoring_guide.py @@ -57,10 +57,16 @@ "properties": [], "prepopulated": true, "prepopulated_instance_count": 0, - "instance_generator": "my_edges.py" + "instance_generator": "my_edges.py", + "bidirectional": true, + "inverse_label": "contained_in" } ``` +- `bidirectional`: default `true` for new relationship types — platform auto-creates inverse type and twin edge instances. +- `inverse_label`: optional override; otherwise derived (`contains` → `contained_in`, else `{label}_inverse`). +- Set `bidirectional: false` for asymmetric edges (`depends_on`, `created_by`). +- Author **primary direction only** in generators; inverse instances are created automatically on apply. - `source_labels` / `target_labels`: allowed node type labels for edge endpoints. - `instance_generator`: optional script under `instance_generators/` for relationship prepopulation. 
- `prepopulated`: when true, bootstrap transition requires at least one instance of this diff --git a/src/api/extraction/application/skill_resolution_service.py b/src/api/extraction/application/skill_resolution_service.py index b093ce459..e41cb021c 100644 --- a/src/api/extraction/application/skill_resolution_service.py +++ b/src/api/extraction/application/skill_resolution_service.py @@ -73,7 +73,9 @@ class ResolvedExtractionSkillPack: "prepopulation": ( "For prepopulated types: set instance_generator on the type when helpful, run script " "under instance_generators/ with Bash, convert with json_*_to_jsonl helpers, validate " - "then apply-from-file. CREATE cannot duplicate existing instances — use UPDATE to edit." + "then apply-from-file. CREATE cannot duplicate existing instances — use UPDATE to edit. " + "Bidirectional relationships default on: author primary-direction edges only; platform " + "creates inverse type + twin instances. Set bidirectional=false for asymmetric edges." ), }, ExtractionSessionMode.EXTRACTION_OPERATIONS: { diff --git a/src/api/extraction/presentation/workload_routes.py b/src/api/extraction/presentation/workload_routes.py index b65e582f1..4121977af 100644 --- a/src/api/extraction/presentation/workload_routes.py +++ b/src/api/extraction/presentation/workload_routes.py @@ -18,7 +18,9 @@ get_workload_schema_service, ) from management.domain.ontology_prepopulation import PrepopulationValidationError +from management.domain.relationship_pairing import ontology_config_from_authoring_payload from management.domain.value_objects import OntologyConfig +from management.ports.exceptions import CanonicalSchemaMutationError router = APIRouter(prefix="/workloads", tags=["extraction-workloads"]) @@ -184,13 +186,13 @@ async def workload_save_schema_ontology( schema_service: Annotated[IWorkloadSchemaService, Depends(get_workload_schema_service)] = ..., ) -> WorkloadOntologyResponse: _require_chat_scope(auth) - config = 
OntologyConfig.from_dict(request.model_dump()) + config = ontology_config_from_authoring_payload(request.model_dump()) try: saved = await schema_service.replace_ontology( knowledge_graph_id=auth.knowledge_graph_id, config=config, ) - except PrepopulationValidationError as e: + except (PrepopulationValidationError, CanonicalSchemaMutationError) as e: raise HTTPException( status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, detail=str(e), diff --git a/src/api/infrastructure/canonical_schema/graph_canonical_schema_repository.py b/src/api/infrastructure/canonical_schema/graph_canonical_schema_repository.py index a688ce5cb..c23c3709d 100644 --- a/src/api/infrastructure/canonical_schema/graph_canonical_schema_repository.py +++ b/src/api/infrastructure/canonical_schema/graph_canonical_schema_repository.py @@ -24,6 +24,10 @@ stored_definitions_to_ontology_config, ) from management.domain.ontology_prepopulation import validate_ontology_prepopulation +from management.domain.relationship_pairing import ( + RelationshipPairingError, + expand_ontology_bidirectional_pairs, +) from management.domain.value_objects import OntologyConfig from management.ports.canonical_schema import ICanonicalSchemaRepository from management.ports.exceptions import CanonicalSchemaMutationError @@ -49,6 +53,10 @@ async def get_ontology(self, kg_id: str) -> OntologyConfig | None: return stored_definitions_to_ontology_config(rows) async def replace_ontology(self, kg_id: str, config: OntologyConfig) -> None: + try: + config = expand_ontology_bidirectional_pairs(config) + except RelationshipPairingError as exc: + raise CanonicalSchemaMutationError(str(exc)) from exc validate_ontology_prepopulation(config) await self._store.delete_all_for_kg(kg_id) await self._apply_operations( diff --git a/src/api/infrastructure/canonical_schema/ontology_mutation_builder.py b/src/api/infrastructure/canonical_schema/ontology_mutation_builder.py index 592697249..f9a154c08 100644 --- 
a/src/api/infrastructure/canonical_schema/ontology_mutation_builder.py +++ b/src/api/infrastructure/canonical_schema/ontology_mutation_builder.py @@ -61,4 +61,14 @@ def edge_type_metadata(edge_type) -> dict: } if edge_type.instance_generator: metadata["instance_generator"] = edge_type.instance_generator + if edge_type.bidirectional: + metadata["bidirectional"] = True + if edge_type.inverse_label: + metadata["inverse_label"] = edge_type.inverse_label + if edge_type.inverse_of: + metadata["inverse_of"] = edge_type.inverse_of + if edge_type.auto_generated: + metadata["auto_generated"] = True + if edge_type.bidirectional_pair_key: + metadata["bidirectional_pair_key"] = edge_type.bidirectional_pair_key return metadata diff --git a/src/api/infrastructure/canonical_schema/ontology_projection.py b/src/api/infrastructure/canonical_schema/ontology_projection.py index 52879022d..41026bb0e 100644 --- a/src/api/infrastructure/canonical_schema/ontology_projection.py +++ b/src/api/infrastructure/canonical_schema/ontology_projection.py @@ -58,6 +58,13 @@ def stored_definitions_to_ontology_config( instance_generator=_optional_metadata_str( stored.metadata.get("instance_generator") ), + bidirectional=bool(stored.metadata.get("bidirectional", False)), + inverse_label=_optional_metadata_str(stored.metadata.get("inverse_label")), + inverse_of=_optional_metadata_str(stored.metadata.get("inverse_of")), + auto_generated=bool(stored.metadata.get("auto_generated", False)), + bidirectional_pair_key=_optional_metadata_str( + stored.metadata.get("bidirectional_pair_key") + ), ) ) diff --git a/src/api/infrastructure/extraction_workload/mutation_preflight.py b/src/api/infrastructure/extraction_workload/mutation_preflight.py index 2687cb39f..1295a1d16 100644 --- a/src/api/infrastructure/extraction_workload/mutation_preflight.py +++ b/src/api/infrastructure/extraction_workload/mutation_preflight.py @@ -6,6 +6,10 @@ from management.ports.exceptions import CanonicalSchemaMutationError from 
extraction.ports.workload_graph import IWorkloadGraphReader +from infrastructure.extraction_workload.twin_edge_expansion import ( + expand_twin_edge_mutation_operations, +) +from management.domain.value_objects import OntologyConfig def parse_mutation_jsonl(jsonl_content: str) -> list[MutationOperation]: @@ -16,6 +20,27 @@ def parse_mutation_jsonl(jsonl_content: str) -> list[MutationOperation]: return GraphWorkloadGraphMutationWriter.parse_jsonl(jsonl_content) +async def prepare_mutation_operations( + *, + jsonl_content: str, + tenant_id: str, + ontology: OntologyConfig | None, +) -> tuple[list[MutationOperation] | None, list[str]]: + """Parse JSONL and expand bidirectional twin edge CREATE operations.""" + try: + operations = parse_mutation_jsonl(jsonl_content) + except CanonicalSchemaMutationError as exc: + return None, [str(exc)] + + if ontology is not None: + operations = expand_twin_edge_mutation_operations( + operations, + ontology=ontology, + tenant_id=tenant_id, + ) + return operations, [] + + async def validate_mutation_jsonl( *, jsonl_content: str, @@ -23,12 +48,17 @@ async def validate_mutation_jsonl( knowledge_graph_id: str, graph_reader: IWorkloadGraphReader | None, existing_type_keys: frozenset[tuple[str, str]], + ontology: OntologyConfig | None = None, ) -> list[str]: """Return validation errors; empty list means the batch may be applied.""" - try: - operations = parse_mutation_jsonl(jsonl_content) - except CanonicalSchemaMutationError as exc: - return [str(exc)] + operations, errors = await prepare_mutation_operations( + jsonl_content=jsonl_content, + tenant_id=tenant_id, + ontology=ontology, + ) + if errors: + return errors + assert operations is not None errors: list[str] = [] seen_create_ids: dict[str, int] = {} diff --git a/src/api/infrastructure/extraction_workload/schema_service.py b/src/api/infrastructure/extraction_workload/schema_service.py index e73806089..442a8dfe9 100644 --- a/src/api/infrastructure/extraction_workload/schema_service.py 
+++ b/src/api/infrastructure/extraction_workload/schema_service.py @@ -14,6 +14,7 @@ ) from infrastructure.extraction_workload.mutation_preflight import ( parse_mutation_jsonl, + prepare_mutation_operations, validate_mutation_jsonl, ) from infrastructure.extraction_workload.workspace_readiness import ( @@ -70,18 +71,23 @@ async def validate_mutation_jsonl( knowledge_graph_id: str, jsonl: str, ) -> dict[str, object]: + ontology = await self.get_ontology(knowledge_graph_id=knowledge_graph_id) errors = await validate_mutation_jsonl( jsonl_content=jsonl, tenant_id=tenant_id, knowledge_graph_id=knowledge_graph_id, graph_reader=self._graph_reader, existing_type_keys=await self._existing_type_keys(knowledge_graph_id), + ontology=ontology, ) operation_count = 0 - try: - operation_count = len(parse_mutation_jsonl(jsonl)) - except CanonicalSchemaMutationError: - operation_count = 0 + expanded_ops, prep_errors = await prepare_mutation_operations( + jsonl_content=jsonl, + tenant_id=tenant_id, + ontology=ontology, + ) + if not prep_errors and expanded_ops is not None: + operation_count = len(expanded_ops) return { "valid": not errors, "errors": errors, @@ -95,18 +101,27 @@ async def apply_mutation_jsonl( knowledge_graph_id: str, jsonl: str, ) -> dict[str, object]: + ontology = await self.get_ontology(knowledge_graph_id=knowledge_graph_id) preflight_errors = await validate_mutation_jsonl( jsonl_content=jsonl, tenant_id=tenant_id, knowledge_graph_id=knowledge_graph_id, graph_reader=self._graph_reader, existing_type_keys=await self._existing_type_keys(knowledge_graph_id), + ontology=ontology, ) if preflight_errors: return {"applied": False, "errors": preflight_errors} + operations, prep_errors = await prepare_mutation_operations( + jsonl_content=jsonl, + tenant_id=tenant_id, + ontology=ontology, + ) + if prep_errors: + return {"applied": False, "errors": prep_errors} + assert operations is not None try: - operations = parse_mutation_jsonl(jsonl) define_ops, instance_ops = 
GraphWorkloadGraphMutationWriter.split_operations( operations ) diff --git a/src/api/infrastructure/extraction_workload/twin_edge_expansion.py b/src/api/infrastructure/extraction_workload/twin_edge_expansion.py new file mode 100644 index 000000000..00cad20e5 --- /dev/null +++ b/src/api/infrastructure/extraction_workload/twin_edge_expansion.py @@ -0,0 +1,27 @@ +"""Expand workload mutation operations with bidirectional twin edge CREATE lines.""" + +from __future__ import annotations + +from graph.domain.value_objects import MutationOperation +from management.domain.relationship_pairing import expand_twin_edge_creates +from management.domain.value_objects import OntologyConfig + + +def expand_twin_edge_mutation_operations( + operations: list[MutationOperation], + *, + ontology: OntologyConfig, + tenant_id: str, +) -> list[MutationOperation]: + """Append inverse edge CREATE MutationOperations for bidirectional types.""" + dict_rows = [ + operation.model_dump(mode="json", exclude_none=True) for operation in operations + ] + expanded_rows = expand_twin_edge_creates( + dict_rows, + ontology=ontology, + tenant_id=tenant_id, + ) + if len(expanded_rows) == len(operations): + return operations + return [MutationOperation.model_validate(row) for row in expanded_rows] diff --git a/src/api/infrastructure/extraction_workload/workspace_readiness.py b/src/api/infrastructure/extraction_workload/workspace_readiness.py index 734355b39..d36ae3d78 100644 --- a/src/api/infrastructure/extraction_workload/workspace_readiness.py +++ b/src/api/infrastructure/extraction_workload/workspace_readiness.py @@ -9,6 +9,11 @@ prepopulated_gaps_from_live_counts, ) from management.domain.ontology_prepopulation import relationship_readiness_key +from management.domain.relationship_pairing import ( + bidirectional_pair_key, + resolve_inverse_label_for_primary, + twin_validation_errors, +) from management.domain.value_objects import EdgeTypeDefinition, NodeTypeDefinition, OntologyConfig @@ -99,6 +104,47 
@@ async def build_workload_readiness_snapshot( + ", ".join(live_relationship_gaps) ) + if ontology is not None and graph_reader is not None: + bidirectional_counts: dict[str, int] = {} + for edge_type in ontology.edge_types: + if edge_type.auto_generated or edge_type.inverse_of or not edge_type.bidirectional: + continue + if not edge_type.source_labels or not edge_type.target_labels: + continue + source_label = edge_type.source_labels[0] + target_label = edge_type.target_labels[0] + primary_key = bidirectional_pair_key( + source_label=source_label, + relationship_label=edge_type.label, + target_label=target_label, + ) + inverse_label = resolve_inverse_label_for_primary(edge_type) + inverse_key = bidirectional_pair_key( + source_label=target_label, + relationship_label=inverse_label, + target_label=source_label, + ) + bidirectional_counts[primary_key] = await graph_reader.count_relationship_instances( + tenant_id=tenant_id, + knowledge_graph_id=knowledge_graph_id, + relationship_type=edge_type.label, + source_entity_type=source_label, + target_entity_type=target_label, + ) + bidirectional_counts[inverse_key] = await graph_reader.count_relationship_instances( + tenant_id=tenant_id, + knowledge_graph_id=knowledge_graph_id, + relationship_type=inverse_label, + source_entity_type=target_label, + target_entity_type=source_label, + ) + blocking_reasons.extend( + twin_validation_errors( + ontology=ontology, + relationship_counts=bidirectional_counts, + ) + ) + transition_eligible = ( metadata_readiness.has_minimum_entity_types and metadata_readiness.has_minimum_relationship_types diff --git a/src/api/management/application/design_artifacts.py b/src/api/management/application/design_artifacts.py index b60d4dd29..6724f4f00 100644 --- a/src/api/management/application/design_artifacts.py +++ b/src/api/management/application/design_artifacts.py @@ -5,7 +5,8 @@ from collections import defaultdict from typing import Any -from management.domain.value_objects import OntologyConfig 
+from management.domain.relationship_pairing import resolve_inverse_label_for_primary +from management.domain.value_objects import EdgeTypeDefinition, OntologyConfig _SYSTEM_NODE_PROPERTIES = frozenset( { @@ -28,6 +29,14 @@ def _instance_properties(raw: dict[str, Any]) -> dict[str, Any]: } +def _reverse_relationship_label(edge_type: EdgeTypeDefinition) -> str | None: + if edge_type.auto_generated or edge_type.inverse_of: + return None + if not edge_type.bidirectional: + return None + return resolve_inverse_label_for_primary(edge_type) + + def build_design_artifacts( *, knowledge_graph_id: str, @@ -156,14 +165,17 @@ def build_design_artifacts( composite_key = key type_instances = instances break + reverse_label = _reverse_relationship_label(edge_type) relationships.append( { "key": composite_key, "source_entity_type": source_label, "target_entity_type": target_label, "relationship_type": edge_type.label, - "reverse_relationship_type": None, - "reverse_relationship_description": None, + "reverse_relationship_type": reverse_label, + "reverse_relationship_description": ( + f"Inverse of `{edge_type.label}`" if reverse_label else None + ), "prepopulated_instances": edge_type.prepopulated, "description": edge_type.description or None, "instance_count": len(type_instances), diff --git a/src/api/management/domain/relationship_pairing.py b/src/api/management/domain/relationship_pairing.py new file mode 100644 index 000000000..257b9131a --- /dev/null +++ b/src/api/management/domain/relationship_pairing.py @@ -0,0 +1,267 @@ +"""Bidirectional relationship pairing for ontology authoring and edge instances.""" + +from __future__ import annotations + +import hashlib +from typing import Any + +from management.domain.value_objects import EdgeTypeDefinition, OntologyConfig + +_INVERSE_LABEL_MAP: dict[str, str] = { + "contains": "contained_in", + "defines": "defined_by", + "implements": "implemented_by", + "covers": "covered_by", + "owns": "owned_by", + "uses": "used_by", +} + + 
+class RelationshipPairingError(ValueError): + """Raised when bidirectional pairing metadata is inconsistent.""" + + +def derive_inverse_label(primary_label: str) -> str: + """Derive a default inverse edge label from the primary label.""" + normalized = primary_label.strip().lower() + return _INVERSE_LABEL_MAP.get(normalized, f"{normalized}_inverse") + + +def bidirectional_pair_key(*, source_label: str, relationship_label: str, target_label: str) -> str: + """Stable identifier for a directed relationship type in design artifacts.""" + return f"{source_label}|{relationship_label}|{target_label}" + + +def resolve_inverse_label_for_primary(edge_type: EdgeTypeDefinition) -> str: + """Return the inverse label for a primary bidirectional edge type.""" + if edge_type.inverse_label: + return edge_type.inverse_label.strip().lower() + return derive_inverse_label(edge_type.label) + + +def build_inverse_edge_type(primary: EdgeTypeDefinition) -> EdgeTypeDefinition: + """Build the auto-generated inverse edge type for a primary relationship.""" + if not primary.source_labels or not primary.target_labels: + raise RelationshipPairingError( + f"Relationship type `{primary.label}` requires source_labels and target_labels " + "for bidirectional pairing" + ) + inverse_label = resolve_inverse_label_for_primary(primary) + source = primary.source_labels[0] + target = primary.target_labels[0] + description = ( + f"Inverse of `{primary.label}` ({target} → {source}); auto-generated for bidirectional pairing." 
+ ) + return EdgeTypeDefinition( + label=inverse_label, + description=description, + source_labels=(target,), + target_labels=(source,), + properties=primary.properties, + prepopulated=primary.prepopulated, + prepopulated_instance_count=0, + instance_generator=primary.instance_generator, + bidirectional=True, + inverse_of=primary.label, + auto_generated=True, + bidirectional_pair_key=bidirectional_pair_key( + source_label=target, + relationship_label=inverse_label, + target_label=source, + ), + ) + + +def _is_primary_bidirectional_edge(edge_type: EdgeTypeDefinition) -> bool: + return edge_type.bidirectional and not edge_type.auto_generated and not edge_type.inverse_of + + +def expand_ontology_bidirectional_pairs(config: OntologyConfig) -> OntologyConfig: + """Ensure every primary bidirectional edge type has a linked inverse type definition.""" + edge_types = list(config.edge_types) + by_label = {edge.label: edge for edge in edge_types} + + for primary in list(edge_types): + if not _is_primary_bidirectional_edge(primary): + continue + if not primary.source_labels or not primary.target_labels: + raise RelationshipPairingError( + f"Relationship type `{primary.label}` cannot be bidirectional without " + "source_labels and target_labels" + ) + + inverse_label = resolve_inverse_label_for_primary(primary) + source = primary.source_labels[0] + target = primary.target_labels[0] + pair_key = bidirectional_pair_key( + source_label=source, + relationship_label=primary.label, + target_label=target, + ) + + existing_inverse = by_label.get(inverse_label) + if existing_inverse is not None: + if existing_inverse.inverse_of and existing_inverse.inverse_of != primary.label: + raise RelationshipPairingError( + f"inverse_label `{inverse_label}` already exists and is paired with " + f"`{existing_inverse.inverse_of}`, not `{primary.label}`" + ) + continue + + inverse = build_inverse_edge_type(primary) + edge_types.append(inverse) + by_label[inverse.label] = inverse + + # Rebuild primary 
with pairing metadata (frozen dataclass) + index = edge_types.index(primary) + edge_types[index] = EdgeTypeDefinition( + label=primary.label, + description=primary.description, + source_labels=primary.source_labels, + target_labels=primary.target_labels, + properties=primary.properties, + prepopulated=primary.prepopulated, + prepopulated_instance_count=primary.prepopulated_instance_count, + instance_generator=primary.instance_generator, + bidirectional=True, + inverse_label=inverse_label, + auto_generated=False, + inverse_of=None, + bidirectional_pair_key=pair_key, + ) + by_label[primary.label] = edge_types[index] + + return OntologyConfig( + node_types=config.node_types, + edge_types=tuple(edge_types), + approved_at=config.approved_at, + ) + + +def deterministic_twin_edge_id( + *, + relationship_label: str, + start_id: str, + end_id: str, + tenant_id: str = "", +) -> str: + """Match json_relationships_to_jsonl deterministic edge id rules.""" + normalized_label = relationship_label.strip().lower() + combined = f"{tenant_id}:{start_id.strip()}:{normalized_label}:{end_id.strip()}" + digest = hashlib.sha256(combined.encode()).hexdigest()[:16] + return f"{normalized_label}:{digest}" + + +def _primary_edge_by_label(ontology: OntologyConfig) -> dict[str, EdgeTypeDefinition]: + return { + edge.label: edge + for edge in ontology.edge_types + if _is_primary_bidirectional_edge(edge) + } + + +def normalize_authoring_edge_type_dicts(edge_types: list[dict[str, Any]]) -> list[dict[str, Any]]: + """Default bidirectional=true for newly authored primary relationship types.""" + normalized: list[dict[str, Any]] = [] + for row in edge_types: + payload = dict(row) + if ( + "bidirectional" not in payload + and not payload.get("auto_generated") + and not payload.get("inverse_of") + ): + payload["bidirectional"] = True + normalized.append(payload) + return normalized + + +def expand_twin_edge_creates( + operations: list[dict[str, Any]], + *, + ontology: OntologyConfig, + tenant_id: str, 
+) -> list[dict[str, Any]]: + """Append inverse edge CREATE operations for bidirectional relationship types.""" + primary_edges = _primary_edge_by_label(ontology) + if not primary_edges: + return list(operations) + + expanded: list[dict[str, Any]] = [] + for operation in operations: + expanded.append(operation) + if operation.get("op") != "CREATE" or operation.get("type") != "edge": + continue + + label = str(operation.get("label") or "").strip().lower() + primary = primary_edges.get(label) + if primary is None: + continue + + start_id = str(operation.get("start_id") or "").strip() + end_id = str(operation.get("end_id") or "").strip() + if not start_id or not end_id: + continue + + inverse_label = resolve_inverse_label_for_primary(primary) + properties = dict(operation.get("set_properties") or {}) + twin = { + "op": "CREATE", + "type": "edge", + "id": deterministic_twin_edge_id( + relationship_label=inverse_label, + start_id=end_id, + end_id=start_id, + tenant_id=tenant_id, + ), + "label": inverse_label, + "start_id": end_id, + "end_id": start_id, + "set_properties": properties, + } + expanded.append(twin) + + return expanded + + +def ontology_config_from_authoring_payload(data: dict[str, Any]) -> OntologyConfig: + """Build OntologyConfig from API/workload payload with authoring defaults.""" + payload = dict(data) + payload["edge_types"] = normalize_authoring_edge_type_dicts( + list(payload.get("edge_types") or []) + ) + return OntologyConfig.from_dict(payload) + + +def twin_validation_errors( + *, + ontology: OntologyConfig, + relationship_counts: dict[str, int], +) -> list[str]: + """Report primary/inverse relationship instance count mismatches.""" + errors: list[str] = [] + for primary in ontology.edge_types: + if not _is_primary_bidirectional_edge(primary): + continue + if not primary.source_labels or not primary.target_labels: + continue + source = primary.source_labels[0] + target = primary.target_labels[0] + primary_key = bidirectional_pair_key( + 
source_label=source, + relationship_label=primary.label, + target_label=target, + ) + inverse_label = resolve_inverse_label_for_primary(primary) + inverse_key = bidirectional_pair_key( + source_label=target, + relationship_label=inverse_label, + target_label=source, + ) + primary_count = relationship_counts.get(primary_key, 0) + inverse_count = relationship_counts.get(inverse_key, 0) + if primary_count != inverse_count: + errors.append( + f"Bidirectional pair `{primary.label}` / `{inverse_label}` is unbalanced: " + f"primary={primary_count}, inverse={inverse_count}" + ) + return errors diff --git a/src/api/management/domain/value_objects.py b/src/api/management/domain/value_objects.py index caeff03f2..06667e905 100644 --- a/src/api/management/domain/value_objects.py +++ b/src/api/management/domain/value_objects.py @@ -475,6 +475,11 @@ class EdgeTypeDefinition: prepopulated: bool = False prepopulated_instance_count: int = 0 instance_generator: str | None = None + bidirectional: bool = False + inverse_label: str | None = None + inverse_of: str | None = None + auto_generated: bool = False + bidirectional_pair_key: str | None = None def __post_init__(self) -> None: """Validate that label is non-empty.""" @@ -484,6 +489,10 @@ def __post_init__(self) -> None: raise ValueError("prepopulated_instance_count must be >= 0") if self.instance_generator is not None and not self.instance_generator.strip(): raise ValueError("instance_generator must not be empty or whitespace-only") + if self.inverse_label is not None and not self.inverse_label.strip(): + raise ValueError("inverse_label must not be empty or whitespace-only") + if self.inverse_of is not None and not self.inverse_of.strip(): + raise ValueError("inverse_of must not be empty or whitespace-only") def to_dict(self) -> dict[str, Any]: """Serialize to a plain dict suitable for JSON persistence.""" @@ -495,9 +504,17 @@ def to_dict(self) -> dict[str, Any]: "properties": list(self.properties), "prepopulated": 
self.prepopulated, "prepopulated_instance_count": self.prepopulated_instance_count, + "bidirectional": self.bidirectional, + "auto_generated": self.auto_generated, } if self.instance_generator: payload["instance_generator"] = self.instance_generator + if self.inverse_label: + payload["inverse_label"] = self.inverse_label + if self.inverse_of: + payload["inverse_of"] = self.inverse_of + if self.bidirectional_pair_key: + payload["bidirectional_pair_key"] = self.bidirectional_pair_key return payload @classmethod @@ -505,6 +522,12 @@ def from_dict(cls, data: dict[str, Any]) -> EdgeTypeDefinition: """Deserialize from a plain dict.""" raw_generator = data.get("instance_generator") instance_generator = str(raw_generator).strip() if raw_generator else None + raw_inverse_label = data.get("inverse_label") + inverse_label = str(raw_inverse_label).strip() if raw_inverse_label else None + raw_inverse_of = data.get("inverse_of") + inverse_of = str(raw_inverse_of).strip() if raw_inverse_of else None + raw_pair_key = data.get("bidirectional_pair_key") + pair_key = str(raw_pair_key).strip() if raw_pair_key else None return cls( label=data["label"], description=data.get("description", ""), @@ -514,6 +537,11 @@ def from_dict(cls, data: dict[str, Any]) -> EdgeTypeDefinition: prepopulated=bool(data.get("prepopulated", False)), prepopulated_instance_count=int(data.get("prepopulated_instance_count", 0)), instance_generator=instance_generator or None, + bidirectional=bool(data.get("bidirectional", False)), + inverse_label=inverse_label or None, + inverse_of=inverse_of or None, + auto_generated=bool(data.get("auto_generated", False)), + bidirectional_pair_key=pair_key or None, ) diff --git a/src/api/management/presentation/knowledge_graphs/models.py b/src/api/management/presentation/knowledge_graphs/models.py index b0a15b935..5d19e2127 100644 --- a/src/api/management/presentation/knowledge_graphs/models.py +++ b/src/api/management/presentation/knowledge_graphs/models.py @@ -335,6 +335,22 
@@ class EdgeTypeDefinitionModel(BaseModel): default=None, description="Optional workspace-relative script under instance_generators/ for prepopulation", ) + bidirectional: bool = Field( + default=True, + description="When true, platform auto-generates inverse type and twin edge instances", + ) + inverse_label: str | None = Field( + default=None, + description="Optional explicit inverse relationship label (primary types only)", + ) + inverse_of: str | None = Field( + default=None, + description="Primary label this auto-generated inverse type mirrors", + ) + auto_generated: bool = Field( + default=False, + description="True when this edge type was created by bidirectional pairing", + ) def to_domain(self) -> EdgeTypeDefinition: """Convert to domain EdgeTypeDefinition value object.""" @@ -347,6 +363,10 @@ def to_domain(self) -> EdgeTypeDefinition: prepopulated=self.prepopulated, prepopulated_instance_count=self.prepopulated_instance_count, instance_generator=self.instance_generator, + bidirectional=self.bidirectional, + inverse_label=self.inverse_label, + inverse_of=self.inverse_of, + auto_generated=self.auto_generated, ) @classmethod @@ -361,6 +381,10 @@ def from_domain(cls, et: EdgeTypeDefinition) -> EdgeTypeDefinitionModel: prepopulated=et.prepopulated, prepopulated_instance_count=et.prepopulated_instance_count, instance_generator=et.instance_generator, + bidirectional=et.bidirectional, + inverse_label=et.inverse_label, + inverse_of=et.inverse_of, + auto_generated=et.auto_generated, ) diff --git a/src/api/tests/unit/infrastructure/extraction_workload/test_twin_edge_expansion.py b/src/api/tests/unit/infrastructure/extraction_workload/test_twin_edge_expansion.py new file mode 100644 index 000000000..829882969 --- /dev/null +++ b/src/api/tests/unit/infrastructure/extraction_workload/test_twin_edge_expansion.py @@ -0,0 +1,45 @@ +"""Unit tests for bidirectional twin edge expansion in mutation preflight.""" + +from __future__ import annotations + +import pytest + +from 
graph.domain.value_objects import EntityType, MutationOperation, MutationOperationType +from infrastructure.extraction_workload.mutation_preflight import prepare_mutation_operations +from management.domain.relationship_pairing import expand_ontology_bidirectional_pairs +from management.domain.value_objects import EdgeTypeDefinition, OntologyConfig + + +@pytest.mark.asyncio +async def test_prepare_mutation_operations_expands_twin_edge_creates() -> None: + ontology = expand_ontology_bidirectional_pairs( + OntologyConfig( + edge_types=( + EdgeTypeDefinition( + label="contains", + source_labels=("repository",), + target_labels=("test",), + bidirectional=True, + ), + ) + ) + ) + jsonl = ( + '{"op":"CREATE","type":"edge","id":"contains:0123456789abcdef",' + '"label":"contains","start_id":"repository:aaaaaaaaaaaaaaaa",' + '"end_id":"test:bbbbbbbbbbbbbbbb","set_properties":{' + '"data_source_id":"ds","source_path":"bootstrap","knowledge_graph_id":"kg"}}' + ) + + operations, errors = await prepare_mutation_operations( + jsonl_content=jsonl, + tenant_id="tenant-1", + ontology=ontology, + ) + + assert errors == [] + assert operations is not None + assert len(operations) == 2 + assert operations[1].label == "contained_in" + assert operations[1].start_id == "test:bbbbbbbbbbbbbbbb" + assert operations[1].end_id == "repository:aaaaaaaaaaaaaaaa" diff --git a/src/api/tests/unit/management/application/test_design_artifacts_pairing.py b/src/api/tests/unit/management/application/test_design_artifacts_pairing.py new file mode 100644 index 000000000..4e3ea479c --- /dev/null +++ b/src/api/tests/unit/management/application/test_design_artifacts_pairing.py @@ -0,0 +1,34 @@ +"""Unit tests for bidirectional metadata in design artifacts.""" + +from __future__ import annotations + +from management.application.design_artifacts import build_design_artifacts +from management.domain.relationship_pairing import expand_ontology_bidirectional_pairs +from management.domain.value_objects import 
EdgeTypeDefinition, OntologyConfig + + +def test_design_artifacts_exposes_reverse_relationship_type() -> None: + ontology = expand_ontology_bidirectional_pairs( + OntologyConfig( + edge_types=( + EdgeTypeDefinition( + label="contains", + source_labels=("repository",), + target_labels=("test",), + bidirectional=True, + ), + ) + ) + ) + + artifacts = build_design_artifacts( + knowledge_graph_id="kg-1", + ontology=ontology, + graph_data={"nodes": [], "edges": []}, + limit=100, + ) + + contains = next( + row for row in artifacts["relationships"] if row["relationship_type"] == "contains" + ) + assert contains["reverse_relationship_type"] == "contained_in" diff --git a/src/api/tests/unit/management/domain/test_relationship_pairing.py b/src/api/tests/unit/management/domain/test_relationship_pairing.py new file mode 100644 index 000000000..b25a75c24 --- /dev/null +++ b/src/api/tests/unit/management/domain/test_relationship_pairing.py @@ -0,0 +1,179 @@ +"""Unit tests for bidirectional relationship pairing.""" + +from __future__ import annotations + +import pytest + +from management.domain.relationship_pairing import ( + bidirectional_pair_key, + build_inverse_edge_type, + derive_inverse_label, + expand_ontology_bidirectional_pairs, + expand_twin_edge_creates, + resolve_inverse_label_for_primary, +) +from management.domain.value_objects import EdgeTypeDefinition, OntologyConfig + + +class TestDeriveInverseLabel: + def test_contains_maps_to_contained_in(self) -> None: + assert derive_inverse_label("contains") == "contained_in" + + def test_defines_maps_to_defined_by(self) -> None: + assert derive_inverse_label("defines") == "defined_by" + + def test_unknown_uses_suffix(self) -> None: + assert derive_inverse_label("relates_to") == "relates_to_inverse" + + +class TestExpandOntologyBidirectionalPairs: + def test_auto_generates_inverse_type(self) -> None: + config = OntologyConfig( + edge_types=( + EdgeTypeDefinition( + label="contains", + description="Repository contains test", 
+ source_labels=("repository",), + target_labels=("test",), + bidirectional=True, + ), + ) + ) + + expanded = expand_ontology_bidirectional_pairs(config) + + labels = {edge.label for edge in expanded.edge_types} + assert labels == {"contains", "contained_in"} + inverse = next(edge for edge in expanded.edge_types if edge.label == "contained_in") + assert inverse.source_labels == ("test",) + assert inverse.target_labels == ("repository",) + assert inverse.inverse_of == "contains" + assert inverse.auto_generated is True + + def test_skips_when_bidirectional_false(self) -> None: + config = OntologyConfig( + edge_types=( + EdgeTypeDefinition( + label="depends_on", + source_labels=("service",), + target_labels=("service",), + bidirectional=False, + ), + ) + ) + + expanded = expand_ontology_bidirectional_pairs(config) + + assert len(expanded.edge_types) == 1 + assert expanded.edge_types[0].label == "depends_on" + + def test_respects_explicit_inverse_label(self) -> None: + config = OntologyConfig( + edge_types=( + EdgeTypeDefinition( + label="contains", + source_labels=("repository",), + target_labels=("test",), + bidirectional=True, + inverse_label="housed_in", + ), + ) + ) + + expanded = expand_ontology_bidirectional_pairs(config) + inverse = next(edge for edge in expanded.edge_types if edge.label == "housed_in") + + assert inverse.inverse_of == "contains" + + def test_legacy_edge_without_bidirectional_flag_is_unchanged(self) -> None: + config = OntologyConfig( + edge_types=( + EdgeTypeDefinition( + label="contains", + source_labels=("repository",), + target_labels=("test",), + ), + ) + ) + + expanded = expand_ontology_bidirectional_pairs(config) + + assert len(expanded.edge_types) == 1 + + +class TestExpandTwinEdgeCreates: + def test_primary_create_expands_to_inverse(self) -> None: + ontology = expand_ontology_bidirectional_pairs( + OntologyConfig( + edge_types=( + EdgeTypeDefinition( + label="contains", + source_labels=("repository",), + target_labels=("test",), + 
bidirectional=True, + ), + ) + ) + ) + operations = [ + { + "op": "CREATE", + "type": "edge", + "id": "contains:0123456789abcdef", + "label": "contains", + "start_id": "repository:aaaaaaaaaaaaaaaa", + "end_id": "test:bbbbbbbbbbbbbbbb", + "set_properties": { + "data_source_id": "ds-1", + "source_path": "bootstrap", + "knowledge_graph_id": "kg-1", + }, + } + ] + + expanded = expand_twin_edge_creates( + operations, + ontology=ontology, + tenant_id="tenant-1", + ) + + assert len(expanded) == 2 + inverse = expanded[1] + assert inverse["label"] == "contained_in" + assert inverse["start_id"] == "test:bbbbbbbbbbbbbbbb" + assert inverse["end_id"] == "repository:aaaaaaaaaaaaaaaa" + + def test_non_bidirectional_edge_is_unchanged(self) -> None: + ontology = OntologyConfig( + edge_types=( + EdgeTypeDefinition( + label="depends_on", + source_labels=("service",), + target_labels=("service",), + bidirectional=False, + ), + ) + ) + operations = [ + { + "op": "CREATE", + "type": "edge", + "id": "depends_on:0123456789abcdef", + "label": "depends_on", + "start_id": "service:aaaaaaaaaaaaaaaa", + "end_id": "service:bbbbbbbbbbbbbbbb", + "set_properties": { + "data_source_id": "ds-1", + "source_path": "bootstrap", + "knowledge_graph_id": "kg-1", + }, + } + ] + + expanded = expand_twin_edge_creates( + operations, + ontology=ontology, + tenant_id="tenant-1", + ) + + assert len(expanded) == 1 From 825b0c2ebdbc718dbd51e145a48f6ba0ca67782f Mon Sep 17 00:00:00 2001 From: Austin Redenbaugh <aredenba@redhat.com> Date: Thu, 4 Jun 2026 23:17:12 -0400 Subject: [PATCH 102/153] feat(extraction): allow one-hour GMA sticky turn timeout (#760) Raise turn timeout caps to 3600s and set the dev compose default so long bootstrap prepopulation turns are not cut off at ~17 minutes. 
Closes #750 Co-authored-by: Cursor <cursoragent@cursor.com> --- compose.dev.yaml | 2 +- specs/extraction/chat-turns.spec.md | 9 +++++++++ src/agent-runtime/kartograph_agent_runtime/settings.py | 2 +- src/agent-runtime/tests/test_thinking_stream.py | 8 ++++++++ .../infrastructure/workload_runtime_settings.py | 2 +- .../infrastructure/test_workload_runtime_settings.py | 5 +++++ 6 files changed, 25 insertions(+), 3 deletions(-) diff --git a/compose.dev.yaml b/compose.dev.yaml index 8c77419d8..372227556 100644 --- a/compose.dev.yaml +++ b/compose.dev.yaml @@ -24,7 +24,7 @@ services: KARTOGRAPH_EXTRACTION_RUNTIME_SKILLS_DIR: ${PWD}/skills KARTOGRAPH_EXTRACTION_RUNTIME_CONTAINER_RUN_UID: ${HOST_UID} KARTOGRAPH_EXTRACTION_RUNTIME_CONTAINER_RUN_GID: ${HOST_GID} - KARTOGRAPH_EXTRACTION_RUNTIME_STICKY_TURN_TIMEOUT_SECONDS: "1000" + KARTOGRAPH_EXTRACTION_RUNTIME_STICKY_TURN_TIMEOUT_SECONDS: "3600" KARTOGRAPH_EXTRACTION_RUNTIME_STICKY_MAX_TURNS: "500" # Vertex AI for Claude Agent SDK in sticky assistant containers CLAUDE_CODE_USE_VERTEX: "1" diff --git a/specs/extraction/chat-turns.spec.md b/specs/extraction/chat-turns.spec.md index 6cb080f76..50e5f65ba 100644 --- a/specs/extraction/chat-turns.spec.md +++ b/specs/extraction/chat-turns.spec.md @@ -94,3 +94,12 @@ The system SHALL expose schema, mutation, and workspace tooling appropriate for - WHEN a follow-up chat message is processed - THEN the system prompt omits the full skill prose block - AND still includes live workspace readiness and a short tools summary + +### Requirement: Sticky Turn Timeout +The system SHALL allow configuring a per-turn execution timeout for graph-management chat in sticky session containers. 
+ +#### Scenario: One-hour dev timeout +- GIVEN development runtime settings set `STICKY_TURN_TIMEOUT_SECONDS` to 3600 +- WHEN a chat turn runs in the sticky agent runtime +- THEN the agent turn may execute for up to 3600 seconds before timing out +- AND the API sticky HTTP client read timeout exceeds the configured turn timeout diff --git a/src/agent-runtime/kartograph_agent_runtime/settings.py b/src/agent-runtime/kartograph_agent_runtime/settings.py index 4936c8492..1bcd11c79 100644 --- a/src/agent-runtime/kartograph_agent_runtime/settings.py +++ b/src/agent-runtime/kartograph_agent_runtime/settings.py @@ -28,7 +28,7 @@ class AgentRuntimeSettings(BaseSettings): gcloud_config_dir: str = Field(default="", alias="CLOUDSDK_CONFIG") google_application_credentials: str = Field(default="", alias="GOOGLE_APPLICATION_CREDENTIALS") home_dir: str = Field(default="/tmp", alias="HOME") - turn_timeout_seconds: float = Field(default=1000.0, ge=30.0, le=1200.0, alias="KARTOGRAPH_AGENT_TURN_TIMEOUT_SECONDS") + turn_timeout_seconds: float = Field(default=1000.0, ge=30.0, le=3600.0, alias="KARTOGRAPH_AGENT_TURN_TIMEOUT_SECONDS") max_turns: int = Field(default=500, ge=1, le=1000, alias="KARTOGRAPH_AGENT_MAX_TURNS") def vertex_enabled(self) -> bool: diff --git a/src/agent-runtime/tests/test_thinking_stream.py b/src/agent-runtime/tests/test_thinking_stream.py index c5da65b8a..f4728b11f 100644 --- a/src/agent-runtime/tests/test_thinking_stream.py +++ b/src/agent-runtime/tests/test_thinking_stream.py @@ -61,6 +61,14 @@ def test_agent_runtime_settings_default_max_turns() -> None: assert settings.max_turns == 500 +def test_agent_runtime_settings_accepts_one_hour_turn_timeout() -> None: + from kartograph_agent_runtime.settings import AgentRuntimeSettings + + settings = AgentRuntimeSettings(KARTOGRAPH_AGENT_TURN_TIMEOUT_SECONDS="3600") + + assert settings.turn_timeout_seconds == 3600.0 + + def test_push_thinking_deduplicates_and_caps_recent_lines() -> None: recent: list[str] = [] for index 
in range(5): diff --git a/src/api/extraction/infrastructure/workload_runtime_settings.py b/src/api/extraction/infrastructure/workload_runtime_settings.py index 6aa0bcb8b..5be9c6d77 100644 --- a/src/api/extraction/infrastructure/workload_runtime_settings.py +++ b/src/api/extraction/infrastructure/workload_runtime_settings.py @@ -50,7 +50,7 @@ class ExtractionWorkloadRuntimeSettings(BaseSettings): ), ) sticky_health_timeout_seconds: float = Field(default=90.0, ge=5.0, le=600.0) - sticky_turn_timeout_seconds: float = Field(default=1000.0, ge=30.0, le=1200.0) + sticky_turn_timeout_seconds: float = Field(default=1000.0, ge=30.0, le=3600.0) sticky_max_turns: int = Field(default=500, ge=1, le=1000) vertex_project_id: str = Field(default="") vertex_region: str = Field(default="us-east5") diff --git a/src/api/tests/unit/extraction/infrastructure/test_workload_runtime_settings.py b/src/api/tests/unit/extraction/infrastructure/test_workload_runtime_settings.py index e769c1e99..042488d23 100644 --- a/src/api/tests/unit/extraction/infrastructure/test_workload_runtime_settings.py +++ b/src/api/tests/unit/extraction/infrastructure/test_workload_runtime_settings.py @@ -34,3 +34,8 @@ def test_resolve_workload_token_signing_key_falls_back_to_dev_default(self) -> N settings = ExtractionWorkloadRuntimeSettings(workload_token_signing_key="") assert resolve_workload_token_signing_key(settings) + + def test_sticky_turn_timeout_accepts_one_hour(self) -> None: + settings = ExtractionWorkloadRuntimeSettings(sticky_turn_timeout_seconds=3600.0) + + assert settings.sticky_turn_timeout_seconds == 3600.0 From e7824f7179de025acb0db7de900728efa883ad68 Mon Sep 17 00:00:00 2001 From: Austin Redenbaugh <aredenba@redhat.com> Date: Thu, 4 Jun 2026 23:18:55 -0400 Subject: [PATCH 103/153] feat(extraction): complete GMA bootstrap guidance and runtime improvements (#765) Land remaining GMA work: one-hour turn timeout, multi-deliverable pacing, six-phase bootstrap workflow, schema modeling rules, workspace 
discovery, writable path guidance, bulk JSONL guardrails, readiness reporting, and sanitized composing-reply thinking lines. Closes #750 Closes #751 Closes #752 Closes #753 Closes #754 Closes #755 Closes #756 Closes #757 Closes #758 Closes #759 Co-authored-by: Cursor <cursoragent@cursor.com> --- specs/extraction/chat-turns.spec.md | 9 +++ specs/graph/schema-authoring.spec.md | 28 ++++++++ .../kartograph_agent_runtime/executor.py | 10 +-- .../thinking_stream.py | 25 ++++++- src/agent-runtime/tests/test_executor.py | 2 + .../tests/test_thinking_stream.py | 17 +++++ .../application/schema_authoring_guide.py | 38 ++++++++++- .../application/skill_resolution_service.py | 66 ++++++++++++++++--- .../test_schema_authoring_guide.py | 14 ++++ .../test_skill_resolution_service.py | 18 +++-- 10 files changed, 202 insertions(+), 25 deletions(-) create mode 100644 src/api/tests/unit/extraction/application/test_schema_authoring_guide.py diff --git a/specs/extraction/chat-turns.spec.md b/specs/extraction/chat-turns.spec.md index 50e5f65ba..61995d400 100644 --- a/specs/extraction/chat-turns.spec.md +++ b/specs/extraction/chat-turns.spec.md @@ -95,6 +95,15 @@ The system SHALL expose schema, mutation, and workspace tooling appropriate for - THEN the system prompt omits the full skill prose block - AND still includes live workspace readiness and a short tools summary +### Requirement: Multi-Deliverable Turn Pacing +The system SHALL instruct the Graph Management Assistant to pace multi-item bootstrap requests across turns. 
+ +#### Scenario: One phase per turn by default +- GIVEN the user sends one message with multiple bootstrap deliverables +- WHEN the Graph Management Assistant processes the turn +- THEN schema bootstrap guardrails require completing at most one bootstrap phase +- AND the assistant asks whether to continue automatically or one phase at a time + ### Requirement: Sticky Turn Timeout The system SHALL allow configuring a per-turn execution timeout for graph-management chat in sticky session containers. diff --git a/specs/graph/schema-authoring.spec.md b/specs/graph/schema-authoring.spec.md index 06d1ba4ce..cf2628c13 100644 --- a/specs/graph/schema-authoring.spec.md +++ b/specs/graph/schema-authoring.spec.md @@ -68,6 +68,34 @@ The system SHALL enforce `prepopulated=true` as a transition-blocking readiness - WHEN readiness is evaluated - THEN validation fails and transition to extraction mode is blocked +### Requirement: Opinionated Bootstrap Workflow +The system SHALL guide the Graph Management Assistant through a six-phase schema bootstrap workflow. 
+ +#### Scenario: Goals before schema +- GIVEN a new schema bootstrap conversation +- WHEN the assistant begins intake +- THEN it asks for questions the graph must answer before proposing entity types + +#### Scenario: Phased bootstrap guidance +- GIVEN schema bootstrap skills are resolved for a graph-management turn +- WHEN the agent system prompt is assembled +- THEN it includes the six phases: goals, discovery, schema Q&A, prepopulation planning, confirmed save, bulk implementation + +#### Scenario: Confirmed ontology save +- GIVEN the assistant has drafted a schema but the user has not confirmed it +- WHEN the assistant considers persisting types +- THEN guardrails require waiting for explicit user confirmation before `kartograph_save_schema_ontology` + +#### Scenario: Property versus entity modeling guidance +- GIVEN schema bootstrap skills are resolved +- WHEN the assistant models attributes +- THEN skills distinguish categorize/distinguish → property from track-which/needs-relationships → entity type + +#### Scenario: Workspace discovery before prepopulation +- GIVEN the assistant enters prepopulation planning +- WHEN skills are resolved +- THEN prepopulation guidance requires Glob/Grep discovery on `repository-files/` first + ### Requirement: Workload Bulk Instance Authoring The system SHALL support bulk instance authoring for the Graph Management Assistant via workspace files and strict CREATE semantics. diff --git a/src/agent-runtime/kartograph_agent_runtime/executor.py b/src/agent-runtime/kartograph_agent_runtime/executor.py index cb198e924..e7fc69e57 100644 --- a/src/agent-runtime/kartograph_agent_runtime/executor.py +++ b/src/agent-runtime/kartograph_agent_runtime/executor.py @@ -59,10 +59,12 @@ def _build_workspace_prompt_appendix(settings: AgentRuntimeSettings) -> str: f"Workspace mount: `{settings.workspace_dir}`", ( "Prepared repository files live under " - "`repository-files/<data_source_name>/`. 
" - "Prebuilt instance generator scripts are in `instance_generators/` " - "(run with Bash: `python3 instance_generators/<script>.py repository-files`). " - "Use Read, Grep, Glob, and Bash against the workspace mount only." + "`repository-files/<data_source_name>/` (read-only). " + "`ingestion-context/` is read-only. " + "Writable outputs: `instance_generators/` only (scripts, JSON, JSONL under " + "`instance_generators/out/`). " + "Run generators with Bash: `python3 instance_generators/<script>.py repository-files`. " + "Use Read, Grep, Glob on repository-files; Bash for generators." ), ] for source in sources[:12]: diff --git a/src/agent-runtime/kartograph_agent_runtime/thinking_stream.py b/src/agent-runtime/kartograph_agent_runtime/thinking_stream.py index 555148019..74fc53561 100644 --- a/src/agent-runtime/kartograph_agent_runtime/thinking_stream.py +++ b/src/agent-runtime/kartograph_agent_runtime/thinking_stream.py @@ -53,11 +53,30 @@ def replace_last_thinking( return {"type": "thinking", "recent": list(recent)} -def update_composing_line(recent: list[str], preview_tail: str) -> dict[str, Any] | None: +def _compose_reply_line(preview_tail: str) -> str: preview_tail = normalize_activity_line(preview_tail.replace("\n", " ")) - line = normalize_activity_line( - f"Composing reply · {preview_tail}" if preview_tail else "Composing reply…", + if not preview_tail: + return "Composing reply…" + lowered = preview_tail.lower() + noisy_prefixes = ( + "need.", + "need ", + "let me", + "now let me", + "i'll ", + "i will ", + "creating task", + "first, let", ) + if any(lowered.startswith(prefix) for prefix in noisy_prefixes): + return "Composing reply…" + if len(preview_tail) < 12: + return "Composing reply…" + return f"Composing reply · {preview_tail}" + + +def update_composing_line(recent: list[str], preview_tail: str) -> dict[str, Any] | None: + line = _compose_reply_line(preview_tail) prefix = "Composing reply" if recent and str(recent[-1]).startswith(prefix): recent[-1] 
= line diff --git a/src/agent-runtime/tests/test_executor.py b/src/agent-runtime/tests/test_executor.py index e718b131d..6defdd80d 100644 --- a/src/agent-runtime/tests/test_executor.py +++ b/src/agent-runtime/tests/test_executor.py @@ -53,6 +53,8 @@ def test_build_workspace_prompt_appendix_prefers_sources_index(tmp_path: Path) - assert "Hyperfleet API" in appendix assert "142 file(s)" in appendix assert "pkg/api/adapter_status_types_test.go" in appendix + assert "read-only" in appendix + assert "instance_generators/" in appendix def test_build_workspace_prompt_appendix_includes_extension_counts(tmp_path: Path) -> None: diff --git a/src/agent-runtime/tests/test_thinking_stream.py b/src/agent-runtime/tests/test_thinking_stream.py index f4728b11f..13f59dda5 100644 --- a/src/agent-runtime/tests/test_thinking_stream.py +++ b/src/agent-runtime/tests/test_thinking_stream.py @@ -5,10 +5,12 @@ from dataclasses import dataclass from kartograph_agent_runtime.thinking_stream import ( + _compose_reply_line, initial_sdk_thinking_lines, push_thinking, replace_last_thinking, thinking_events_from_sdk_message, + update_composing_line, ) @@ -53,6 +55,21 @@ def test_initial_sdk_thinking_lines_include_connected_message() -> None: assert any("Schema tools" in line for line in lines) +def test_compose_reply_line_sanitizes_noisy_planning_text() -> None: + assert _compose_reply_line("need. Let me create tasks") == "Composing reply…" + assert _compose_reply_line("Short") == "Composing reply…" + assert _compose_reply_line("Summarizing ontology changes for review.") == ( + "Composing reply · Summarizing ontology changes for review." 
+ ) + + +def test_update_composing_line_uses_sanitized_preview() -> None: + recent: list[str] = [] + event = update_composing_line(recent, "Now let me start with the ontology schema") + assert event is not None + assert recent[-1] == "Composing reply…" + + def test_agent_runtime_settings_default_max_turns() -> None: from kartograph_agent_runtime.settings import AgentRuntimeSettings diff --git a/src/api/extraction/application/schema_authoring_guide.py b/src/api/extraction/application/schema_authoring_guide.py index 315e88638..97780987b 100644 --- a/src/api/extraction/application/schema_authoring_guide.py +++ b/src/api/extraction/application/schema_authoring_guide.py @@ -5,9 +5,40 @@ Use the Kartograph schema tools — never probe undocumented HTTP routes. Use Read, Grep, Glob, and Bash against the session workspace mount. Prebuilt generator scripts -live under `instance_generators/` (see README there). +live under `instance_generators/` (see README there). Write scripts and JSON/JSONL outputs +only under `instance_generators/` — `repository-files/` and `ingestion-context/` are read-only. -## Workflow +## Bootstrap workflow (6 phases) + +Complete these in order. Do not mix schema design, prepopulation planning, and bulk +implementation in the same turn when the user gave multiple deliverables. + +1. **Understand goals** — Ask what questions the graph must answer; collect 3–5 stakeholder use cases. +2. **Workspace discovery** — Glob/Grep under `repository-files/`; report file counts, extensions, and code patterns. +3. **Draft schema + validation Q&A** — Propose entity types, properties, and relationships; cite workspace examples. +4. **Prepopulation planning** — Decide prepopulated vs manual per type; required properties; generator strategy. +5. **Save ontology** — `kartograph_save_schema_ontology` only after the user confirms the full schema. +6. 
**Implement prepopulation** — Bash generators → `json_*_to_jsonl.py` → validate-from-file → apply-from-file; entities before edges; verify readiness. + +## Schema modeling rules + +- **Property vs entity:** distinguish/categorize (e.g. tier0/tier1) → property on an existing type; + track which/what or needs relationships → entity type + edges. +- **Bidirectional relationships** default on — author primary direction only; platform creates inverse type + and twin edge instances. Set `bidirectional: false` for asymmetric edges (`depends_on`, `created_by`). +- For asymmetric edges, confirm direction explicitly (X → rel → Y). + +## Workspace discovery patterns + +| Target | Glob / Grep hints | +|--------|-------------------| +| Tests | `**/*_test.go`, `**/test_*.go`, `**/*_test.py`, `**/tests/**` | +| API endpoints / handlers | `Grep` for route registrations, `@app.`, `HandleFunc`, OpenAPI paths | +| Source files | `Glob **/*.{go,py,ts,java,yaml,md}` per data source folder | + +Cite the session workspace appendix for per-repo file counts and extension summaries before prepopulation Q&A. + +## Tool workflow 1. Call `kartograph_get_schema_authoring_guide` (this document). 2. Call `kartograph_get_workspace_readiness` to see prepopulated gaps and live instance counts. @@ -94,7 +125,8 @@ - CREATE requires `data_source_id` and `source_path` in `set_properties`. - Node CREATE requires `slug` in `set_properties` (kebab-case, unique per type). - `knowledge_graph_id` is stamped by the platform — do not set it. -- For large sets: Bash + custom script under `instance_generators/` → JSONL file → apply-from-file tool. +- For large sets: Bash + custom script under `instance_generators/` → `json_*_to_jsonl.py` → apply-from-file. + Never hand-author bulk CREATE ids in chat — use converter scripts for deterministic ids. - CREATE is strict: existing types/instances must be changed with UPDATE, not CREATE again. 
- Dry-run before apply: `kartograph_validate_graph_mutations` or `kartograph_validate_graph_mutations_from_file`. - Create all entity nodes before relationship edges. diff --git a/src/api/extraction/application/skill_resolution_service.py b/src/api/extraction/application/skill_resolution_service.py index e41cb021c..d80c23f01 100644 --- a/src/api/extraction/application/skill_resolution_service.py +++ b/src/api/extraction/application/skill_resolution_service.py @@ -24,8 +24,9 @@ class ResolvedExtractionSkillPack: "You are the Graph Management Assistant for schema bootstrap. " "Use Kartograph schema tools to read and write entity/relationship types " "and instances — do not discover or call raw HTTP API routes. " - "Start by understanding user goals, then model the ontology and apply changes " - "with kartograph_get_schema_ontology and kartograph_save_schema_ontology." + "Follow the six-phase bootstrap workflow (goals → discovery → schema Q&A → " + "prepopulation planning → confirmed ontology save → bulk implementation). " + "Do not conflate schema design, prepopulation planning, and implementation." ), "prompt_hierarchy": ( "platform_security_constraints", @@ -39,6 +40,30 @@ class ResolvedExtractionSkillPack: "Keep recommendations scoped to the active knowledge graph.", "Use kartograph_* schema tools for ontology and JSONL mutations; never probe /management or /graph HTTP routes manually.", "Format user-facing replies in GitHub-flavored Markdown (headings, lists, fenced code blocks, tables) for readability in the chat UI.", + ( + "When the user gives multiple deliverables in one message (three or more bullets, " + "or any mix of ontology edits + bulk prepopulation + relationships), do not execute " + "the full list in one turn. Complete one phase only, summarize what finished, then " + "ask whether to continue through the rest automatically or one phase at a time. " + "Default to one phase per turn unless the user explicitly requests doing everything." 
+ ), + ( + "Bootstrap phases (in order): (1) ontology/types/properties, (2) entity instances " + "in dependency order, (3) relationship instances, (4) readiness verification via " + "kartograph_get_workspace_readiness. Stop after each phase when multiple deliverables " + "were requested." + ), + ( + "Do not call kartograph_save_schema_ontology until the user confirms the full " + "proposed schema (types, properties, relationship directions, prepopulation flags). " + "Exception: the user explicitly says to save/apply or continues after reviewing your draft." + ), + ( + "For bulk prepopulation never hand-author CREATE ids in chat. Use Bash generators → " + "json_*_to_jsonl.py → validate-from-file → apply-from-file. On ontology save errors, " + "read kartograph_get_schema_ontology and kartograph_get_schema_authoring_guide, merge " + "a fix, then retry once." + ), ), }, ExtractionSessionMode.EXTRACTION_OPERATIONS: { @@ -64,18 +89,40 @@ class ResolvedExtractionSkillPack: _GLOBAL_SKILL_TEMPLATES: dict[ExtractionSessionMode, dict[str, str]] = { ExtractionSessionMode.SCHEMA_BOOTSTRAP: { "capabilities_intake": ( - "Ask for goals once, then co-design or propose a first-pass schema." + "Phase 1 — Understand goals: ask what questions the graph must answer; collect " + "3–5 concrete stakeholder use cases before proposing types." + ), + "bootstrap_workflow": ( + "Opinionated schema bootstrap phases (complete in order; one phase per turn when " + "the user gave multiple deliverables): " + "(1) Understand goals — 3–5 questions the graph must answer. " + "(2) Workspace discovery — Glob/Grep on repository-files/, cite file counts and patterns. " + "(3) Draft schema + Q&A — propose types/properties/relationships; show workspace examples. " + "(4) Prepopulation planning — which types/relationships are prepopulated vs manual. " + "(5) Save ontology — kartograph_save_schema_ontology only after user confirms the full schema. 
" + "(6) Implement prepopulation — generators → json_*_to_jsonl → validate-from-file → " + "apply-from-file; entities first, then edges; verify with kartograph_get_workspace_readiness." + ), + "schema_modeling": ( + "Property vs entity: distinguish/categorize → property on an existing type; " + "track which/what or needs relationships → entity type + edges. " + "Relationships default bidirectional — author primary direction only; platform creates " + "inverse type + twin instances. Set bidirectional=false for asymmetric edges " + "(depends_on, created_by). For asymmetric edges, confirm X → rel → Y direction explicitly." ), "schema_workflow": ( - "Call kartograph_get_schema_authoring_guide when you need shapes or mutation rules. " + "Call kartograph_get_schema_authoring_guide when you need shapes, phases, or mutation rules. " "Read/save ontology via kartograph_get_schema_ontology and kartograph_save_schema_ontology." ), "prepopulation": ( - "For prepopulated types: set instance_generator on the type when helpful, run script " - "under instance_generators/ with Bash, convert with json_*_to_jsonl helpers, validate " - "then apply-from-file. CREATE cannot duplicate existing instances — use UPDATE to edit. " - "Bidirectional relationships default on: author primary-direction edges only; platform " - "creates inverse type + twin instances. Set bidirectional=false for asymmetric edges." + "Before prepopulation planning: Glob/Grep repository-files/ and cite counts from the workspace " + "appendix. Write scripts/JSON/JSONL only under instance_generators/ (repository-files/ is " + "read-only). For prepopulated types: run script with Bash, convert with json_*_to_jsonl, " + "validate then apply-from-file. Bidirectional edges: primary direction only in generators." + ), + "readiness_reporting": ( + "After schema or prepopulation work, call kartograph_get_workspace_readiness and cite " + "blocking_reasons, prepopulated gaps, and transition_eligible in your reply." 
), }, ExtractionSessionMode.EXTRACTION_OPERATIONS: { @@ -174,4 +221,3 @@ async def resolve_for_graph_management_turn( guardrails=base.guardrails, skills=merged_skills, ) - diff --git a/src/api/tests/unit/extraction/application/test_schema_authoring_guide.py b/src/api/tests/unit/extraction/application/test_schema_authoring_guide.py new file mode 100644 index 000000000..c1ec6534a --- /dev/null +++ b/src/api/tests/unit/extraction/application/test_schema_authoring_guide.py @@ -0,0 +1,14 @@ +"""Unit tests for schema authoring guide content.""" + +from __future__ import annotations + +from extraction.application.schema_authoring_guide import SCHEMA_AUTHORING_GUIDE + + +def test_authoring_guide_documents_bootstrap_and_modeling_guidance() -> None: + assert "## Bootstrap workflow (6 phases)" in SCHEMA_AUTHORING_GUIDE + assert "## Schema modeling rules" in SCHEMA_AUTHORING_GUIDE + assert "## Workspace discovery patterns" in SCHEMA_AUTHORING_GUIDE + assert "read-only" in SCHEMA_AUTHORING_GUIDE + assert "Property vs entity" in SCHEMA_AUTHORING_GUIDE + assert "Never hand-author bulk CREATE ids" in SCHEMA_AUTHORING_GUIDE diff --git a/src/api/tests/unit/extraction/application/test_skill_resolution_service.py b/src/api/tests/unit/extraction/application/test_skill_resolution_service.py index 5e5b79643..4e3e03182 100644 --- a/src/api/tests/unit/extraction/application/test_skill_resolution_service.py +++ b/src/api/tests/unit/extraction/application/test_skill_resolution_service.py @@ -36,15 +36,23 @@ async def test_bootstrap_mode_uses_bootstrap_defaults(self): assert set(resolved.skills.keys()) >= { "capabilities_intake", + "bootstrap_workflow", + "schema_modeling", "schema_workflow", "prepopulation", + "readiness_reporting", } - assert "instance_generators" in resolved.skills["prepopulation"] - assert "kartograph_get_schema_authoring_guide" in resolved.skills["schema_workflow"] - assert "capabilities_intake" in resolved.skills - assert "goal" in resolved.system_prompt.lower() + 
assert "3–5" in resolved.skills["capabilities_intake"] + assert "Workspace discovery" in resolved.skills["bootstrap_workflow"] + assert "Property vs entity" in resolved.skills["schema_modeling"] + assert "read-only" in resolved.skills["prepopulation"] + assert "blocking_reasons" in resolved.skills["readiness_reporting"] + assert "six-phase" in resolved.system_prompt.lower() + guardrails_text = " ".join(resolved.guardrails) + assert "one phase per turn" in guardrails_text + assert "kartograph_save_schema_ontology" in guardrails_text + assert "never hand-author CREATE ids" in guardrails_text assert len(resolved.prompt_hierarchy) > 0 - assert len(resolved.guardrails) > 0 async def test_extraction_mode_uses_extraction_defaults(self): service = ExtractionSkillResolutionService( From 0859188fe309c353c4c287f16ab93f0ad24d6bd1 Mon Sep 17 00:00:00 2001 From: aredenba-rh <aredenba@redhat.com> Date: Thu, 4 Jun 2026 23:45:16 -0400 Subject: [PATCH 104/153] feat(extraction): default GMA to execute-first prepopulation via generators When readiness shows prepopulated gaps after schema save, instruct the assistant to write/run one generator task per turn across all data sources without asking permission unless strategy or strict CREATE blocks progress. 
Co-authored-by: Cursor <cursoragent@cursor.com> --- specs/graph/schema-authoring.spec.md | 6 ++++ .../application/schema_authoring_guide.py | 22 ++++++++++++- .../application/skill_resolution_service.py | 33 ++++++++++++++----- .../test_schema_authoring_guide.py | 3 ++ .../test_skill_resolution_service.py | 9 +++-- 5 files changed, 62 insertions(+), 11 deletions(-) diff --git a/specs/graph/schema-authoring.spec.md b/specs/graph/schema-authoring.spec.md index cf2628c13..b04e81cd9 100644 --- a/specs/graph/schema-authoring.spec.md +++ b/specs/graph/schema-authoring.spec.md @@ -96,6 +96,12 @@ The system SHALL guide the Graph Management Assistant through a six-phase schema - WHEN skills are resolved - THEN prepopulation guidance requires Glob/Grep discovery on `repository-files/` first +#### Scenario: Execute-first prepopulation after schema save +- GIVEN the ontology is saved and readiness shows prepopulated entity or relationship gaps +- WHEN the Graph Management Assistant continues schema bootstrap +- THEN it executes one prepopulation task per turn via generator script and apply-from-file +- AND does not ask the user for permission to proceed unless strategy is ambiguous or CREATE is rejected + ### Requirement: Workload Bulk Instance Authoring The system SHALL support bulk instance authoring for the Graph Management Assistant via workspace files and strict CREATE semantics. diff --git a/src/api/extraction/application/schema_authoring_guide.py b/src/api/extraction/application/schema_authoring_guide.py index 97780987b..f2c816b11 100644 --- a/src/api/extraction/application/schema_authoring_guide.py +++ b/src/api/extraction/application/schema_authoring_guide.py @@ -18,7 +18,27 @@ 3. **Draft schema + validation Q&A** — Propose entity types, properties, and relationships; cite workspace examples. 4. **Prepopulation planning** — Decide prepopulated vs manual per type; required properties; generator strategy. 5. 
**Save ontology** — `kartograph_save_schema_ontology` only after the user confirms the full schema. -6. **Implement prepopulation** — Bash generators → `json_*_to_jsonl.py` → validate-from-file → apply-from-file; entities before edges; verify readiness. +6. **Implement prepopulation** — one task per turn (see below); entities before edges; verify readiness. + +## Prepopulation execution (default) + +When `kartograph_get_workspace_readiness` shows prepopulated gaps **after the ontology is saved**, +**execute immediately** — do not ask whether to proceed. + +**One prepopulation task per turn** = one entity type **or** one relationship type, end-to-end: + +1. Write or reuse `instance_generators/<script>.py` (use `instance_generator` from ontology when set). +2. Script must scan **every** data source under `repository-files/` (all top-level folders). +3. `python3 instance_generators/<script>.py repository-files > instance_generators/out/<label>.json` +4. `json_instances_to_jsonl.py` (entities) or `json_relationships_to_jsonl.py` (edges after nodes exist). +5. `kartograph_validate_graph_mutations_from_file` → `kartograph_apply_graph_mutations_from_file` +6. `kartograph_get_workspace_readiness` — report counts and the **next** gap you will tackle. + +**Order:** entity types with gaps first (e.g. `api_endpoint`, `test`), then relationship types +(e.g. `repository → defines → api_endpoint`) once endpoint node slugs exist. + +**Only ask the user when:** generator strategy is ambiguous, discovery cannot support a reliable +script, or strict CREATE validation reports duplicates (then use UPDATE or skip-existing slugs). 
## Schema modeling rules diff --git a/src/api/extraction/application/skill_resolution_service.py b/src/api/extraction/application/skill_resolution_service.py index d80c23f01..8cbf70bfb 100644 --- a/src/api/extraction/application/skill_resolution_service.py +++ b/src/api/extraction/application/skill_resolution_service.py @@ -64,6 +64,15 @@ class ResolvedExtractionSkillPack: "read kartograph_get_schema_ontology and kartograph_get_schema_authoring_guide, merge " "a fix, then retry once." ), + ( + "When kartograph_get_workspace_readiness shows prepopulated gaps after schema is saved, " + "default to executing prepopulation — do not ask whether to proceed. Complete one " + "prepopulation task per turn (one entity type or one relationship type): write or reuse " + "instance_generators/<script>.py, scan all repository-files/ data sources, run the " + "pipeline through apply-from-file, report results, then stop. Only ask the user when " + "generator strategy is ambiguous, discovery cannot support a script, or strict CREATE " + "reports duplicates." + ), ), }, ExtractionSessionMode.EXTRACTION_OPERATIONS: { @@ -98,10 +107,12 @@ class ResolvedExtractionSkillPack: "(1) Understand goals — 3–5 questions the graph must answer. " "(2) Workspace discovery — Glob/Grep on repository-files/, cite file counts and patterns. " "(3) Draft schema + Q&A — propose types/properties/relationships; show workspace examples. " - "(4) Prepopulation planning — which types/relationships are prepopulated vs manual. " + "(4) Prepopulation planning — which types/relationships are prepopulated vs manual (during " + "schema design only; do not re-ask once schema is saved). " "(5) Save ontology — kartograph_save_schema_ontology only after user confirms the full schema. " - "(6) Implement prepopulation — generators → json_*_to_jsonl → validate-from-file → " - "apply-from-file; entities first, then edges; verify with kartograph_get_workspace_readiness." 
+ "(6) Implement prepopulation — one task per turn: write/run generator for one gap, full " + "pipeline through apply-from-file; all repository-files/ data sources; entities before " + "relationships; verify readiness; proceed to next gap without asking permission." ), "schema_modeling": ( "Property vs entity: distinguish/categorize → property on an existing type; " @@ -115,14 +126,20 @@ class ResolvedExtractionSkillPack: "Read/save ontology via kartograph_get_schema_ontology and kartograph_save_schema_ontology." ), "prepopulation": ( - "Before prepopulation planning: Glob/Grep repository-files/ and cite counts from the workspace " - "appendix. Write scripts/JSON/JSONL only under instance_generators/ (repository-files/ is " - "read-only). For prepopulated types: run script with Bash, convert with json_*_to_jsonl, " - "validate then apply-from-file. Bidirectional edges: primary direction only in generators." + "Execute-first prepopulation: when readiness lists prepopulated gaps, pick the next entity " + "gap (before relationships), then relationship gaps after entity nodes exist. Per task: " + "(1) copy/adapt a template or write instance_generators/<label>.py that scans every folder " + "under repository-files/ (all data sources); (2) Bash run → JSON stdout; " + "(3) json_instances_to_jsonl.py or json_relationships_to_jsonl.py; (4) validate-from-file; " + "(5) apply-from-file; (6) re-check readiness. Use instance_generator from ontology when set. " + "Do not ask 'should we proceed' — execute unless strategy is unclear or CREATE is rejected. " + "Bidirectional edges: primary direction only in generators." ), "readiness_reporting": ( "After schema or prepopulation work, call kartograph_get_workspace_readiness and cite " - "blocking_reasons, prepopulated gaps, and transition_eligible in your reply." + "blocking_reasons, prepopulated gaps, and transition_eligible. 
When gaps remain after " + "schema save, state which single prepopulation task you are executing next — do not poll " + "the user for permission to start." ), }, ExtractionSessionMode.EXTRACTION_OPERATIONS: { diff --git a/src/api/tests/unit/extraction/application/test_schema_authoring_guide.py b/src/api/tests/unit/extraction/application/test_schema_authoring_guide.py index c1ec6534a..c9b7d585c 100644 --- a/src/api/tests/unit/extraction/application/test_schema_authoring_guide.py +++ b/src/api/tests/unit/extraction/application/test_schema_authoring_guide.py @@ -12,3 +12,6 @@ def test_authoring_guide_documents_bootstrap_and_modeling_guidance() -> None: assert "read-only" in SCHEMA_AUTHORING_GUIDE assert "Property vs entity" in SCHEMA_AUTHORING_GUIDE assert "Never hand-author bulk CREATE ids" in SCHEMA_AUTHORING_GUIDE + assert "## Prepopulation execution (default)" in SCHEMA_AUTHORING_GUIDE + assert "do not ask whether to proceed" in SCHEMA_AUTHORING_GUIDE + assert "every" in SCHEMA_AUTHORING_GUIDE.lower() diff --git a/src/api/tests/unit/extraction/application/test_skill_resolution_service.py b/src/api/tests/unit/extraction/application/test_skill_resolution_service.py index 4e3e03182..2f3ac7257 100644 --- a/src/api/tests/unit/extraction/application/test_skill_resolution_service.py +++ b/src/api/tests/unit/extraction/application/test_skill_resolution_service.py @@ -45,11 +45,16 @@ async def test_bootstrap_mode_uses_bootstrap_defaults(self): assert "3–5" in resolved.skills["capabilities_intake"] assert "Workspace discovery" in resolved.skills["bootstrap_workflow"] assert "Property vs entity" in resolved.skills["schema_modeling"] - assert "read-only" in resolved.skills["prepopulation"] - assert "blocking_reasons" in resolved.skills["readiness_reporting"] + assert "Execute-first" in resolved.skills["prepopulation"] + assert "should we proceed" in resolved.skills["prepopulation"] + assert "every folder" in resolved.skills["prepopulation"] + assert "repository-files/" in 
resolved.skills["prepopulation"] + assert "do not poll" in resolved.skills["readiness_reporting"] assert "six-phase" in resolved.system_prompt.lower() guardrails_text = " ".join(resolved.guardrails) assert "one phase per turn" in guardrails_text + assert "do not ask whether to proceed" in guardrails_text + assert "one prepopulation task per turn" in guardrails_text assert "kartograph_save_schema_ontology" in guardrails_text assert "never hand-author CREATE ids" in guardrails_text assert len(resolved.prompt_hierarchy) > 0 From d51f70caa0e166da9bf8760006594440b004ac27 Mon Sep 17 00:00:00 2001 From: aredenba-rh <aredenba@redhat.com> Date: Thu, 4 Jun 2026 23:51:30 -0400 Subject: [PATCH 105/153] feat(extraction): require entities-first script-driven prepopulation GMA must author Python scanner scripts for every entity gap before relationship scripts, using creative discovery across all data sources. Co-authored-by: Cursor <cursoragent@cursor.com> --- specs/graph/schema-authoring.spec.md | 6 ++++ .../application/schema_authoring_guide.py | 26 +++++++++----- .../application/skill_resolution_service.py | 35 ++++++++++--------- .../test_schema_authoring_guide.py | 4 ++- .../test_skill_resolution_service.py | 16 +++++---- 5 files changed, 54 insertions(+), 33 deletions(-) diff --git a/specs/graph/schema-authoring.spec.md b/specs/graph/schema-authoring.spec.md index b04e81cd9..84b9e8109 100644 --- a/specs/graph/schema-authoring.spec.md +++ b/specs/graph/schema-authoring.spec.md @@ -102,6 +102,12 @@ The system SHALL guide the Graph Management Assistant through a six-phase schema - THEN it executes one prepopulation task per turn via generator script and apply-from-file - AND does not ask the user for permission to proceed unless strategy is ambiguous or CREATE is rejected +#### Scenario: Entities before relationships during prepopulation +- GIVEN readiness shows both prepopulated entity gaps and prepopulated relationship gaps +- WHEN the assistant implements prepopulation +- 
THEN it authors and runs entity scanner scripts for every entity gap before any relationship scanner +- AND each scanner discovers instances across all `repository-files/` data sources + ### Requirement: Workload Bulk Instance Authoring The system SHALL support bulk instance authoring for the Graph Management Assistant via workspace files and strict CREATE semantics. diff --git a/src/api/extraction/application/schema_authoring_guide.py b/src/api/extraction/application/schema_authoring_guide.py index f2c816b11..6a3a1f6cd 100644 --- a/src/api/extraction/application/schema_authoring_guide.py +++ b/src/api/extraction/application/schema_authoring_guide.py @@ -25,20 +25,28 @@ When `kartograph_get_workspace_readiness` shows prepopulated gaps **after the ontology is saved**, **execute immediately** — do not ask whether to proceed. -**One prepopulation task per turn** = one entity type **or** one relationship type, end-to-end: +**Prepopulation is script writing.** For each gap, author a Python scanner under `instance_generators/` +that discovers **every** instance of that type across **all** data sources under `repository-files/`. +Use Glob, Grep, and Read creatively per entity (HTTP route registration, test file naming, directory +layout, OpenAPI specs, import graphs, etc.). Copy template scripts only as a starting point — customize +the discovery logic for the type. -1. Write or reuse `instance_generators/<script>.py` (use `instance_generator` from ontology when set). -2. Script must scan **every** data source under `repository-files/` (all top-level folders). +**One prepopulation task per turn** = one entity label **or** one relationship label, end-to-end: + +1. Explore `repository-files/` with Glob/Grep; design the scanner. +2. Write `instance_generators/<label>.py` (honor `instance_generator` from ontology when set). 3. `python3 instance_generators/<script>.py repository-files > instance_generators/out/<label>.json` -4. 
`json_instances_to_jsonl.py` (entities) or `json_relationships_to_jsonl.py` (edges after nodes exist). +4. `json_instances_to_jsonl.py` (entities) or `json_relationships_to_jsonl.py` (relationships). 5. `kartograph_validate_graph_mutations_from_file` → `kartograph_apply_graph_mutations_from_file` -6. `kartograph_get_workspace_readiness` — report counts and the **next** gap you will tackle. +6. `kartograph_get_workspace_readiness` — report counts and the **next** task. -**Order:** entity types with gaps first (e.g. `api_endpoint`, `test`), then relationship types -(e.g. `repository → defines → api_endpoint`) once endpoint node slugs exist. +**Order (strict):** complete **all** prepopulated **entity** types with gaps before **any** +prepopulated **relationship** type. Example sequence: `repository` → `test` → `api_endpoint` → then +`repository → contains → test` → `repository → defines → api_endpoint`. Relationship scripts output +`source_slug` / `target_slug` pairs and require entity slugs to already exist. -**Only ask the user when:** generator strategy is ambiguous, discovery cannot support a reliable -script, or strict CREATE validation reports duplicates (then use UPDATE or skip-existing slugs). +**Only ask the user when:** scanner strategy is ambiguous, the codebase cannot support reliable +discovery, or strict CREATE validation reports duplicates (then use UPDATE or skip-existing slugs). ## Schema modeling rules diff --git a/src/api/extraction/application/skill_resolution_service.py b/src/api/extraction/application/skill_resolution_service.py index 8cbf70bfb..381a98611 100644 --- a/src/api/extraction/application/skill_resolution_service.py +++ b/src/api/extraction/application/skill_resolution_service.py @@ -66,12 +66,13 @@ class ResolvedExtractionSkillPack: ), ( "When kartograph_get_workspace_readiness shows prepopulated gaps after schema is saved, " - "default to executing prepopulation — do not ask whether to proceed. 
Complete one " - "prepopulation task per turn (one entity type or one relationship type): write or reuse " - "instance_generators/<script>.py, scan all repository-files/ data sources, run the " - "pipeline through apply-from-file, report results, then stop. Only ask the user when " - "generator strategy is ambiguous, discovery cannot support a script, or strict CREATE " - "reports duplicates." + "default to executing prepopulation — do not ask whether to proceed. Prepopulation means " + "authoring Python scanner scripts under instance_generators/ that find every instance " + "across all repository-files/ data sources (use Glob/Grep/AST creatively per type). " + "Finish all entity-type gaps before any relationship-type gaps. One script task per turn " + "(one entity label or one relationship label): write/run script → JSONL → apply-from-file, " + "then stop. Only ask when scanner strategy is ambiguous, code cannot support a script, " + "or strict CREATE reports duplicates." ), ), }, @@ -110,9 +111,9 @@ class ResolvedExtractionSkillPack: "(4) Prepopulation planning — which types/relationships are prepopulated vs manual (during " "schema design only; do not re-ask once schema is saved). " "(5) Save ontology — kartograph_save_schema_ontology only after user confirms the full schema. " - "(6) Implement prepopulation — one task per turn: write/run generator for one gap, full " - "pipeline through apply-from-file; all repository-files/ data sources; entities before " - "relationships; verify readiness; proceed to next gap without asking permission." + "(6) Implement prepopulation — script-first, one entity or relationship per turn: author " + "instance_generators/<label>.py to discover all instances creatively, run full pipeline; " + "complete every entity gap before starting relationship scripts; verify readiness between tasks." 
), "schema_modeling": ( "Property vs entity: distinguish/categorize → property on an existing type; " @@ -126,14 +127,14 @@ class ResolvedExtractionSkillPack: "Read/save ontology via kartograph_get_schema_ontology and kartograph_save_schema_ontology." ), "prepopulation": ( - "Execute-first prepopulation: when readiness lists prepopulated gaps, pick the next entity " - "gap (before relationships), then relationship gaps after entity nodes exist. Per task: " - "(1) copy/adapt a template or write instance_generators/<label>.py that scans every folder " - "under repository-files/ (all data sources); (2) Bash run → JSON stdout; " - "(3) json_instances_to_jsonl.py or json_relationships_to_jsonl.py; (4) validate-from-file; " - "(5) apply-from-file; (6) re-check readiness. Use instance_generator from ontology when set. " - "Do not ask 'should we proceed' — execute unless strategy is unclear or CREATE is rejected. " - "Bidirectional edges: primary direction only in generators." + "Execute-first, script-first prepopulation: gaps are solved by writing Python under " + "instance_generators/, not by manual instance listing. Use Read/Grep/Glob on repository-files/ " + "to design each scanner — find every instance across all data sources in creative, " + "type-specific ways (route tables, test file patterns, package paths, OpenAPI, etc.). " + "Ordering: exhaust all prepopulated entity-type gaps before any relationship-type gap. " + "Per turn, one label only: (1) write/adapt <label>.py; (2) Bash → JSON; (3) json_*_to_jsonl; " + "(4) validate/apply-from-file; (5) readiness. Relationship scripts emit source_slug/target_slug " + "JSON after entity nodes exist. Do not ask to proceed. Bidirectional: primary edges only." 
), "readiness_reporting": ( "After schema or prepopulation work, call kartograph_get_workspace_readiness and cite " diff --git a/src/api/tests/unit/extraction/application/test_schema_authoring_guide.py b/src/api/tests/unit/extraction/application/test_schema_authoring_guide.py index c9b7d585c..63189d1f8 100644 --- a/src/api/tests/unit/extraction/application/test_schema_authoring_guide.py +++ b/src/api/tests/unit/extraction/application/test_schema_authoring_guide.py @@ -13,5 +13,7 @@ def test_authoring_guide_documents_bootstrap_and_modeling_guidance() -> None: assert "Property vs entity" in SCHEMA_AUTHORING_GUIDE assert "Never hand-author bulk CREATE ids" in SCHEMA_AUTHORING_GUIDE assert "## Prepopulation execution (default)" in SCHEMA_AUTHORING_GUIDE + assert "Prepopulation is script writing" in SCHEMA_AUTHORING_GUIDE assert "do not ask whether to proceed" in SCHEMA_AUTHORING_GUIDE - assert "every" in SCHEMA_AUTHORING_GUIDE.lower() + assert "all" in SCHEMA_AUTHORING_GUIDE and "entity" in SCHEMA_AUTHORING_GUIDE + assert "creatively" in SCHEMA_AUTHORING_GUIDE diff --git a/src/api/tests/unit/extraction/application/test_skill_resolution_service.py b/src/api/tests/unit/extraction/application/test_skill_resolution_service.py index 2f3ac7257..2c59e704d 100644 --- a/src/api/tests/unit/extraction/application/test_skill_resolution_service.py +++ b/src/api/tests/unit/extraction/application/test_skill_resolution_service.py @@ -45,16 +45,20 @@ async def test_bootstrap_mode_uses_bootstrap_defaults(self): assert "3–5" in resolved.skills["capabilities_intake"] assert "Workspace discovery" in resolved.skills["bootstrap_workflow"] assert "Property vs entity" in resolved.skills["schema_modeling"] - assert "Execute-first" in resolved.skills["prepopulation"] - assert "should we proceed" in resolved.skills["prepopulation"] - assert "every folder" in resolved.skills["prepopulation"] - assert "repository-files/" in resolved.skills["prepopulation"] + assert "script-first" in 
resolved.skills["prepopulation"] + assert "Python" in resolved.skills["prepopulation"] + assert "all prepopulated entity-type gaps before any relationship" in resolved.skills[ + "prepopulation" + ] + assert "creative" in resolved.skills["prepopulation"] + guardrails_text = " ".join(resolved.guardrails) + assert "Finish all entity-type gaps before any relationship-type gaps" in guardrails_text + assert "Python scanner scripts" in guardrails_text assert "do not poll" in resolved.skills["readiness_reporting"] assert "six-phase" in resolved.system_prompt.lower() - guardrails_text = " ".join(resolved.guardrails) assert "one phase per turn" in guardrails_text assert "do not ask whether to proceed" in guardrails_text - assert "one prepopulation task per turn" in guardrails_text + assert "One script task per turn" in guardrails_text assert "kartograph_save_schema_ontology" in guardrails_text assert "never hand-author CREATE ids" in guardrails_text assert len(resolved.prompt_hierarchy) > 0 From ae1850293916c07e73f363805a3a02f9f77d58bb Mon Sep 17 00:00:00 2001 From: aredenba-rh <aredenba@redhat.com> Date: Fri, 5 Jun 2026 10:44:55 -0400 Subject: [PATCH 106/153] feat(extraction): streamline GMA prepopulation and fix writable workspace Replace generic generator templates with a single scanner example plus entities_to_jsonl and relationships_to_jsonl converters, standardize {label}_instances.json(l) naming, and mount instance_generators/ writable while keeping repository snapshots read-only. 
Co-authored-by: Cursor <cursoragent@cursor.com> --- specs/extraction/chat-turns.spec.md | 2 +- specs/graph/schema-authoring.spec.md | 15 +- .../kartograph_agent_runtime/agent_prompt.py | 8 +- .../kartograph_agent_runtime/executor.py | 11 +- src/agent-runtime/tests/test_agent_prompt.py | 5 +- src/agent-runtime/tests/test_executor.py | 3 +- .../application/schema_authoring_guide.py | 223 ++++++------------ .../application/skill_resolution_service.py | 34 +-- .../container_workload_runtime.py | 12 +- .../instance_generator_templates/README.md | 69 +++--- .../instance_generator_templates/__init__.py | 10 +- .../_entity_scanner.example.py | 46 ++++ .../data_source.py | 32 --- ...ances_to_jsonl.py => entities_to_jsonl.py} | 57 ++--- .../instance_generator_templates/folder.py | 49 ---- ..._to_jsonl.py => relationships_to_jsonl.py} | 24 +- .../source_file.py | 59 ----- .../sticky_session_workdir_materializer.py | 1 + .../sticky_session_workspace_binds.py | 35 +++ .../management/domain/relationship_pairing.py | 2 +- .../test_schema_authoring_guide.py | 19 +- .../test_skill_resolution_service.py | 75 +++--- ..._to_jsonl.py => test_entities_to_jsonl.py} | 25 +- ...sonl.py => test_relationships_to_jsonl.py} | 8 +- ...test_sticky_session_container_bootstrap.py | 4 +- ...est_sticky_session_workdir_materializer.py | 9 +- .../test_sticky_session_workspace_binds.py | 21 ++ 27 files changed, 334 insertions(+), 524 deletions(-) create mode 100644 src/api/extraction/infrastructure/instance_generator_templates/_entity_scanner.example.py delete mode 100644 src/api/extraction/infrastructure/instance_generator_templates/data_source.py rename src/api/extraction/infrastructure/instance_generator_templates/{json_instances_to_jsonl.py => entities_to_jsonl.py} (64%) delete mode 100644 src/api/extraction/infrastructure/instance_generator_templates/folder.py rename src/api/extraction/infrastructure/instance_generator_templates/{json_relationships_to_jsonl.py => relationships_to_jsonl.py} (87%) 
delete mode 100644 src/api/extraction/infrastructure/instance_generator_templates/source_file.py create mode 100644 src/api/extraction/infrastructure/sticky_session_workspace_binds.py rename src/api/tests/unit/extraction/infrastructure/{test_json_instances_to_jsonl.py => test_entities_to_jsonl.py} (60%) rename src/api/tests/unit/extraction/infrastructure/{test_json_relationships_to_jsonl.py => test_relationships_to_jsonl.py} (77%) create mode 100644 src/api/tests/unit/extraction/infrastructure/test_sticky_session_workspace_binds.py diff --git a/specs/extraction/chat-turns.spec.md b/specs/extraction/chat-turns.spec.md index 61995d400..b4bd84458 100644 --- a/specs/extraction/chat-turns.spec.md +++ b/specs/extraction/chat-turns.spec.md @@ -87,7 +87,7 @@ The system SHALL expose schema, mutation, and workspace tooling appropriate for - GIVEN an active graph-management chat turn in schema bootstrap mode - WHEN the agent runtime starts - THEN Bash is an allowed tool scoped to the session workspace -- AND `instance_generators/` contains example scripts the agent may copy or extend +- AND `instance_generators/` contains `_entity_scanner.example.py`, `entities_to_jsonl.py`, and `relationships_to_jsonl.py` #### Scenario: Compact follow-up prompts - GIVEN a graph-management session with prior user messages in the turn history diff --git a/specs/graph/schema-authoring.spec.md b/specs/graph/schema-authoring.spec.md index 84b9e8109..897439c7f 100644 --- a/specs/graph/schema-authoring.spec.md +++ b/specs/graph/schema-authoring.spec.md @@ -122,16 +122,17 @@ The system SHALL support bulk instance authoring for the Graph Management Assist - WHEN the assistant applies mutations from that file path - THEN the system reads the full file and applies all valid operations in one request -#### Scenario: Optional instance generator metadata -- GIVEN an entity type with `instance_generator` set to a script name under `instance_generators/` -- WHEN the ontology is saved and read back -- THEN 
the script name is preserved as authoring metadata for the assistant - #### Scenario: Session workspace generator templates - GIVEN a sticky session work directory is prepared - WHEN the assistant lists `instance_generators/` -- THEN example generator scripts and JSONL converter helpers are present -- AND the assistant may add custom generator scripts alongside them +- THEN `_entity_scanner.example.py`, `entities_to_jsonl.py`, and `relationships_to_jsonl.py` are present +- AND the assistant authors `{label}.py` scanners that emit `out/{label}_instances.json` + +#### Scenario: Batch entity prepopulation pipeline +- GIVEN a prepopulated entity type with a readiness gap +- WHEN the assistant runs `{label}.py` and `entities_to_jsonl.py` +- THEN it produces `instance_generators/out/{label}_instances.jsonl` +- AND applies all CREATE lines in one validate/apply-from-file batch ### Requirement: Bidirectional Relationship Pairing The system SHALL default new relationship types to bidirectional pairing. See [Bidirectional Relationships](bidirectional-relationships.spec.md). diff --git a/src/agent-runtime/kartograph_agent_runtime/agent_prompt.py b/src/agent-runtime/kartograph_agent_runtime/agent_prompt.py index 92311a70c..3598ece23 100644 --- a/src/agent-runtime/kartograph_agent_runtime/agent_prompt.py +++ b/src/agent-runtime/kartograph_agent_runtime/agent_prompt.py @@ -44,7 +44,7 @@ 1. `kartograph_get_schema_authoring_guide` 2. `kartograph_get_workspace_readiness` 3. `kartograph_get_schema_ontology` -4. For large prepopulation: Bash `python3 instance_generators/<template>.py repository-files` +4. Prepopulation: `{label}.py` → `out/{label}_instances.json` → `entities_to_jsonl.py` → apply-from-file 5. Model types → `kartograph_save_schema_ontology` 6. Apply CREATE mutations → `kartograph_apply_graph_mutations` (small fixes inline; bulk via generator output) 7. 
Create relationship edges after entity IDs are known @@ -54,9 +54,9 @@ """.strip() _TOOLS_COMPACT_REFERENCE = ( - "Tools: kartograph_* schema MCP tools, plus Read/Grep/Glob/Bash on the workspace. " - "Bulk prepopulation: Bash generator → `json_instances_to_jsonl.py` → validate-from-file → apply-from-file. " - "CREATE is strict (use UPDATE to edit existing instances)." + "Tools: kartograph_* schema MCP tools, plus Read/Grep/Glob/Bash. " + "Prepopulation: {label}.py → out/{label}_instances.json → entities_to_jsonl.py or " + "relationships_to_jsonl.py → validate/apply out/{label}_instances.jsonl. Never /tmp." ) diff --git a/src/agent-runtime/kartograph_agent_runtime/executor.py b/src/agent-runtime/kartograph_agent_runtime/executor.py index e7fc69e57..a3ef0ada9 100644 --- a/src/agent-runtime/kartograph_agent_runtime/executor.py +++ b/src/agent-runtime/kartograph_agent_runtime/executor.py @@ -58,13 +58,10 @@ def _build_workspace_prompt_appendix(settings: AgentRuntimeSettings) -> str: "## Session workspace", f"Workspace mount: `{settings.workspace_dir}`", ( - "Prepared repository files live under " - "`repository-files/<data_source_name>/` (read-only). " - "`ingestion-context/` is read-only. " - "Writable outputs: `instance_generators/` only (scripts, JSON, JSONL under " - "`instance_generators/out/`). " - "Run generators with Bash: `python3 instance_generators/<script>.py repository-files`. " - "Use Read, Grep, Glob on repository-files; Bash for generators." + "Read-only: `repository-files/`, `ingestion-context/`. " + "Writable: `instance_generators/` — `{label}.py` and `out/{label}_instances.json(l)`. " + "Platform converters: `entities_to_jsonl.py`, `relationships_to_jsonl.py`. " + "Never `/tmp`. One batch per gap via apply-from-file." 
), ] for source in sources[:12]: diff --git a/src/agent-runtime/tests/test_agent_prompt.py b/src/agent-runtime/tests/test_agent_prompt.py index b74b734c8..ebf7b1d33 100644 --- a/src/agent-runtime/tests/test_agent_prompt.py +++ b/src/agent-runtime/tests/test_agent_prompt.py @@ -88,6 +88,5 @@ def test_build_agent_system_prompt_compact_omits_skills_and_full_tools_table() - assert "**prepopulation**" not in prompt assert "Quick workflow" not in prompt - assert "json_instances_to_jsonl.py" in prompt - assert "validate-from-file" in prompt - assert "apply-from-file" in prompt + assert "entities_to_jsonl.py" in prompt + assert "never /tmp" in prompt.lower() or "Never /tmp" in prompt diff --git a/src/agent-runtime/tests/test_executor.py b/src/agent-runtime/tests/test_executor.py index 6defdd80d..e3cf617ce 100644 --- a/src/agent-runtime/tests/test_executor.py +++ b/src/agent-runtime/tests/test_executor.py @@ -53,8 +53,9 @@ def test_build_workspace_prompt_appendix_prefers_sources_index(tmp_path: Path) - assert "Hyperfleet API" in appendix assert "142 file(s)" in appendix assert "pkg/api/adapter_status_types_test.go" in appendix - assert "read-only" in appendix + assert "Read-only" in appendix assert "instance_generators/" in appendix + assert "entities_to_jsonl.py" in appendix def test_build_workspace_prompt_appendix_includes_extension_counts(tmp_path: Path) -> None: diff --git a/src/api/extraction/application/schema_authoring_guide.py b/src/api/extraction/application/schema_authoring_guide.py index 6a3a1f6cd..b2b92c14d 100644 --- a/src/api/extraction/application/schema_authoring_guide.py +++ b/src/api/extraction/application/schema_authoring_guide.py @@ -4,202 +4,121 @@ # Kartograph schema authoring (Graph Management Assistant) Use the Kartograph schema tools — never probe undocumented HTTP routes. -Use Read, Grep, Glob, and Bash against the session workspace mount. Prebuilt generator scripts -live under `instance_generators/` (see README there). 
Write scripts and JSON/JSONL outputs -only under `instance_generators/` — `repository-files/` and `ingestion-context/` are read-only. -## Bootstrap workflow (6 phases) +## Workspace layout + +| Path | Access | Purpose | +|------|--------|---------| +| `repository-files/<data_source>/` | read-only | Source repos for Glob/Grep/Read | +| `ingestion-context/` | read-only | Sync metadata | +| `instance_generators/` | **writable** | `{label}.py` scanners + `out/*_instances.json(l)` | + +Never write to `/tmp`. Apply-from-file paths must be under `instance_generators/out/` +(e.g. `instance_generators/out/test_instances.jsonl`). + +Bundled platform scripts (do not edit): `entities_to_jsonl.py`, `relationships_to_jsonl.py`. +Copy `_entity_scanner.example.py` to `{label}.py` for each prepopulated type. -Complete these in order. Do not mix schema design, prepopulation planning, and bulk -implementation in the same turn when the user gave multiple deliverables. +## Bootstrap workflow (6 phases) -1. **Understand goals** — Ask what questions the graph must answer; collect 3–5 stakeholder use cases. -2. **Workspace discovery** — Glob/Grep under `repository-files/`; report file counts, extensions, and code patterns. -3. **Draft schema + validation Q&A** — Propose entity types, properties, and relationships; cite workspace examples. -4. **Prepopulation planning** — Decide prepopulated vs manual per type; required properties; generator strategy. -5. **Save ontology** — `kartograph_save_schema_ontology` only after the user confirms the full schema. -6. **Implement prepopulation** — one task per turn (see below); entities before edges; verify readiness. +1. **Understand goals** — 3–5 questions the graph must answer. +2. **Workspace discovery** — Glob/Grep under `repository-files/`. +3. **Draft schema + Q&A** — types, properties, relationships; mark `prepopulated: true` where needed. +4. **Prepopulation planning** — which types get scanners (during design only). +5. 
**Save ontology** — after user confirms the full schema. +6. **Implement prepopulation** — one prepopulated label per turn (below). -## Prepopulation execution (default) +## Prepopulation execution -When `kartograph_get_workspace_readiness` shows prepopulated gaps **after the ontology is saved**, -**execute immediately** — do not ask whether to proceed. +When `kartograph_get_workspace_readiness` shows gaps after ontology save, **execute immediately**. -**Prepopulation is script writing.** For each gap, author a Python scanner under `instance_generators/` -that discovers **every** instance of that type across **all** data sources under `repository-files/`. -Use Glob, Grep, and Read creatively per entity (HTTP route registration, test file naming, directory -layout, OpenAPI specs, import graphs, etc.). Copy template scripts only as a starting point — customize -the discovery logic for the type. +**Entities** (all entity gaps before any relationship gap): -**One prepopulation task per turn** = one entity label **or** one relationship label, end-to-end: +```bash +python3 instance_generators/test.py repository-files > instance_generators/out/test_instances.json +python3 instance_generators/entities_to_jsonl.py test \\ + --data-source-id schema-bootstrap --source-path graph-management-assistant \\ + instance_generators/out/test_instances.json > instance_generators/out/test_instances.jsonl +# validate-from-file → apply-from-file path=instance_generators/out/test_instances.jsonl +``` -1. Explore `repository-files/` with Glob/Grep; design the scanner. -2. Write `instance_generators/<label>.py` (honor `instance_generator` from ontology when set). -3. `python3 instance_generators/<script>.py repository-files > instance_generators/out/<label>.json` -4. `json_instances_to_jsonl.py` (entities) or `json_relationships_to_jsonl.py` (relationships). -5. `kartograph_validate_graph_mutations_from_file` → `kartograph_apply_graph_mutations_from_file` -6. 
`kartograph_get_workspace_readiness` — report counts and the **next** task. +**Relationships** (after entity slugs exist; name files `{source}_{rel}_{target}_instances.*`): -**Order (strict):** complete **all** prepopulated **entity** types with gaps before **any** -prepopulated **relationship** type. Example sequence: `repository` → `test` → `api_endpoint` → then -`repository → contains → test` → `repository → defines → api_endpoint`. Relationship scripts output -`source_slug` / `target_slug` pairs and require entity slugs to already exist. +```bash +python3 instance_generators/repository_defines_test.py repository-files \\ + > instance_generators/out/repository_defines_test_instances.json +python3 instance_generators/relationships_to_jsonl.py defines repository test \\ + instance_generators/out/repository_defines_test_instances.json \\ + > instance_generators/out/repository_defines_test_instances.jsonl +``` -**Only ask the user when:** scanner strategy is ambiguous, the codebase cannot support reliable -discovery, or strict CREATE validation reports duplicates (then use UPDATE or skip-existing slugs). +Scanner stdout contract: +- Entities: `[{"slug": "...", "properties": {...}}]` +- Relationships: `[{"source_slug": "...", "target_slug": "...", "properties": {}}]` ## Schema modeling rules -- **Property vs entity:** distinguish/categorize (e.g. tier0/tier1) → property on an existing type; - track which/what or needs relationships → entity type + edges. -- **Bidirectional relationships** default on — author primary direction only; platform creates inverse type - and twin edge instances. Set `bidirectional: false` for asymmetric edges (`depends_on`, `created_by`). -- For asymmetric edges, confirm direction explicitly (X → rel → Y). +- **Property vs entity:** categorize → property; track instances/relationships → entity + edges. +- **Bidirectional relationships** default on — author primary direction only; platform creates inverse + twins. 
+- Set `bidirectional: false` for asymmetric edges (`depends_on`, `created_by`). ## Workspace discovery patterns | Target | Glob / Grep hints | |--------|-------------------| -| Tests | `**/*_test.go`, `**/test_*.go`, `**/*_test.py`, `**/tests/**` | -| API endpoints / handlers | `Grep` for route registrations, `@app.`, `HandleFunc`, OpenAPI paths | -| Source files | `Glob **/*.{go,py,ts,java,yaml,md}` per data source folder | - -Cite the session workspace appendix for per-repo file counts and extension summaries before prepopulation Q&A. +| Tests | `**/*_test.go`, `**/test_*.go`, `**/*_test.py` | +| API endpoints | route registrations, `@app.`, `HandleFunc`, OpenAPI paths | +| Source files | `Glob **/*.{go,py,ts,java,yaml,md}` per data source | ## Tool workflow -1. Call `kartograph_get_schema_authoring_guide` (this document). -2. Call `kartograph_get_workspace_readiness` to see prepopulated gaps and live instance counts. -3. Call `kartograph_get_schema_ontology` to read the current entity/relationship types. -4. Edit the ontology JSON (full replace) and call `kartograph_save_schema_ontology`. -5. For prepopulated types at scale: run a script under `instance_generators/` (examples: - `data_source.py`, `folder.py`, `source_file.py`, or your own), then - `python3 instance_generators/json_instances_to_jsonl.py <entity_label> out/instances.json`. -6. After entity nodes exist, convert relationship JSON with - `json_relationships_to_jsonl.py <edge_label> <source_entity> <target_entity> out/relationships.json`. -7. Optional: `kartograph_check_graph_slugs` to batch-check which slugs already exist before CREATE. -8. Dry-run with `kartograph_validate_graph_mutations_from_file`, then apply with - `kartograph_apply_graph_mutations_from_file` (or inline tools for small fixes). -9. Verify with `kartograph_list_instances_by_type` and `kartograph_get_workspace_readiness`. - -## Entity type (node type) shape +1. 
`kartograph_get_schema_authoring_guide` · `kartograph_get_workspace_readiness` · `kartograph_get_schema_ontology` +2. `kartograph_save_schema_ontology` when schema is confirmed +3. Prepopulation pipeline above per gap +4. `kartograph_validate_graph_mutations_from_file` → `kartograph_apply_graph_mutations_from_file` +5. Verify with `kartograph_list_instances_by_type` and readiness -Each entry in `node_types`: +## Entity type shape ```json { - "label": "service", - "description": "Deployable software service", + "label": "test", + "description": "Automated test file", "required_properties": ["name"], - "optional_properties": ["team"], - "prepopulated": false, - "prepopulated_instance_count": 0, - "instance_generator": "source_file.py" + "optional_properties": ["file_path"], + "prepopulated": true, + "prepopulated_instance_count": 0 } ``` -- `label`: lowercase snake_case type name (required). -- `prepopulated`: when true, bootstrap transition requires at least one instance. -- `instance_generator`: optional script name under `instance_generators/` (example templates or your own). -- Saving replaces the entire ontology — read first, merge your edits, then save. - -## Relationship type (edge type) shape +Scanner script convention: `instance_generators/{label}.py` → `out/{label}_instances.json`. -Each entry in `edge_types`: +## Relationship type shape ```json { - "label": "contains", - "description": "Test exercises an API endpoint", - "source_labels": ["test"], + "label": "defines", + "source_labels": ["repository"], "target_labels": ["api_endpoint"], - "properties": [], "prepopulated": true, - "prepopulated_instance_count": 0, - "instance_generator": "my_edges.py", - "bidirectional": true, - "inverse_label": "contained_in" + "bidirectional": true } ``` -- `bidirectional`: default `true` for new relationship types — platform auto-creates inverse type and twin edge instances. 
-- `inverse_label`: optional override; otherwise derived (`contains` → `contained_in`, else `{label}_inverse`). -- Set `bidirectional: false` for asymmetric edges (`depends_on`, `created_by`). -- Author **primary direction only** in generators; inverse instances are created automatically on apply. -- `source_labels` / `target_labels`: allowed node type labels for edge endpoints. -- `instance_generator`: optional script under `instance_generators/` for relationship prepopulation. -- `prepopulated`: when true, bootstrap transition requires at least one instance of this - relationship type. Every listed source and target entity type must also have - `prepopulated: true`. +Relationship scanner convention: `out/{source}_{label}_{target}_instances.json`. ## Instance mutations (JSONL) -Apply after types exist. One JSON object per line. - -Create entity instance: - -```json -{"op":"CREATE","type":"node","id":"service:0123456789abcdef","label":"service","set_properties":{"name":"api-gateway","slug":"api-gateway","data_source_id":"schema-bootstrap","source_path":"graph-management-assistant"}} -``` - -Create relationship instance (requires entity node IDs from prior CREATE or list tool): - -```json -{"op":"CREATE","type":"edge","id":"depends_on:0123456789abc001","label":"depends_on","start_id":"service:0123456789abcdef","end_id":"service:fedcba9876543210","set_properties":{"data_source_id":"schema-bootstrap","source_path":"graph-management-assistant"}} -``` - -Rules: -- `id` format: `{label}:{16 lowercase hex chars}` — generate with `secrets.token_hex(8)`. -- CREATE requires `data_source_id` and `source_path` in `set_properties`. -- Node CREATE requires `slug` in `set_properties` (kebab-case, unique per type). -- `knowledge_graph_id` is stamped by the platform — do not set it. -- For large sets: Bash + custom script under `instance_generators/` → `json_*_to_jsonl.py` → apply-from-file. - Never hand-author bulk CREATE ids in chat — use converter scripts for deterministic ids. 
-- CREATE is strict: existing types/instances must be changed with UPDATE, not CREATE again. -- Dry-run before apply: `kartograph_validate_graph_mutations` or `kartograph_validate_graph_mutations_from_file`. +- CREATE requires `data_source_id`, `source_path`, and `slug` on nodes. +- CREATE is strict — use UPDATE for existing instances. +- Never hand-author bulk CREATE lines in chat; use `entities_to_jsonl.py` / `relationships_to_jsonl.py`. - Create all entity nodes before relationship edges. -- Sort instances deterministically (by slug or path) before emitting CREATE lines. - -## Instance generation cookbook - -Scan prepared files under `repository-files/<data_source_slug>/` (see session workspace appendix). - -| Pattern | When to use | Scan strategy | Slug rule | Key properties | -|---------|-------------|---------------|-----------|----------------| -| **data_source** | One instance per connected repo | Top-level folders under `repository-files/` | folder name | `name`, `source_type`, `file_count` | -| **folder** | Directory hierarchy anchors | `Glob **/*` dirs per data source | `folder-{path-kebab}` | `folder_path`, `data_source`, child counts | -| **source_file** | File-level extraction jobs | `Glob **/*.{go,py,yaml,md,json,...}` | path → kebab (`pkg-api-foo-go`) | `file_path`, `source_path`, `name` | - -Workflow for bulk prepopulation: -1. Mark the entity type `prepopulated: true` and save ontology. -2. Use Glob to list candidate paths (exclude dot-directories). -3. Derive slugs deterministically from relative paths. -4. Call `kartograph_search_graph_by_slug` for a sample slug to avoid duplicates. -5. Emit JSONL CREATE batches via `kartograph_apply_graph_mutations`. -6. Confirm coverage with `kartograph_list_instances_by_type`. -7. For prepopulated relationships: use `kartograph_list_relationship_instances` or entity lists to resolve `start_id`/`end_id`, then CREATE edges. 
## Readiness checklist -Bootstrap transition needs: -- At least one entity type and one relationship type. -- Every `prepopulated=true` entity type must have at least one live instance. -- Every `prepopulated=true` relationship type must have at least one live edge instance. -- A prepopulated relationship type may only reference entity types that are also prepopulated. - -Call `kartograph_get_workspace_readiness` for: -- `prepopulated_entity_types_without_instances_live` — entity types still needing CREATE lines. -- `prepopulated_relationship_types_without_instances_live` — relationship keys still needing edge CREATE lines. -- `prepopulated_entity_types` / `prepopulated_relationship_types` — metadata vs live counts. -- `blocking_reasons` — transition blockers. - -After applying instance mutations, ontology `prepopulated_instance_count` metadata is refreshed automatically from live graph totals. - -## Repository context +- Every `prepopulated=true` entity type needs ≥1 live instance. +- Every `prepopulated=true` relationship type needs ≥1 live edge. +- Prepopulated relationships may only reference prepopulated entity types. -Prepared JobPackage files live under `repository-files/<data_source_name>/` relative to the -workspace mount (one folder per connected data source; names are slugified data source names -such as `hyperfleet-api`). Use Read, Grep, and Glob on those paths — not HTTP discovery. -The session workspace appendix lists data sources, file counts, sample paths, and extension -hints when available. +Call `kartograph_get_workspace_readiness` for gaps and `blocking_reasons`. 
""".strip() diff --git a/src/api/extraction/application/skill_resolution_service.py b/src/api/extraction/application/skill_resolution_service.py index 381a98611..24176d756 100644 --- a/src/api/extraction/application/skill_resolution_service.py +++ b/src/api/extraction/application/skill_resolution_service.py @@ -59,19 +59,15 @@ class ResolvedExtractionSkillPack: "Exception: the user explicitly says to save/apply or continues after reviewing your draft." ), ( - "For bulk prepopulation never hand-author CREATE ids in chat. Use Bash generators → " - "json_*_to_jsonl.py → validate-from-file → apply-from-file. On ontology save errors, " - "read kartograph_get_schema_ontology and kartograph_get_schema_authoring_guide, merge " - "a fix, then retry once." + "Prepopulation (prepopulated=true types): write instance_generators/{label}.py → " + "out/{label}_instances.json → entities_to_jsonl.py or relationships_to_jsonl.py → " + "validate/apply instance_generators/out/{label}_instances.jsonl in one batch. " + "Never /tmp, never hand-author CREATE lines. All entity gaps before relationship gaps." ), ( - "When kartograph_get_workspace_readiness shows prepopulated gaps after schema is saved, " - "default to executing prepopulation — do not ask whether to proceed. Prepopulation means " - "authoring Python scanner scripts under instance_generators/ that find every instance " - "across all repository-files/ data sources (use Glob/Grep/AST creatively per type). " - "Finish all entity-type gaps before any relationship-type gaps. One script task per turn " - "(one entity label or one relationship label): write/run script → JSONL → apply-from-file, " - "then stop. Only ask when scanner strategy is ambiguous, code cannot support a script, " + "When readiness shows prepopulated gaps after schema save, execute immediately — do not ask " + "permission. One label per turn: copy _entity_scanner.example.py to {label}.py, customize " + "scan(), run pipeline, re-check readiness. 
Only ask when discovery strategy is ambiguous " "or strict CREATE reports duplicates." ), ), @@ -111,9 +107,8 @@ class ResolvedExtractionSkillPack: "(4) Prepopulation planning — which types/relationships are prepopulated vs manual (during " "schema design only; do not re-ask once schema is saved). " "(5) Save ontology — kartograph_save_schema_ontology only after user confirms the full schema. " - "(6) Implement prepopulation — script-first, one entity or relationship per turn: author " - "instance_generators/<label>.py to discover all instances creatively, run full pipeline; " - "complete every entity gap before starting relationship scripts; verify readiness between tasks." + "(6) Implement prepopulation — one prepopulated label per turn via {label}.py → " + "{label}_instances.json(l) → apply-from-file; all entities before relationships." ), "schema_modeling": ( "Property vs entity: distinguish/categorize → property on an existing type; " @@ -127,14 +122,9 @@ class ResolvedExtractionSkillPack: "Read/save ontology via kartograph_get_schema_ontology and kartograph_save_schema_ontology." ), "prepopulation": ( - "Execute-first, script-first prepopulation: gaps are solved by writing Python under " - "instance_generators/, not by manual instance listing. Use Read/Grep/Glob on repository-files/ " - "to design each scanner — find every instance across all data sources in creative, " - "type-specific ways (route tables, test file patterns, package paths, OpenAPI, etc.). " - "Ordering: exhaust all prepopulated entity-type gaps before any relationship-type gap. " - "Per turn, one label only: (1) write/adapt <label>.py; (2) Bash → JSON; (3) json_*_to_jsonl; " - "(4) validate/apply-from-file; (5) readiness. Relationship scripts emit source_slug/target_slug " - "JSON after entity nodes exist. Do not ask to proceed. Bidirectional: primary edges only." 
+ "Per prepopulated gap: {label}.py scans repository-files/ → out/{label}_instances.json → " + "entities_to_jsonl.py or relationships_to_jsonl.py → out/{label}_instances.jsonl → " + "validate/apply-from-file (one batch). Entities before relationships. Primary edges only." ), "readiness_reporting": ( "After schema or prepopulation work, call kartograph_get_workspace_readiness and cite " diff --git a/src/api/extraction/infrastructure/container_workload_runtime.py b/src/api/extraction/infrastructure/container_workload_runtime.py index e2ac4b789..a39b853ba 100644 --- a/src/api/extraction/infrastructure/container_workload_runtime.py +++ b/src/api/extraction/infrastructure/container_workload_runtime.py @@ -8,6 +8,9 @@ from ulid import ULID +from extraction.infrastructure.sticky_session_workspace_binds import ( + build_sticky_session_workspace_binds, +) from extraction.infrastructure.vertex_runtime_env import build_vertex_container_env from extraction.ports.runtime import ( EphemeralWorkerLaunchRequest, @@ -306,11 +309,12 @@ def _start_runtime( "KARTOGRAPH_API_BASE_URL": bootstrap.api_base_url, } ) + binds.append(f"{bootstrap.host_skills_dir}:{self._container_skills_mount}:ro") binds.extend( - [ - f"{bootstrap.host_skills_dir}:{self._container_skills_mount}:ro", - f"{bootstrap.host_session_work_dir}:{self._container_work_mount}:ro", - ] + build_sticky_session_workspace_binds( + host_session_work_dir=bootstrap.host_session_work_dir, + container_work_mount=self._container_work_mount, + ) ) if self._vertex_enabled: diff --git a/src/api/extraction/infrastructure/instance_generator_templates/README.md b/src/api/extraction/infrastructure/instance_generator_templates/README.md index da33aefd4..58c678016 100644 --- a/src/api/extraction/infrastructure/instance_generator_templates/README.md +++ b/src/api/extraction/infrastructure/instance_generator_templates/README.md @@ -1,50 +1,49 @@ -# Instance generators (examples) +# Instance generators -These scripts are **starting 
examples**, not fixed entity types. Copy or author your own -`instance_generators/<your_script>.py` for each prepopulated entity type you define in the ontology. +Prepopulation for `prepopulated: true` types uses **three kinds of files**: -## Usage +| File | Who writes it | Purpose | +|------|---------------|---------| +| `{label}.py` | Agent | Scans `repository-files/` → JSON array on stdout | +| `entities_to_jsonl.py` | Platform | `{label}_instances.json` → `{label}_instances.jsonl` | +| `relationships_to_jsonl.py` | Platform | `{key}_instances.json` → `{key}_instances.jsonl` | -From the session workspace root (`/workspace` in the agent container): +Copy `_entity_scanner.example.py` to `{entity_label}.py` and replace the `scan()` body. -```bash -python3 instance_generators/data_source.py repository-files -python3 instance_generators/folder.py repository-files -python3 instance_generators/source_file.py repository-files -``` - -Bulk pipeline (generator → JSONL → validate → apply): +## Entity prepopulation (one type per turn) ```bash -mkdir -p instance_generators/out -python3 instance_generators/source_file.py repository-files \ - > instance_generators/out/source_file.json -python3 instance_generators/json_instances_to_jsonl.py source_file \ +python3 instance_generators/test.py repository-files \ + > instance_generators/out/test_instances.json + +python3 instance_generators/entities_to_jsonl.py test \ --data-source-id schema-bootstrap \ --source-path graph-management-assistant \ - instance_generators/out/source_file.json \ - > instance_generators/out/source_file.jsonl -# kartograph_validate_graph_mutations_from_file → kartograph_apply_graph_mutations_from_file + instance_generators/out/test_instances.json \ + > instance_generators/out/test_instances.jsonl ``` -## Contract +Then `kartograph_validate_graph_mutations_from_file` and +`kartograph_apply_graph_mutations_from_file` with path +`instance_generators/out/test_instances.jsonl` (one batch for all instances). 
+ +## Relationship prepopulation (after all entity gaps) -- **Input:** path to `repository-files/` (one folder per connected data source). -- **Output:** JSON array on stdout: `[{"slug": "...", "properties": {...}}, ...]` -- **Deterministic:** sorted iteration, no timestamps in output. -- **Customize:** copy a template script for your entity type label, adjust property names to match your ontology, then run and convert output to graph CREATE mutations. +Naming: `out/{source}_{relationship}_{target}_instances.json` (e.g. `repository_defines_test_instances.json`). + +```bash +python3 instance_generators/repository_defines_test.py repository-files \ + > instance_generators/out/repository_defines_test_instances.json + +python3 instance_generators/relationships_to_jsonl.py defines repository test \ + instance_generators/out/repository_defines_test_instances.json \ + > instance_generators/out/repository_defines_test_instances.jsonl +``` -## Templates +## Scanner JSON contract -| Script | Use when | -|--------|----------| -| `data_source.py` | One instance per top-level folder under `repository-files/` | -| `folder.py` | Directory hierarchy anchors per data source | -| `source_file.py` | One instance per source file (common code/doc extensions) | -| `json_instances_to_jsonl.py` | Convert any generator JSON array to CREATE JSONL for one entity label | -| `json_relationships_to_jsonl.py` | Convert relationship JSON (`source_slug`/`target_slug`) to edge CREATE JSONL | +**Entities:** `[{"slug": "kebab-case", "properties": {...}}]` -Set `instance_generator` on the entity or relationship type in the ontology (e.g. `"source_file.py"` or -`"my_custom_tests.py"`) to document which script the assistant should run. +**Relationships:** `[{"source_slug": "...", "target_slug": "...", "properties": {}}]` -After generating slugs, convert to JSONL, dry-run validate, then apply from file. +Never write output to `/tmp` — only `instance_generators/out/` is valid for apply-from-file. 
diff --git a/src/api/extraction/infrastructure/instance_generator_templates/__init__.py b/src/api/extraction/infrastructure/instance_generator_templates/__init__.py index b4aea78b2..79bae8585 100644 --- a/src/api/extraction/infrastructure/instance_generator_templates/__init__.py +++ b/src/api/extraction/infrastructure/instance_generator_templates/__init__.py @@ -1,14 +1,12 @@ -"""Bundled deterministic instance generator scripts for sticky session workspaces.""" +"""Bundled instance generator scripts for sticky session workspaces.""" from pathlib import Path TEMPLATES_DIR = Path(__file__).resolve().parent TEMPLATE_SCRIPT_NAMES = ( - "data_source.py", - "folder.py", - "source_file.py", - "json_instances_to_jsonl.py", - "json_relationships_to_jsonl.py", + "_entity_scanner.example.py", + "entities_to_jsonl.py", + "relationships_to_jsonl.py", "README.md", ) diff --git a/src/api/extraction/infrastructure/instance_generator_templates/_entity_scanner.example.py b/src/api/extraction/infrastructure/instance_generator_templates/_entity_scanner.example.py new file mode 100644 index 000000000..a47d944ab --- /dev/null +++ b/src/api/extraction/infrastructure/instance_generator_templates/_entity_scanner.example.py @@ -0,0 +1,46 @@ +#!/usr/bin/env python3 +"""Example entity scanner — copy to ``{entity_label}.py`` and customize discovery logic. 
+ +Contract: +- argv[1]: path to ``repository-files/`` (one folder per data source) +- stdout: JSON array of ``{"slug": "...", "properties": {...}}`` sorted deterministically +- stderr: optional progress logging only + +Output file convention: ``instance_generators/out/{entity_label}_instances.json`` +""" + +from __future__ import annotations + +import json +import sys +from pathlib import Path + + +def scan(repository_files: Path) -> list[dict]: + """Find every instance of this entity type across all data sources.""" + instances: list[dict] = [] + for data_source_dir in sorted(repository_files.iterdir()): + if not data_source_dir.is_dir() or data_source_dir.name.startswith("."): + continue + # Example: one instance per *_test.go file — replace with your entity's discovery rules. + for file_path in sorted(data_source_dir.rglob("*_test.go")): + if not file_path.is_file(): + continue + rel = file_path.relative_to(data_source_dir) + slug = f"{data_source_dir.name}-{str(rel).replace('/', '-').replace('_', '-')}".lower() + instances.append( + { + "slug": slug, + "properties": { + "name": file_path.name, + "file_path": str(rel), + "data_source": data_source_dir.name, + }, + } + ) + return sorted(instances, key=lambda row: row["slug"]) + + +if __name__ == "__main__": + root = Path(sys.argv[1]) if len(sys.argv) > 1 else Path("repository-files") + print(json.dumps(scan(root), indent=2)) diff --git a/src/api/extraction/infrastructure/instance_generator_templates/data_source.py b/src/api/extraction/infrastructure/instance_generator_templates/data_source.py deleted file mode 100644 index 2803aee60..000000000 --- a/src/api/extraction/infrastructure/instance_generator_templates/data_source.py +++ /dev/null @@ -1,32 +0,0 @@ -#!/usr/bin/env python3 -"""Generate one entity instance per data-source folder under repository-files.""" - -from __future__ import annotations - -import json -import sys -from pathlib import Path - - -def generate_instances(data_dir: Path) -> list[dict]: - 
instances: list[dict] = [] - for source_dir in sorted(data_dir.iterdir()): - if not source_dir.is_dir() or source_dir.name.startswith("."): - continue - file_count = sum(1 for path in source_dir.rglob("*") if path.is_file()) - instances.append( - { - "slug": source_dir.name, - "properties": { - "name": source_dir.name, - "source_type": "repository", - "file_count": file_count, - }, - } - ) - return instances - - -if __name__ == "__main__": - root = Path(sys.argv[1]) if len(sys.argv) > 1 else Path("repository-files") - print(json.dumps(generate_instances(root), indent=2)) diff --git a/src/api/extraction/infrastructure/instance_generator_templates/json_instances_to_jsonl.py b/src/api/extraction/infrastructure/instance_generator_templates/entities_to_jsonl.py similarity index 64% rename from src/api/extraction/infrastructure/instance_generator_templates/json_instances_to_jsonl.py rename to src/api/extraction/infrastructure/instance_generator_templates/entities_to_jsonl.py index 0e37cee8b..d5415ec0e 100644 --- a/src/api/extraction/infrastructure/instance_generator_templates/json_instances_to_jsonl.py +++ b/src/api/extraction/infrastructure/instance_generator_templates/entities_to_jsonl.py @@ -1,26 +1,22 @@ #!/usr/bin/env python3 -"""Convert generator JSON output to Kartograph CREATE JSONL (entity nodes). +"""Convert entity scanner JSON to Kartograph CREATE JSONL (batch apply). -Reads a JSON array from a file or stdin: +Input JSON array (from ``{label}.py`` scanner stdout): [{"slug": "my-entity", "properties": {"name": "My Entity", ...}}, ...] -Writes one CREATE line per instance, sorted by slug. Node ids are deterministic from -entity label + slug (SHA256, same algorithm as the platform EntityIdGenerator with -an empty tenant scope unless --tenant-id is passed). 
- Example: - python3 instance_generators/source_file.py repository-files \\ - > instance_generators/out/files.json + python3 instance_generators/test.py repository-files \\ + > instance_generators/out/test_instances.json - python3 instance_generators/json_instances_to_jsonl.py source_file \\ + python3 instance_generators/entities_to_jsonl.py test \\ --data-source-id schema-bootstrap \\ --source-path graph-management-assistant \\ - instance_generators/out/files.json \\ - > instance_generators/out/files.jsonl + instance_generators/out/test_instances.json \\ + > instance_generators/out/test_instances.jsonl - # Then validate and apply via Kartograph schema tools (from-file). + # kartograph_validate_graph_mutations_from_file → apply-from-file (one batch). """ from __future__ import annotations @@ -86,39 +82,16 @@ def load_instances(payload: Any) -> list[dict[str, Any]]: def main() -> int: parser = argparse.ArgumentParser( - description="Convert generator JSON array to Kartograph node CREATE JSONL.", - ) - parser.add_argument( - "entity_label", - help="Entity type label in the ontology (e.g. source_file, folder).", - ) - parser.add_argument( - "input", - nargs="?", - help="Path to JSON file; omit to read stdin.", - ) - parser.add_argument( - "--tenant-id", - default="", - help="Tenant id for deterministic node ids (optional).", - ) - parser.add_argument( - "--data-source-id", - default="schema-bootstrap", - help="data_source_id stamped on each CREATE line.", - ) - parser.add_argument( - "--source-path", - default="graph-management-assistant", - help="source_path stamped on each CREATE line.", + description="Convert entity scanner JSON to Kartograph node CREATE JSONL.", ) + parser.add_argument("entity_label", help="Entity type label (e.g. 
test, api_endpoint).") + parser.add_argument("input", nargs="?", help="Path to JSON file; omit to read stdin.") + parser.add_argument("--tenant-id", default="", help="Tenant id for deterministic node ids.") + parser.add_argument("--data-source-id", default="schema-bootstrap") + parser.add_argument("--source-path", default="graph-management-assistant") args = parser.parse_args() - if args.input: - raw = Path(args.input).read_text(encoding="utf-8") - else: - raw = sys.stdin.read() - + raw = Path(args.input).read_text(encoding="utf-8") if args.input else sys.stdin.read() instances = load_instances(json.loads(raw)) for row in instances: line = instance_to_create_line( diff --git a/src/api/extraction/infrastructure/instance_generator_templates/folder.py b/src/api/extraction/infrastructure/instance_generator_templates/folder.py deleted file mode 100644 index b576a3c56..000000000 --- a/src/api/extraction/infrastructure/instance_generator_templates/folder.py +++ /dev/null @@ -1,49 +0,0 @@ -#!/usr/bin/env python3 -"""Generate folder instances from directory structure under repository-files.""" - -from __future__ import annotations - -import json -import sys -from pathlib import Path - - -def _folder_instance(folder: Path, data_dir: Path, source_name: str, *, is_root: bool) -> dict: - rel_path = folder.relative_to(data_dir) - if is_root: - slug = f"root-{source_name}" - else: - slug = f"folder-{str(rel_path).replace('/', '-').replace('_', '-').lower()}" - child_folders = sum( - 1 for entry in folder.iterdir() if entry.is_dir() and not entry.name.startswith(".") - ) - child_files = sum( - 1 for entry in folder.iterdir() if entry.is_file() and not entry.name.startswith(".") - ) - return { - "slug": slug, - "properties": { - "folder_path": str(rel_path), - "data_source": source_name, - "child_folder_count": child_folders, - "child_file_count": child_files, - }, - } - - -def generate_instances(data_dir: Path) -> list[dict]: - instances: list[dict] = [] - for source_dir in 
sorted(data_dir.iterdir()): - if not source_dir.is_dir() or source_dir.name.startswith("."): - continue - source_name = source_dir.name - instances.append(_folder_instance(source_dir, data_dir, source_name, is_root=True)) - for subdir in sorted(source_dir.rglob("*")): - if subdir.is_dir() and not any(part.startswith(".") for part in subdir.parts): - instances.append(_folder_instance(subdir, data_dir, source_name, is_root=False)) - return instances - - -if __name__ == "__main__": - root = Path(sys.argv[1]) if len(sys.argv) > 1 else Path("repository-files") - print(json.dumps(generate_instances(root), indent=2)) diff --git a/src/api/extraction/infrastructure/instance_generator_templates/json_relationships_to_jsonl.py b/src/api/extraction/infrastructure/instance_generator_templates/relationships_to_jsonl.py similarity index 87% rename from src/api/extraction/infrastructure/instance_generator_templates/json_relationships_to_jsonl.py rename to src/api/extraction/infrastructure/instance_generator_templates/relationships_to_jsonl.py index 8eaf26d0f..b6541b6d2 100644 --- a/src/api/extraction/infrastructure/instance_generator_templates/json_relationships_to_jsonl.py +++ b/src/api/extraction/infrastructure/instance_generator_templates/relationships_to_jsonl.py @@ -1,24 +1,20 @@ #!/usr/bin/env python3 -"""Convert relationship generator JSON to Kartograph edge CREATE JSONL. +"""Convert relationship scanner JSON to Kartograph edge CREATE JSONL (batch apply). Input JSON array: - [ - { - "source_slug": "entity-a", - "target_slug": "entity-b", - "properties": {"confidence": 0.9} - } - ] + [{"source_slug": "repo-a", "target_slug": "test-b", "properties": {}}] -Endpoint node ids are derived deterministically from source/target entity type labels -and slugs (same hashing rules as the platform). Run after entity nodes exist. +Run after entity nodes exist. Author primary direction only; platform creates twin inverse edges. 
Example: - python3 instance_generators/json_relationships_to_jsonl.py depends_on service service \\ - instance_generators/out/depends_on.json \\ - > instance_generators/out/depends_on.jsonl + python3 instance_generators/repository_defines_test.py repository-files \\ + > instance_generators/out/repository_defines_test_instances.json + + python3 instance_generators/relationships_to_jsonl.py defines repository test \\ + instance_generators/out/repository_defines_test_instances.json \\ + > instance_generators/out/repository_defines_test_instances.jsonl """ from __future__ import annotations @@ -123,7 +119,7 @@ def load_relationships(payload: Any) -> list[dict[str, Any]]: def main() -> int: parser = argparse.ArgumentParser( - description="Convert relationship generator JSON to Kartograph edge CREATE JSONL.", + description="Convert relationship scanner JSON to Kartograph edge CREATE JSONL.", ) parser.add_argument("relationship_label", help="Relationship type label in the ontology.") parser.add_argument("source_entity_type", help="Source endpoint entity type label.") diff --git a/src/api/extraction/infrastructure/instance_generator_templates/source_file.py b/src/api/extraction/infrastructure/instance_generator_templates/source_file.py deleted file mode 100644 index 34b845d11..000000000 --- a/src/api/extraction/infrastructure/instance_generator_templates/source_file.py +++ /dev/null @@ -1,59 +0,0 @@ -#!/usr/bin/env python3 -"""Generate one entity instance per source file under repository-files.""" - -from __future__ import annotations - -import json -import sys -from pathlib import Path - -FILE_EXTENSIONS = ( - ".md", - ".go", - ".py", - ".yaml", - ".yml", - ".json", - ".ts", - ".tsx", - ".js", - ".java", - ".rs", - ".rb", - ".sh", -) - - -def _path_to_slug(rel_path: Path) -> str: - return str(rel_path).replace("/", "-").replace("_", "-").replace(".", "-").lower() - - -def generate_instances(data_dir: Path) -> list[dict]: - instances: list[dict] = [] - for source_dir in 
sorted(data_dir.iterdir()): - if not source_dir.is_dir() or source_dir.name.startswith("."): - continue - for file_path in sorted(source_dir.rglob("*")): - if not file_path.is_file(): - continue - if file_path.suffix.lower() not in FILE_EXTENSIONS: - continue - if any(part.startswith(".") for part in file_path.parts): - continue - rel_path = file_path.relative_to(data_dir) - instances.append( - { - "slug": _path_to_slug(rel_path), - "properties": { - "file_path": str(rel_path), - "name": file_path.name, - "source_path": str(rel_path), - }, - } - ) - return instances - - -if __name__ == "__main__": - root = Path(sys.argv[1]) if len(sys.argv) > 1 else Path("repository-files") - print(json.dumps(generate_instances(root), indent=2)) diff --git a/src/api/extraction/infrastructure/sticky_session_workdir_materializer.py b/src/api/extraction/infrastructure/sticky_session_workdir_materializer.py index 73097db26..3da8989ba 100644 --- a/src/api/extraction/infrastructure/sticky_session_workdir_materializer.py +++ b/src/api/extraction/infrastructure/sticky_session_workdir_materializer.py @@ -113,6 +113,7 @@ def _materialize_instance_generators(session_root: Path) -> None: source = TEMPLATES_DIR / name if source.is_file(): shutil.copy2(source, target_dir / name) + (target_dir / "out").mkdir(parents=True, exist_ok=True) @staticmethod def _extension_counts(root: Path) -> dict[str, int]: diff --git a/src/api/extraction/infrastructure/sticky_session_workspace_binds.py b/src/api/extraction/infrastructure/sticky_session_workspace_binds.py new file mode 100644 index 000000000..4c5647c11 --- /dev/null +++ b/src/api/extraction/infrastructure/sticky_session_workspace_binds.py @@ -0,0 +1,35 @@ +"""Docker bind mounts for sticky session workspaces. + +Repository snapshots stay read-only; ``instance_generators/`` must be writable so the +agent can author scanner scripts and JSON/JSONL outputs for bulk prepopulation. 
+""" + +from __future__ import annotations + +WORKSPACE_READONLY_SUBDIRS: tuple[str, ...] = ( + "repository-files", + "ingestion-context", +) +WORKSPACE_WRITABLE_SUBDIRS: tuple[str, ...] = ("instance_generators",) +WORKSPACE_READONLY_ROOT_FILES: tuple[str, ...] = ( + "sources-index.json", + "knowledge-graph-id", +) + + +def build_sticky_session_workspace_binds( + *, + host_session_work_dir: str, + container_work_mount: str, +) -> tuple[str, ...]: + """Return bind specs that expose a split read/write workspace layout.""" + host_root = host_session_work_dir.rstrip("/") + container_root = container_work_mount.rstrip("/") + binds: list[str] = [] + for subdir in WORKSPACE_READONLY_SUBDIRS: + binds.append(f"{host_root}/{subdir}:{container_root}/{subdir}:ro") + for subdir in WORKSPACE_WRITABLE_SUBDIRS: + binds.append(f"{host_root}/{subdir}:{container_root}/{subdir}") + for filename in WORKSPACE_READONLY_ROOT_FILES: + binds.append(f"{host_root}/{filename}:{container_root}/{filename}:ro") + return tuple(binds) diff --git a/src/api/management/domain/relationship_pairing.py b/src/api/management/domain/relationship_pairing.py index 257b9131a..8d3bf3445 100644 --- a/src/api/management/domain/relationship_pairing.py +++ b/src/api/management/domain/relationship_pairing.py @@ -145,7 +145,7 @@ def deterministic_twin_edge_id( end_id: str, tenant_id: str = "", ) -> str: - """Match json_relationships_to_jsonl deterministic edge id rules.""" + """Match relationships_to_jsonl deterministic edge id rules.""" normalized_label = relationship_label.strip().lower() combined = f"{tenant_id}:{start_id.strip()}:{normalized_label}:{end_id.strip()}" digest = hashlib.sha256(combined.encode()).hexdigest()[:16] diff --git a/src/api/tests/unit/extraction/application/test_schema_authoring_guide.py b/src/api/tests/unit/extraction/application/test_schema_authoring_guide.py index 63189d1f8..997a3a184 100644 --- a/src/api/tests/unit/extraction/application/test_schema_authoring_guide.py +++ 
b/src/api/tests/unit/extraction/application/test_schema_authoring_guide.py @@ -6,14 +6,11 @@ def test_authoring_guide_documents_bootstrap_and_modeling_guidance() -> None: - assert "## Bootstrap workflow (6 phases)" in SCHEMA_AUTHORING_GUIDE - assert "## Schema modeling rules" in SCHEMA_AUTHORING_GUIDE - assert "## Workspace discovery patterns" in SCHEMA_AUTHORING_GUIDE - assert "read-only" in SCHEMA_AUTHORING_GUIDE - assert "Property vs entity" in SCHEMA_AUTHORING_GUIDE - assert "Never hand-author bulk CREATE ids" in SCHEMA_AUTHORING_GUIDE - assert "## Prepopulation execution (default)" in SCHEMA_AUTHORING_GUIDE - assert "Prepopulation is script writing" in SCHEMA_AUTHORING_GUIDE - assert "do not ask whether to proceed" in SCHEMA_AUTHORING_GUIDE - assert "all" in SCHEMA_AUTHORING_GUIDE and "entity" in SCHEMA_AUTHORING_GUIDE - assert "creatively" in SCHEMA_AUTHORING_GUIDE + assert "## Workspace layout" in SCHEMA_AUTHORING_GUIDE + assert "entities_to_jsonl.py" in SCHEMA_AUTHORING_GUIDE + assert "relationships_to_jsonl.py" in SCHEMA_AUTHORING_GUIDE + assert "_entity_scanner.example.py" in SCHEMA_AUTHORING_GUIDE + assert "test_instances.json" in SCHEMA_AUTHORING_GUIDE + assert "prepopulated" in SCHEMA_AUTHORING_GUIDE + assert "/tmp" in SCHEMA_AUTHORING_GUIDE + assert "data_source.py" not in SCHEMA_AUTHORING_GUIDE diff --git a/src/api/tests/unit/extraction/application/test_skill_resolution_service.py b/src/api/tests/unit/extraction/application/test_skill_resolution_service.py index 2c59e704d..958d771e8 100644 --- a/src/api/tests/unit/extraction/application/test_skill_resolution_service.py +++ b/src/api/tests/unit/extraction/application/test_skill_resolution_service.py @@ -42,25 +42,14 @@ async def test_bootstrap_mode_uses_bootstrap_defaults(self): "prepopulation", "readiness_reporting", } - assert "3–5" in resolved.skills["capabilities_intake"] - assert "Workspace discovery" in resolved.skills["bootstrap_workflow"] - assert "Property vs entity" in 
resolved.skills["schema_modeling"] - assert "script-first" in resolved.skills["prepopulation"] - assert "Python" in resolved.skills["prepopulation"] - assert "all prepopulated entity-type gaps before any relationship" in resolved.skills[ - "prepopulation" - ] - assert "creative" in resolved.skills["prepopulation"] + assert "entities_to_jsonl.py" in resolved.skills["prepopulation"] + assert "_instances.json" in resolved.skills["prepopulation"] + assert "Entities before relationships" in resolved.skills["prepopulation"] guardrails_text = " ".join(resolved.guardrails) - assert "Finish all entity-type gaps before any relationship-type gaps" in guardrails_text - assert "Python scanner scripts" in guardrails_text - assert "do not poll" in resolved.skills["readiness_reporting"] - assert "six-phase" in resolved.system_prompt.lower() - assert "one phase per turn" in guardrails_text - assert "do not ask whether to proceed" in guardrails_text - assert "One script task per turn" in guardrails_text + assert "entities_to_jsonl.py" in guardrails_text + assert "never /tmp" in guardrails_text or "Never /tmp" in guardrails_text + assert "do not ask" in guardrails_text assert "kartograph_save_schema_ontology" in guardrails_text - assert "never hand-author CREATE ids" in guardrails_text assert len(resolved.prompt_hierarchy) > 0 async def test_extraction_mode_uses_extraction_defaults(self): @@ -78,49 +67,43 @@ async def test_extraction_mode_uses_extraction_defaults(self): assert "schema_edits_secondary" in resolved.skills assert "extraction" in resolved.system_prompt.lower() assert len(resolved.prompt_hierarchy) > 0 - assert len(resolved.guardrails) > 0 async def test_kg_overrides_replace_matching_template_and_append_new(self): - repo = _InMemorySkillOverrideRepository( - overrides={ - ( - "kg-1", - ExtractionSessionMode.EXTRACTION_OPERATIONS, - ): { - "job_setup": "KG-specific job setup instructions", - "custom_review": "Custom review flow", - } + overrides = { + ("kg-1", 
ExtractionSessionMode.SCHEMA_BOOTSTRAP): { + "prepopulation": "Custom prepopulation guidance.", + "custom_skill": "Extra skill text.", } + } + service = ExtractionSkillResolutionService( + override_repository=_InMemorySkillOverrideRepository(overrides) ) - service = ExtractionSkillResolutionService(override_repository=repo) resolved = await service.resolve_for_session( knowledge_graph_id="kg-1", - mode=ExtractionSessionMode.EXTRACTION_OPERATIONS, + mode=ExtractionSessionMode.SCHEMA_BOOTSTRAP, ) - assert resolved.skills["job_setup"] == "KG-specific job setup instructions" - assert resolved.skills["custom_review"] == "Custom review flow" + assert resolved.skills["prepopulation"] == "Custom prepopulation guidance." + assert resolved.skills["custom_skill"] == "Extra skill text." + assert "bootstrap_workflow" in resolved.skills async def test_override_merge_is_deterministic(self): - repo = _InMemorySkillOverrideRepository( - overrides={ - ( - "kg-1", - ExtractionSessionMode.SCHEMA_BOOTSTRAP, - ): { - "z_last": "z", - "a_first": "a", - } - } + overrides = { + ("kg-1", ExtractionSessionMode.SCHEMA_BOOTSTRAP): {"prepopulation": "A"}, + ("kg-2", ExtractionSessionMode.SCHEMA_BOOTSTRAP): {"prepopulation": "B"}, + } + service = ExtractionSkillResolutionService( + override_repository=_InMemorySkillOverrideRepository(overrides) ) - service = ExtractionSkillResolutionService(override_repository=repo) - resolved = await service.resolve_for_session( + first = await service.resolve_for_session( + knowledge_graph_id="kg-1", + mode=ExtractionSessionMode.SCHEMA_BOOTSTRAP, + ) + second = await service.resolve_for_session( knowledge_graph_id="kg-1", mode=ExtractionSessionMode.SCHEMA_BOOTSTRAP, ) - # Additional override keys are merged in sorted order for determinism. 
- assert list(resolved.skills.keys())[-2:] == ["a_first", "z_last"] - + assert first.skills == second.skills diff --git a/src/api/tests/unit/extraction/infrastructure/test_json_instances_to_jsonl.py b/src/api/tests/unit/extraction/infrastructure/test_entities_to_jsonl.py similarity index 60% rename from src/api/tests/unit/extraction/infrastructure/test_json_instances_to_jsonl.py rename to src/api/tests/unit/extraction/infrastructure/test_entities_to_jsonl.py index 7a18376db..627a11c9b 100644 --- a/src/api/tests/unit/extraction/infrastructure/test_json_instances_to_jsonl.py +++ b/src/api/tests/unit/extraction/infrastructure/test_entities_to_jsonl.py @@ -1,4 +1,4 @@ -"""Unit tests for the json_instances_to_jsonl helper script.""" +"""Unit tests for the entities_to_jsonl helper script.""" from __future__ import annotations @@ -9,12 +9,12 @@ SCRIPT = ( Path(__file__).resolve().parents[4] - / "extraction/infrastructure/instance_generator_templates/json_instances_to_jsonl.py" + / "extraction/infrastructure/instance_generator_templates/entities_to_jsonl.py" ) -def test_json_instances_to_jsonl_emits_sorted_create_lines(tmp_path: Path) -> None: - instances_path = tmp_path / "instances.json" +def test_entities_to_jsonl_emits_sorted_create_lines(tmp_path: Path) -> None: + instances_path = tmp_path / "test_instances.json" instances_path.write_text( json.dumps( [ @@ -24,13 +24,12 @@ def test_json_instances_to_jsonl_emits_sorted_create_lines(tmp_path: Path) -> No ), encoding="utf-8", ) - output_path = tmp_path / "out.jsonl" proc = subprocess.run( [ sys.executable, str(SCRIPT), - "source_file", + "test", "--data-source-id", "schema-bootstrap", "--source-path", @@ -41,7 +40,6 @@ def test_json_instances_to_jsonl_emits_sorted_create_lines(tmp_path: Path) -> No capture_output=True, text=True, ) - output_path.write_text(proc.stdout, encoding="utf-8") lines = [line for line in proc.stdout.splitlines() if line.strip()] assert len(lines) == 2 @@ -50,20 +48,11 @@ def 
test_json_instances_to_jsonl_emits_sorted_create_lines(tmp_path: Path) -> No second = json.loads(lines[1]) assert first["set_properties"]["slug"] == "a-entity" assert second["set_properties"]["slug"] == "b-entity" - assert first["op"] == "CREATE" - assert first["type"] == "node" - assert first["label"] == "source_file" + assert first["label"] == "test" assert first["set_properties"]["data_source_id"] == "schema-bootstrap" - assert first["set_properties"]["source_path"] == "graph-management-assistant" - assert first["id"] == second["id"] or first["set_properties"]["slug"] != second["set_properties"]["slug"] rerun = subprocess.run( - [ - sys.executable, - str(SCRIPT), - "source_file", - str(instances_path), - ], + [sys.executable, str(SCRIPT), "test", str(instances_path)], check=True, capture_output=True, text=True, diff --git a/src/api/tests/unit/extraction/infrastructure/test_json_relationships_to_jsonl.py b/src/api/tests/unit/extraction/infrastructure/test_relationships_to_jsonl.py similarity index 77% rename from src/api/tests/unit/extraction/infrastructure/test_json_relationships_to_jsonl.py rename to src/api/tests/unit/extraction/infrastructure/test_relationships_to_jsonl.py index 3ed9413ac..acdf46c29 100644 --- a/src/api/tests/unit/extraction/infrastructure/test_json_relationships_to_jsonl.py +++ b/src/api/tests/unit/extraction/infrastructure/test_relationships_to_jsonl.py @@ -1,4 +1,4 @@ -"""Unit tests for json_relationships_to_jsonl helper.""" +"""Unit tests for relationships_to_jsonl helper.""" from __future__ import annotations @@ -9,12 +9,12 @@ SCRIPT = ( Path(__file__).resolve().parents[4] - / "extraction/infrastructure/instance_generator_templates/json_relationships_to_jsonl.py" + / "extraction/infrastructure/instance_generator_templates/relationships_to_jsonl.py" ) -def test_json_relationships_to_jsonl_emits_edge_create_lines(tmp_path: Path) -> None: - input_path = tmp_path / "relationships.json" +def 
test_relationships_to_jsonl_emits_edge_create_lines(tmp_path: Path) -> None: + input_path = tmp_path / "repository_defines_test_instances.json" input_path.write_text( json.dumps( [ diff --git a/src/api/tests/unit/extraction/infrastructure/test_sticky_session_container_bootstrap.py b/src/api/tests/unit/extraction/infrastructure/test_sticky_session_container_bootstrap.py index a4087605d..d047aea3c 100644 --- a/src/api/tests/unit/extraction/infrastructure/test_sticky_session_container_bootstrap.py +++ b/src/api/tests/unit/extraction/infrastructure/test_sticky_session_container_bootstrap.py @@ -53,7 +53,9 @@ def test_start_runtime_mounts_skills_workspace_and_injects_token() -> None: assert spec.network == "kartograph_kartograph" assert spec.env["KARTOGRAPH_WORKLOAD_TOKEN"] == credentials.token assert "/tmp/skills:/app/skills:ro" in spec.binds - assert "/tmp/session-work:/workspace:ro" in spec.binds + assert "/tmp/session-work/repository-files:/workspace/repository-files:ro" in spec.binds + assert "/tmp/session-work/instance_generators:/workspace/instance_generators" in spec.binds + assert "/tmp/session-work/sources-index.json:/workspace/sources-index.json:ro" in spec.binds assert "/host/.config/gcloud:/gcloud/config:ro" in spec.binds assert spec.env["CLOUDSDK_CONFIG"] == "/gcloud/config" assert spec.env["GOOGLE_APPLICATION_CREDENTIALS"] == ( diff --git a/src/api/tests/unit/extraction/infrastructure/test_sticky_session_workdir_materializer.py b/src/api/tests/unit/extraction/infrastructure/test_sticky_session_workdir_materializer.py index 01bc280f9..9436cf91c 100644 --- a/src/api/tests/unit/extraction/infrastructure/test_sticky_session_workdir_materializer.py +++ b/src/api/tests/unit/extraction/infrastructure/test_sticky_session_workdir_materializer.py @@ -168,13 +168,12 @@ def test_materializer_copies_instance_generator_templates(tmp_path: Path) -> Non generators_dir = session_root / "instance_generators" assert generators_dir.is_dir() for name in ( - "data_source.py", 
- "folder.py", - "source_file.py", - "json_instances_to_jsonl.py", - "json_relationships_to_jsonl.py", + "_entity_scanner.example.py", + "entities_to_jsonl.py", + "relationships_to_jsonl.py", "README.md", ): assert (generators_dir / name).is_file() readme = (generators_dir / "README.md").read_text(encoding="utf-8") assert "repository-files" in readme + assert (generators_dir / "out").is_dir() diff --git a/src/api/tests/unit/extraction/infrastructure/test_sticky_session_workspace_binds.py b/src/api/tests/unit/extraction/infrastructure/test_sticky_session_workspace_binds.py new file mode 100644 index 000000000..3fee70ac9 --- /dev/null +++ b/src/api/tests/unit/extraction/infrastructure/test_sticky_session_workspace_binds.py @@ -0,0 +1,21 @@ +"""Unit tests for sticky session workspace bind layout.""" + +from __future__ import annotations + +from extraction.infrastructure.sticky_session_workspace_binds import ( + build_sticky_session_workspace_binds, +) + + +def test_workspace_binds_split_read_only_and_writable_paths() -> None: + binds = build_sticky_session_workspace_binds( + host_session_work_dir="/host/session", + container_work_mount="/workspace", + ) + + assert "/host/session/repository-files:/workspace/repository-files:ro" in binds + assert "/host/session/ingestion-context:/workspace/ingestion-context:ro" in binds + assert "/host/session/instance_generators:/workspace/instance_generators" in binds + assert "/host/session/sources-index.json:/workspace/sources-index.json:ro" in binds + assert "/host/session/knowledge-graph-id:/workspace/knowledge-graph-id:ro" in binds + assert not any(bind.endswith("/workspace:ro") for bind in binds) From 1856330f501743698cba22852262a07b7a2c6f15 Mon Sep 17 00:00:00 2001 From: aredenba-rh <aredenba@redhat.com> Date: Fri, 5 Jun 2026 10:55:27 -0400 Subject: [PATCH 107/153] feat(graph): make source_path optional on node and edge CREATE Remove source_path from platform system properties so callers add provenance only when needed; 
per-type required_properties can still enforce it. Co-authored-by: Cursor <cursoragent@cursor.com> --- scripts/export_system_properties.py | 2 +- specs/graph/mutations.spec.md | 10 ++++++---- .../application/schema_authoring_guide.py | 2 +- .../instance_generator_templates/README.md | 1 - .../entities_to_jsonl.py | 9 +++++++-- .../relationships_to_jsonl.py | 9 +++++++-- src/api/graph/domain/value_objects.py | 4 +--- .../infrastructure/test_entities_to_jsonl.py | 20 +++++++++++++++++-- .../application/test_mutation_service.py | 16 +++------------ .../graph/application/test_schema_learning.py | 11 +++++----- .../unit/graph/test_domain_value_objects.py | 8 ++++---- .../unit/graph/test_system_properties.py | 10 +++------- .../docs/guides/extraction-mutations.mdx | 5 ++++- .../reference/mutation-operation-schema.json | 8 +++----- .../docs/reference/mutation-schema.mdx | 4 ++-- website/src/data/system-properties.json | 12 ++++------- 16 files changed, 69 insertions(+), 62 deletions(-) diff --git a/scripts/export_system_properties.py b/scripts/export_system_properties.py index 2ddea5f4b..6140477b4 100644 --- a/scripts/export_system_properties.py +++ b/scripts/export_system_properties.py @@ -31,7 +31,7 @@ def export_system_properties(): # Property descriptions for documentation property_descriptions = { "data_source_id": "Identifies which data source this entity came from (e.g., 'ds-123')", - "source_path": "The file path within the data source where this entity was extracted from", + "source_path": "Optional provenance path within the data source (not a platform-required system property)", "slug": "Unique human-readable identifier for the node (e.g., 'alice-smith', 'kartograph')", } diff --git a/specs/graph/mutations.spec.md b/specs/graph/mutations.spec.md index 22dce6d9e..cd3937437 100644 --- a/specs/graph/mutations.spec.md +++ b/specs/graph/mutations.spec.md @@ -55,12 +55,12 @@ The system SHALL support declaring node and edge types with property schemas. 
- GIVEN a DEFINE operation with label "person", description, and required properties - WHEN the mutation is applied - THEN a type definition is stored with the label, description, required properties, and empty optional properties -- AND system properties (`data_source_id`, `source_path`, `slug`) are automatically added to required properties +- AND system properties (`data_source_id`, `slug`) are automatically added to required properties #### Scenario: Define an edge type - GIVEN a DEFINE operation with entity type "edge" - WHEN the mutation is applied -- THEN system properties for edges (`data_source_id`, `source_path`) are automatically added +- THEN system properties for edges (`data_source_id`) are automatically added ### Requirement: CREATE Operation The system SHALL support idempotent entity creation with property accumulation. @@ -133,11 +133,13 @@ The system SHALL require specific system-managed properties on all CREATE operat #### Scenario: Node system properties - GIVEN a CREATE operation for a node -- THEN `data_source_id`, `source_path`, `slug`, and `knowledge_graph_id` MUST be present in `set_properties` +- THEN `data_source_id`, `slug`, and `knowledge_graph_id` MUST be present in `set_properties` +- AND `source_path` MAY be present when the caller or type definition requires provenance #### Scenario: Edge system properties - GIVEN a CREATE operation for an edge -- THEN `data_source_id`, `source_path`, and `knowledge_graph_id` MUST be present in `set_properties` +- THEN `data_source_id` and `knowledge_graph_id` MUST be present in `set_properties` +- AND `source_path` MAY be present when the caller or type definition requires provenance ### Requirement: Deterministic Entity IDs The system SHALL use deterministic IDs for idempotent mutation replay. 
diff --git a/src/api/extraction/application/schema_authoring_guide.py b/src/api/extraction/application/schema_authoring_guide.py index b2b92c14d..78b6c31b7 100644 --- a/src/api/extraction/application/schema_authoring_guide.py +++ b/src/api/extraction/application/schema_authoring_guide.py @@ -109,7 +109,7 @@ ## Instance mutations (JSONL) -- CREATE requires `data_source_id`, `source_path`, and `slug` on nodes. +- CREATE requires `data_source_id` and `slug` on nodes. Add `source_path` only when provenance matters. - CREATE is strict — use UPDATE for existing instances. - Never hand-author bulk CREATE lines in chat; use `entities_to_jsonl.py` / `relationships_to_jsonl.py`. - Create all entity nodes before relationship edges. diff --git a/src/api/extraction/infrastructure/instance_generator_templates/README.md b/src/api/extraction/infrastructure/instance_generator_templates/README.md index 58c678016..73e8fce37 100644 --- a/src/api/extraction/infrastructure/instance_generator_templates/README.md +++ b/src/api/extraction/infrastructure/instance_generator_templates/README.md @@ -18,7 +18,6 @@ python3 instance_generators/test.py repository-files \ python3 instance_generators/entities_to_jsonl.py test \ --data-source-id schema-bootstrap \ - --source-path graph-management-assistant \ instance_generators/out/test_instances.json \ > instance_generators/out/test_instances.jsonl ``` diff --git a/src/api/extraction/infrastructure/instance_generator_templates/entities_to_jsonl.py b/src/api/extraction/infrastructure/instance_generator_templates/entities_to_jsonl.py index d5415ec0e..6754c9831 100644 --- a/src/api/extraction/infrastructure/instance_generator_templates/entities_to_jsonl.py +++ b/src/api/extraction/infrastructure/instance_generator_templates/entities_to_jsonl.py @@ -49,7 +49,8 @@ def instance_to_create_line( set_properties.setdefault("slug", slug) set_properties.setdefault("name", slug) set_properties["data_source_id"] = data_source_id - set_properties["source_path"] = 
source_path + if source_path.strip(): + set_properties["source_path"] = source_path.strip() return { "op": "CREATE", "type": "node", @@ -88,7 +89,11 @@ def main() -> int: parser.add_argument("input", nargs="?", help="Path to JSON file; omit to read stdin.") parser.add_argument("--tenant-id", default="", help="Tenant id for deterministic node ids.") parser.add_argument("--data-source-id", default="schema-bootstrap") - parser.add_argument("--source-path", default="graph-management-assistant") + parser.add_argument( + "--source-path", + default="", + help="Optional provenance path stamped on each CREATE when set.", + ) args = parser.parse_args() raw = Path(args.input).read_text(encoding="utf-8") if args.input else sys.stdin.read() diff --git a/src/api/extraction/infrastructure/instance_generator_templates/relationships_to_jsonl.py b/src/api/extraction/infrastructure/instance_generator_templates/relationships_to_jsonl.py index b6541b6d2..fff0c4e49 100644 --- a/src/api/extraction/infrastructure/instance_generator_templates/relationships_to_jsonl.py +++ b/src/api/extraction/infrastructure/instance_generator_templates/relationships_to_jsonl.py @@ -71,7 +71,8 @@ def relationship_to_create_line( ) set_properties = dict(properties) set_properties["data_source_id"] = data_source_id - set_properties["source_path"] = source_path + if source_path.strip(): + set_properties["source_path"] = source_path.strip() return { "op": "CREATE", "type": "edge", @@ -127,7 +128,11 @@ def main() -> int: parser.add_argument("input", nargs="?", help="Path to JSON file; omit to read stdin.") parser.add_argument("--tenant-id", default="", help="Tenant id for deterministic ids.") parser.add_argument("--data-source-id", default="schema-bootstrap") - parser.add_argument("--source-path", default="graph-management-assistant") + parser.add_argument( + "--source-path", + default="", + help="Optional provenance path stamped on each CREATE when set.", + ) args = parser.parse_args() raw = 
Path(args.input).read_text(encoding="utf-8") if args.input else sys.stdin.read() diff --git a/src/api/graph/domain/value_objects.py b/src/api/graph/domain/value_objects.py index 1b9f2d40f..372cca791 100644 --- a/src/api/graph/domain/value_objects.py +++ b/src/api/graph/domain/value_objects.py @@ -42,7 +42,7 @@ class MutationOperationType(str, Enum): # These are automatically added by the system and should not be tracked as optional properties # Properties required for ALL entities (nodes and edges) -COMMON_SYSTEM_PROPERTIES: frozenset[str] = frozenset({"data_source_id", "source_path"}) +COMMON_SYSTEM_PROPERTIES: frozenset[str] = frozenset({"data_source_id"}) # Node-specific system properties (in addition to common) # These are REQUIRED in CREATE operations and excluded from optional property tracking. @@ -308,8 +308,6 @@ def validate_operation(self) -> None: raise ValueError("CREATE requires 'set_properties'") if "data_source_id" not in self.set_properties: raise ValueError("CREATE requires 'data_source_id' in set_properties") - if "source_path" not in self.set_properties: - raise ValueError("CREATE requires 'source_path' in set_properties") if self.type == "node": if "slug" not in self.set_properties: diff --git a/src/api/tests/unit/extraction/infrastructure/test_entities_to_jsonl.py b/src/api/tests/unit/extraction/infrastructure/test_entities_to_jsonl.py index 627a11c9b..b64bb554b 100644 --- a/src/api/tests/unit/extraction/infrastructure/test_entities_to_jsonl.py +++ b/src/api/tests/unit/extraction/infrastructure/test_entities_to_jsonl.py @@ -32,8 +32,6 @@ def test_entities_to_jsonl_emits_sorted_create_lines(tmp_path: Path) -> None: "test", "--data-source-id", "schema-bootstrap", - "--source-path", - "graph-management-assistant", str(instances_path), ], check=True, @@ -58,3 +56,21 @@ def test_entities_to_jsonl_emits_sorted_create_lines(tmp_path: Path) -> None: text=True, ) assert rerun.stdout == proc.stdout + + +def 
test_entities_to_jsonl_omits_source_path_when_not_configured(tmp_path: Path) -> None: + instances_path = tmp_path / "test_instances.json" + instances_path.write_text( + json.dumps([{"slug": "a-entity", "properties": {"name": "A"}}]), + encoding="utf-8", + ) + + proc = subprocess.run( + [sys.executable, str(SCRIPT), "test", str(instances_path)], + check=True, + capture_output=True, + text=True, + ) + + line = json.loads(proc.stdout.strip()) + assert "source_path" not in line["set_properties"] diff --git a/src/api/tests/unit/graph/application/test_mutation_service.py b/src/api/tests/unit/graph/application/test_mutation_service.py index 2e868bfb6..4e9558ee0 100644 --- a/src/api/tests/unit/graph/application/test_mutation_service.py +++ b/src/api/tests/unit/graph/application/test_mutation_service.py @@ -104,21 +104,12 @@ def test_apply_mutations_stores_define_operations(self): assert saved_type_def.entity_type == EntityType.NODE assert saved_type_def.description == "A person in the organization" - # System properties must be automatically added to required_properties - # per spec: system properties (data_source_id, source_path, slug) are - # automatically added to required properties for node types. - # Because "slug" was NOT in the caller-provided set, this assertion only - # passes when the service actually injects it — a regression would fail here. assert "data_source_id" in saved_type_def.required_properties - assert "source_path" in saved_type_def.required_properties + assert "source_path" not in saved_type_def.required_properties assert "slug" in saved_type_def.required_properties def test_apply_define_edge_type_adds_edge_system_properties(self): - """DEFINE for edge type adds data_source_id and source_path but NOT slug. - - Spec (DEFINE edge type scenario): system properties for edges - (data_source_id, source_path) are automatically added. slug is a node-only - system property and MUST NOT appear in edge type definitions. 
+ """DEFINE for edge type adds data_source_id but NOT slug or source_path. """ from graph.application.services import GraphMutationService @@ -154,9 +145,8 @@ def test_apply_define_edge_type_adds_edge_system_properties(self): saved_type_def.description == "Dependency relationship between components" ) - # Edge system properties MUST be automatically added assert "data_source_id" in saved_type_def.required_properties - assert "source_path" in saved_type_def.required_properties + assert "source_path" not in saved_type_def.required_properties # slug is a node-only system property — MUST NOT appear on edge types assert "slug" not in saved_type_def.required_properties diff --git a/src/api/tests/unit/graph/application/test_schema_learning.py b/src/api/tests/unit/graph/application/test_schema_learning.py index 34e78f8c3..059d7562e 100644 --- a/src/api/tests/unit/graph/application/test_schema_learning.py +++ b/src/api/tests/unit/graph/application/test_schema_learning.py @@ -94,9 +94,8 @@ def mock_get(label, entity_type): final_type_def = mock_type_repo.save.call_args_list[1][0][0] assert "age" in final_type_def.optional_properties assert "email" in final_type_def.optional_properties - # data_source_id and source_path are system props, should be excluded assert "data_source_id" not in final_type_def.optional_properties - assert "source_path" not in final_type_def.optional_properties + assert "source_path" in final_type_def.optional_properties def test_optional_properties_accumulate(self, service, mock_type_repo): """Should accumulate optional properties across multiple CREATEs.""" @@ -163,7 +162,7 @@ def test_system_properties_excluded_from_optional(self, service, mock_type_repo) updated_type_def = mock_type_repo.save.call_args[0][0] assert "custom_field" in updated_type_def.optional_properties assert "data_source_id" not in updated_type_def.optional_properties - assert "source_path" not in updated_type_def.optional_properties + assert "source_path" in 
updated_type_def.optional_properties def test_no_update_when_no_extra_properties(self, service, mock_type_repo): """Should not update type def if no new optional properties.""" @@ -172,11 +171,11 @@ def test_no_update_when_no_extra_properties(self, service, mock_type_repo): entity_type=EntityType.NODE, description="A person", required_properties={"slug", "name"}, - optional_properties={"email"}, + optional_properties={"email", "source_path"}, ) mock_type_repo.get.return_value = existing_type_def - # CREATE with only required props + already-known optional prop + # CREATE with only required props + already-known optional props create_op = MutationOperation( op=MutationOperationType.CREATE, type=EntityType.NODE, @@ -185,7 +184,7 @@ def test_no_update_when_no_extra_properties(self, service, mock_type_repo): set_properties={ "slug": "alice", "name": "Alice", - "email": "alice@example.com", # Already in optional_properties + "email": "alice@example.com", "data_source_id": "ds-123", "source_path": "people/alice.md", }, diff --git a/src/api/tests/unit/graph/test_domain_value_objects.py b/src/api/tests/unit/graph/test_domain_value_objects.py index 05780c414..69a709cad 100644 --- a/src/api/tests/unit/graph/test_domain_value_objects.py +++ b/src/api/tests/unit/graph/test_domain_value_objects.py @@ -340,8 +340,8 @@ def test_create_requires_data_source_id(self): with pytest.raises(ValueError, match="data_source_id"): mutation.validate_operation() - def test_create_requires_source_path(self): - """CREATE should require source_path in set_properties.""" + def test_create_allows_missing_source_path(self): + """CREATE should not require source_path in set_properties.""" mutation = MutationOperation( op=MutationOperationType.CREATE, type=EntityType.NODE, @@ -350,10 +350,10 @@ def test_create_requires_source_path(self): set_properties={ "slug": "alice", "data_source_id": "ds-123", + "knowledge_graph_id": "kg-123", }, ) - with pytest.raises(ValueError, match="source_path"): - 
mutation.validate_operation() + mutation.validate_operation() def test_create_node_requires_slug(self): """CREATE node should require slug in set_properties.""" diff --git a/src/api/tests/unit/graph/test_system_properties.py b/src/api/tests/unit/graph/test_system_properties.py index e1f12afa8..d74575aa2 100644 --- a/src/api/tests/unit/graph/test_system_properties.py +++ b/src/api/tests/unit/graph/test_system_properties.py @@ -18,7 +18,7 @@ class TestSystemPropertiesConstants: def test_common_system_properties_defined(self): """Should define common system properties.""" assert "data_source_id" in COMMON_SYSTEM_PROPERTIES - assert "source_path" in COMMON_SYSTEM_PROPERTIES + assert "source_path" not in COMMON_SYSTEM_PROPERTIES def test_node_system_properties_defined(self): """Should define node-specific system properties.""" @@ -37,20 +37,16 @@ def test_returns_node_system_properties(self): """Should return common + node-specific properties for nodes.""" props = get_system_properties_for_entity(EntityType.NODE) - # Should include common properties assert "data_source_id" in props - assert "source_path" in props - - # Should include node-specific properties + assert "source_path" not in props assert "slug" in props def test_returns_edge_system_properties(self): """Should return common + edge-specific properties for edges.""" props = get_system_properties_for_entity(EntityType.EDGE) - # Should include common properties assert "data_source_id" in props - assert "source_path" in props + assert "source_path" not in props # Should NOT include node-specific properties assert "slug" not in props diff --git a/website/src/content/docs/guides/extraction-mutations.mdx b/website/src/content/docs/guides/extraction-mutations.mdx index 8c3488432..161d0726b 100644 --- a/website/src/content/docs/guides/extraction-mutations.mdx +++ b/website/src/content/docs/guides/extraction-mutations.mdx @@ -189,7 +189,10 @@ CREATE is **idempotent** - you can run it multiple times safely. 
It uses `MERGE` - `label` - Graph label (PascalCase: `"Person"`, `"Repository"`) - `set_properties` must include: - `data_source_id` - Your data source identifier - - `source_path` - Which file this entity came from + - `slug` - For nodes only + +**Optional provenance:** +- `source_path` - Which file this entity came from (add when you need traceability, or when your type definition requires it) **Additional required for edges:** - `start_id` - ID of source node diff --git a/website/src/content/docs/reference/mutation-operation-schema.json b/website/src/content/docs/reference/mutation-operation-schema.json index 765c40f62..e8df3e35a 100644 --- a/website/src/content/docs/reference/mutation-operation-schema.json +++ b/website/src/content/docs/reference/mutation-operation-schema.json @@ -193,8 +193,7 @@ "set_properties": { "type": "object", "required": [ - "data_source_id", - "source_path" + "data_source_id" ], "properties": { "data_source_id": { @@ -210,7 +209,7 @@ } } }, - "errorMessage": "CREATE requires 'label' and 'set_properties' with 'data_source_id' and 'source_path'" + "errorMessage": "CREATE requires 'label' and 'set_properties' with 'data_source_id'" } }, { @@ -230,7 +229,6 @@ "type": "object", "required": [ "data_source_id", - "source_path", "slug" ], "properties": { @@ -252,7 +250,7 @@ } } }, - "errorMessage": "CREATE node requires 'set_properties' with 'data_source_id', 'source_path', and 'slug'" + "errorMessage": "CREATE node requires 'set_properties' with 'data_source_id' and 'slug'" } }, { diff --git a/website/src/content/docs/reference/mutation-schema.mdx b/website/src/content/docs/reference/mutation-schema.mdx index 4881edebd..251ade2b1 100644 --- a/website/src/content/docs/reference/mutation-schema.mdx +++ b/website/src/content/docs/reference/mutation-schema.mdx @@ -90,8 +90,8 @@ listed below. 
"label": "string", "set_properties": { "data_source_id": "required", - "source_path": "required", - "slug": "required" // Only required for `CREATE node` operations + "slug": "required", // Only required for `CREATE node` operations + "source_path": "optional" // Include when provenance matters, or when required by the type definition ... } } diff --git a/website/src/data/system-properties.json b/website/src/data/system-properties.json index 9676ebcb8..4e1e34a5a 100644 --- a/website/src/data/system-properties.json +++ b/website/src/data/system-properties.json @@ -7,12 +7,10 @@ "common": { "description": "System properties required for all entities (nodes and edges)", "properties": [ - "data_source_id", - "source_path" + "data_source_id" ], "property_descriptions": { - "data_source_id": "Identifies which data source this entity came from (e.g., 'ds-123')", - "source_path": "The file path within the data source where this entity was extracted from" + "data_source_id": "Identifies which data source this entity came from (e.g., 'ds-123')" } }, "edge_specific": { @@ -23,8 +21,7 @@ "edge_total": { "description": "All system properties for edges (common + edge-specific)", "properties": [ - "data_source_id", - "source_path" + "data_source_id" ] }, "node_specific": { @@ -40,8 +37,7 @@ "description": "All system properties for nodes (common + node-specific)", "properties": [ "data_source_id", - "slug", - "source_path" + "slug" ] } } From 7ff77276873ab5063d4a91191d6b1660beded49b Mon Sep 17 00:00:00 2001 From: aredenba-rh <aredenba@redhat.com> Date: Fri, 5 Jun 2026 10:56:53 -0400 Subject: [PATCH 108/153] refactor(extraction): drop --source-path from JSONL converters Provenance comes from scanner properties only; converters no longer stamp source_path via a CLI flag. 
Co-authored-by: Cursor <cursoragent@cursor.com> --- .../application/schema_authoring_guide.py | 4 ++-- .../instance_generator_templates/README.md | 2 ++ .../entities_to_jsonl.py | 10 ---------- .../relationships_to_jsonl.py | 9 --------- .../infrastructure/test_entities_to_jsonl.py | 20 +++++++++++++++++++ 5 files changed, 24 insertions(+), 21 deletions(-) diff --git a/src/api/extraction/application/schema_authoring_guide.py b/src/api/extraction/application/schema_authoring_guide.py index 78b6c31b7..172d32be7 100644 --- a/src/api/extraction/application/schema_authoring_guide.py +++ b/src/api/extraction/application/schema_authoring_guide.py @@ -37,7 +37,7 @@ ```bash python3 instance_generators/test.py repository-files > instance_generators/out/test_instances.json python3 instance_generators/entities_to_jsonl.py test \\ - --data-source-id schema-bootstrap --source-path graph-management-assistant \\ + --data-source-id schema-bootstrap \\ instance_generators/out/test_instances.json > instance_generators/out/test_instances.jsonl # validate-from-file → apply-from-file path=instance_generators/out/test_instances.jsonl ``` @@ -109,7 +109,7 @@ ## Instance mutations (JSONL) -- CREATE requires `data_source_id` and `slug` on nodes. Add `source_path` only when provenance matters. +- CREATE requires `data_source_id` and `slug` on nodes. Put `source_path` in scanner `properties` when needed. - CREATE is strict — use UPDATE for existing instances. - Never hand-author bulk CREATE lines in chat; use `entities_to_jsonl.py` / `relationships_to_jsonl.py`. - Create all entity nodes before relationship edges. 
diff --git a/src/api/extraction/infrastructure/instance_generator_templates/README.md b/src/api/extraction/infrastructure/instance_generator_templates/README.md index 73e8fce37..ba91fff14 100644 --- a/src/api/extraction/infrastructure/instance_generator_templates/README.md +++ b/src/api/extraction/infrastructure/instance_generator_templates/README.md @@ -45,4 +45,6 @@ python3 instance_generators/relationships_to_jsonl.py defines repository test \ **Relationships:** `[{"source_slug": "...", "target_slug": "...", "properties": {}}]` +Include `source_path` in `properties` only when you need provenance on that instance. + Never write output to `/tmp` — only `instance_generators/out/` is valid for apply-from-file. diff --git a/src/api/extraction/infrastructure/instance_generator_templates/entities_to_jsonl.py b/src/api/extraction/infrastructure/instance_generator_templates/entities_to_jsonl.py index 6754c9831..0b794ab0c 100644 --- a/src/api/extraction/infrastructure/instance_generator_templates/entities_to_jsonl.py +++ b/src/api/extraction/infrastructure/instance_generator_templates/entities_to_jsonl.py @@ -12,7 +12,6 @@ python3 instance_generators/entities_to_jsonl.py test \\ --data-source-id schema-bootstrap \\ - --source-path graph-management-assistant \\ instance_generators/out/test_instances.json \\ > instance_generators/out/test_instances.jsonl @@ -42,15 +41,12 @@ def instance_to_create_line( slug: str, properties: dict[str, Any], data_source_id: str, - source_path: str, tenant_id: str, ) -> dict[str, Any]: set_properties = dict(properties) set_properties.setdefault("slug", slug) set_properties.setdefault("name", slug) set_properties["data_source_id"] = data_source_id - if source_path.strip(): - set_properties["source_path"] = source_path.strip() return { "op": "CREATE", "type": "node", @@ -89,11 +85,6 @@ def main() -> int: parser.add_argument("input", nargs="?", help="Path to JSON file; omit to read stdin.") parser.add_argument("--tenant-id", default="", 
help="Tenant id for deterministic node ids.") parser.add_argument("--data-source-id", default="schema-bootstrap") - parser.add_argument( - "--source-path", - default="", - help="Optional provenance path stamped on each CREATE when set.", - ) args = parser.parse_args() raw = Path(args.input).read_text(encoding="utf-8") if args.input else sys.stdin.read() @@ -104,7 +95,6 @@ def main() -> int: slug=row["slug"], properties=row["properties"], data_source_id=args.data_source_id, - source_path=args.source_path, tenant_id=args.tenant_id, ) sys.stdout.write(json.dumps(line, separators=(",", ":")) + "\n") diff --git a/src/api/extraction/infrastructure/instance_generator_templates/relationships_to_jsonl.py b/src/api/extraction/infrastructure/instance_generator_templates/relationships_to_jsonl.py index fff0c4e49..f91207315 100644 --- a/src/api/extraction/infrastructure/instance_generator_templates/relationships_to_jsonl.py +++ b/src/api/extraction/infrastructure/instance_generator_templates/relationships_to_jsonl.py @@ -56,7 +56,6 @@ def relationship_to_create_line( target_slug: str, properties: dict[str, Any], data_source_id: str, - source_path: str, tenant_id: str, ) -> dict[str, Any]: start_id = deterministic_node_id( @@ -71,8 +70,6 @@ def relationship_to_create_line( ) set_properties = dict(properties) set_properties["data_source_id"] = data_source_id - if source_path.strip(): - set_properties["source_path"] = source_path.strip() return { "op": "CREATE", "type": "edge", @@ -128,11 +125,6 @@ def main() -> int: parser.add_argument("input", nargs="?", help="Path to JSON file; omit to read stdin.") parser.add_argument("--tenant-id", default="", help="Tenant id for deterministic ids.") parser.add_argument("--data-source-id", default="schema-bootstrap") - parser.add_argument( - "--source-path", - default="", - help="Optional provenance path stamped on each CREATE when set.", - ) args = parser.parse_args() raw = Path(args.input).read_text(encoding="utf-8") if args.input else 
sys.stdin.read() @@ -146,7 +138,6 @@ def main() -> int: target_slug=row["target_slug"], properties=row["properties"], data_source_id=args.data_source_id, - source_path=args.source_path, tenant_id=args.tenant_id, ) sys.stdout.write(json.dumps(line, separators=(",", ":")) + "\n") diff --git a/src/api/tests/unit/extraction/infrastructure/test_entities_to_jsonl.py b/src/api/tests/unit/extraction/infrastructure/test_entities_to_jsonl.py index b64bb554b..f9c646407 100644 --- a/src/api/tests/unit/extraction/infrastructure/test_entities_to_jsonl.py +++ b/src/api/tests/unit/extraction/infrastructure/test_entities_to_jsonl.py @@ -74,3 +74,23 @@ def test_entities_to_jsonl_omits_source_path_when_not_configured(tmp_path: Path) line = json.loads(proc.stdout.strip()) assert "source_path" not in line["set_properties"] + + +def test_entities_to_jsonl_preserves_source_path_from_scanner_properties(tmp_path: Path) -> None: + instances_path = tmp_path / "test_instances.json" + instances_path.write_text( + json.dumps( + [{"slug": "a-entity", "properties": {"name": "A", "source_path": "pkg/a_test.go"}}] + ), + encoding="utf-8", + ) + + proc = subprocess.run( + [sys.executable, str(SCRIPT), "test", str(instances_path)], + check=True, + capture_output=True, + text=True, + ) + + line = json.loads(proc.stdout.strip()) + assert line["set_properties"]["source_path"] == "pkg/a_test.go" From b5d6f99006c6fd493fab5c2e3c0514393030ebf6 Mon Sep 17 00:00:00 2001 From: aredenba-rh <aredenba@redhat.com> Date: Fri, 5 Jun 2026 12:06:25 -0400 Subject: [PATCH 109/153] fix(extraction): grant GMA agent write access to sticky session workspace Root-owned session files blocked the agent container (HOST_UID) from authoring scanner scripts. Chown writable paths on materialize, mount full workspace rw with repository-files ro-only, and allow Write/Edit tools. 
Co-authored-by: Cursor <cursoragent@cursor.com> --- .../kartograph_agent_runtime/agent_prompt.py | 4 +- .../kartograph_agent_runtime/executor.py | 5 +- .../kartograph_agent_runtime/schema_tools.py | 2 +- src/agent-runtime/tests/test_schema_tools.py | 7 ++ .../application/schema_authoring_guide.py | 2 +- src/api/extraction/dependencies.py | 2 + .../sticky_session_workdir_materializer.py | 18 ++++- .../sticky_session_workspace_binds.py | 22 ++---- .../sticky_session_workspace_permissions.py | 59 ++++++++++++++ ...test_sticky_session_container_bootstrap.py | 3 +- .../test_sticky_session_workspace_binds.py | 9 +-- ...st_sticky_session_workspace_permissions.py | 77 +++++++++++++++++++ 12 files changed, 179 insertions(+), 31 deletions(-) create mode 100644 src/api/extraction/infrastructure/sticky_session_workspace_permissions.py create mode 100644 src/api/tests/unit/extraction/infrastructure/test_sticky_session_workspace_permissions.py diff --git a/src/agent-runtime/kartograph_agent_runtime/agent_prompt.py b/src/agent-runtime/kartograph_agent_runtime/agent_prompt.py index 3598ece23..6d74c2c14 100644 --- a/src/agent-runtime/kartograph_agent_runtime/agent_prompt.py +++ b/src/agent-runtime/kartograph_agent_runtime/agent_prompt.py @@ -35,6 +35,8 @@ | Tool | Purpose | |------|---------| | `Read` | Read files under the session workspace mount | +| `Write` | Create scanner scripts and JSON outputs under `instance_generators/` | +| `Edit` | Update existing workspace files (e.g. refine a scanner script) | | `Grep` | Search file contents in `repository-files/<data_source>/` | | `Glob` | List files by pattern for instance generation | | `Bash` | Run `instance_generators/*.py` against `repository-files/` (workspace only) | @@ -54,7 +56,7 @@ """.strip() _TOOLS_COMPACT_REFERENCE = ( - "Tools: kartograph_* schema MCP tools, plus Read/Grep/Glob/Bash. " + "Tools: kartograph_* schema MCP tools, plus Read/Write/Edit/Grep/Glob/Bash. 
" "Prepopulation: {label}.py → out/{label}_instances.json → entities_to_jsonl.py or " "relationships_to_jsonl.py → validate/apply out/{label}_instances.jsonl. Never /tmp." ) diff --git a/src/agent-runtime/kartograph_agent_runtime/executor.py b/src/agent-runtime/kartograph_agent_runtime/executor.py index a3ef0ada9..7b7df1b5d 100644 --- a/src/agent-runtime/kartograph_agent_runtime/executor.py +++ b/src/agent-runtime/kartograph_agent_runtime/executor.py @@ -58,8 +58,9 @@ def _build_workspace_prompt_appendix(settings: AgentRuntimeSettings) -> str: "## Session workspace", f"Workspace mount: `{settings.workspace_dir}`", ( - "Read-only: `repository-files/`, `ingestion-context/`. " - "Writable: `instance_generators/` — `{label}.py` and `out/{label}_instances.json(l)`. " + "Read-only: `repository-files/`. " + "Writable: entire workspace except repository snapshots — " + "`instance_generators/` for `{label}.py` and `out/{label}_instances.json(l)`. " "Platform converters: `entities_to_jsonl.py`, `relationships_to_jsonl.py`. " "Never `/tmp`. One batch per gap via apply-from-file." 
), diff --git a/src/agent-runtime/kartograph_agent_runtime/schema_tools.py b/src/agent-runtime/kartograph_agent_runtime/schema_tools.py index 67da5b8a7..c00fbc846 100644 --- a/src/agent-runtime/kartograph_agent_runtime/schema_tools.py +++ b/src/agent-runtime/kartograph_agent_runtime/schema_tools.py @@ -8,7 +8,7 @@ from kartograph_agent_runtime.tools import RuntimeTooling -WORKSPACE_FILE_TOOL_NAMES = ("Read", "Grep", "Glob", "Bash") +WORKSPACE_FILE_TOOL_NAMES = ("Read", "Write", "Edit", "Grep", "Glob", "Bash") KARTOGRAPH_SCHEMA_TOOL_NAMES = ( "kartograph_get_schema_authoring_guide", diff --git a/src/agent-runtime/tests/test_schema_tools.py b/src/agent-runtime/tests/test_schema_tools.py index 8172666be..594539006 100644 --- a/src/agent-runtime/tests/test_schema_tools.py +++ b/src/agent-runtime/tests/test_schema_tools.py @@ -30,6 +30,13 @@ def test_gma_allowed_tools_include_workspace_file_tools() -> None: assert tool_name in GMA_ALLOWED_TOOL_NAMES +def test_gma_allowed_tools_include_write_and_edit() -> None: + from kartograph_agent_runtime.schema_tools import GMA_ALLOWED_TOOL_NAMES + + assert "Write" in GMA_ALLOWED_TOOL_NAMES + assert "Edit" in GMA_ALLOWED_TOOL_NAMES + + def test_gma_allowed_tools_include_bash() -> None: from kartograph_agent_runtime.schema_tools import GMA_ALLOWED_TOOL_NAMES diff --git a/src/api/extraction/application/schema_authoring_guide.py b/src/api/extraction/application/schema_authoring_guide.py index 172d32be7..3c2ae8fc7 100644 --- a/src/api/extraction/application/schema_authoring_guide.py +++ b/src/api/extraction/application/schema_authoring_guide.py @@ -10,8 +10,8 @@ | Path | Access | Purpose | |------|--------|---------| | `repository-files/<data_source>/` | read-only | Source repos for Glob/Grep/Read | -| `ingestion-context/` | read-only | Sync metadata | | `instance_generators/` | **writable** | `{label}.py` scanners + `out/*_instances.json(l)` | +| rest of workspace | **writable** | Session metadata, agent-authored files | Never write to 
`/tmp`. Apply-from-file paths must be under `instance_generators/out/` (e.g. `instance_generators/out/test_instances.jsonl`). diff --git a/src/api/extraction/dependencies.py b/src/api/extraction/dependencies.py index f36212610..5c6ed70d7 100644 --- a/src/api/extraction/dependencies.py +++ b/src/api/extraction/dependencies.py @@ -110,6 +110,8 @@ def get_extraction_chat_turn_service( ), workdir_materializer=StickySessionWorkdirMaterializer( job_package_work_dir=Path(runtime_settings.job_package_work_dir), + container_run_uid=runtime_settings.container_run_uid, + container_run_gid=runtime_settings.container_run_gid, ), runtime_settings=runtime_settings, ) diff --git a/src/api/extraction/infrastructure/sticky_session_workdir_materializer.py b/src/api/extraction/infrastructure/sticky_session_workdir_materializer.py index 3da8989ba..ff4d22195 100644 --- a/src/api/extraction/infrastructure/sticky_session_workdir_materializer.py +++ b/src/api/extraction/infrastructure/sticky_session_workdir_materializer.py @@ -12,6 +12,9 @@ TEMPLATES_DIR, TEMPLATE_SCRIPT_NAMES, ) +from extraction.infrastructure.sticky_session_workspace_permissions import ( + ensure_agent_workspace_permissions, +) from shared_kernel.job_package.path_safety import validate_zip_entry_name from shared_kernel.job_package.reader import JobPackageReader from shared_kernel.job_package.value_objects import JobPackageId @@ -29,8 +32,16 @@ def _replace_directory(path: Path) -> None: class StickySessionWorkdirMaterializer: """Materialize JobPackage archives into a session-scoped work directory.""" - def __init__(self, *, job_package_work_dir: Path) -> None: + def __init__( + self, + *, + job_package_work_dir: Path, + container_run_uid: int | None = None, + container_run_gid: int | None = None, + ) -> None: self._job_package_work_dir = job_package_work_dir + self._container_run_uid = container_run_uid + self._container_run_gid = container_run_gid def prepare( self, @@ -102,6 +113,11 @@ def prepare( 
knowledge_graph_id=knowledge_graph_id, sources=index_sources, ) + ensure_agent_workspace_permissions( + session_root, + container_run_uid=self._container_run_uid, + container_run_gid=self._container_run_gid, + ) return session_root @staticmethod diff --git a/src/api/extraction/infrastructure/sticky_session_workspace_binds.py b/src/api/extraction/infrastructure/sticky_session_workspace_binds.py index 4c5647c11..32acb2d20 100644 --- a/src/api/extraction/infrastructure/sticky_session_workspace_binds.py +++ b/src/api/extraction/infrastructure/sticky_session_workspace_binds.py @@ -1,20 +1,12 @@ """Docker bind mounts for sticky session workspaces. -Repository snapshots stay read-only; ``instance_generators/`` must be writable so the -agent can author scanner scripts and JSON/JSONL outputs for bulk prepopulation. +The full workspace is writable so the agent can author scanner scripts and outputs. +``repository-files/`` is overlaid read-only so ingested source snapshots stay immutable. """ from __future__ import annotations -WORKSPACE_READONLY_SUBDIRS: tuple[str, ...] = ( - "repository-files", - "ingestion-context", -) -WORKSPACE_WRITABLE_SUBDIRS: tuple[str, ...] = ("instance_generators",) -WORKSPACE_READONLY_ROOT_FILES: tuple[str, ...] = ( - "sources-index.json", - "knowledge-graph-id", -) +WORKSPACE_READONLY_SUBDIRS: tuple[str, ...] 
= ("repository-files",) def build_sticky_session_workspace_binds( @@ -22,14 +14,10 @@ def build_sticky_session_workspace_binds( host_session_work_dir: str, container_work_mount: str, ) -> tuple[str, ...]: - """Return bind specs that expose a split read/write workspace layout.""" + """Return bind specs that expose a writable workspace with read-only repository files.""" host_root = host_session_work_dir.rstrip("/") container_root = container_work_mount.rstrip("/") - binds: list[str] = [] + binds = [f"{host_root}:{container_root}"] for subdir in WORKSPACE_READONLY_SUBDIRS: binds.append(f"{host_root}/{subdir}:{container_root}/{subdir}:ro") - for subdir in WORKSPACE_WRITABLE_SUBDIRS: - binds.append(f"{host_root}/{subdir}:{container_root}/{subdir}") - for filename in WORKSPACE_READONLY_ROOT_FILES: - binds.append(f"{host_root}/{filename}:{container_root}/{filename}:ro") return tuple(binds) diff --git a/src/api/extraction/infrastructure/sticky_session_workspace_permissions.py b/src/api/extraction/infrastructure/sticky_session_workspace_permissions.py new file mode 100644 index 000000000..293899707 --- /dev/null +++ b/src/api/extraction/infrastructure/sticky_session_workspace_permissions.py @@ -0,0 +1,59 @@ +"""Ensure sticky session workspaces are writable by the agent container user.""" + +from __future__ import annotations + +import os +import stat +from pathlib import Path + +_REPOSITORY_FILES_DIRNAME = "repository-files" + + +def _is_under_repository_files(path: Path, session_root: Path) -> bool: + try: + path.relative_to(session_root / _REPOSITORY_FILES_DIRNAME) + except ValueError: + return False + return True + + +def ensure_agent_workspace_permissions( + session_root: Path, + *, + container_run_uid: int | None, + container_run_gid: int | None, +) -> None: + """Grant the sticky container user write access everywhere except repository-files.""" + if container_run_uid is not None and container_run_gid is not None: + _chown_writable_tree( + session_root, + 
uid=container_run_uid, + gid=container_run_gid, + ) + return + _chmod_writable_tree(session_root) + + +def _chown_writable_tree(session_root: Path, *, uid: int, gid: int) -> None: + for path in sorted(session_root.rglob("*"), key=lambda item: len(item.parts), reverse=True): + if _is_under_repository_files(path, session_root): + continue + if path.is_symlink(): + continue + os.chown(path, uid, gid) + os.chmod(path, 0o775 if path.is_dir() else 0o664) + if not _is_under_repository_files(session_root, session_root): + os.chown(session_root, uid, gid) + os.chmod(session_root, 0o775) + + +def _chmod_writable_tree(session_root: Path) -> None: + for path in sorted(session_root.rglob("*"), key=lambda item: len(item.parts), reverse=True): + if _is_under_repository_files(path, session_root): + continue + if path.is_symlink(): + continue + mode = path.stat().st_mode + desired = mode | stat.S_IWUSR | stat.S_IWGRP | stat.S_IWOTH + os.chmod(path, desired) + session_root.chmod(session_root.stat().st_mode | stat.S_IWUSR | stat.S_IWGRP | stat.S_IWOTH) diff --git a/src/api/tests/unit/extraction/infrastructure/test_sticky_session_container_bootstrap.py b/src/api/tests/unit/extraction/infrastructure/test_sticky_session_container_bootstrap.py index d047aea3c..e9c9c6148 100644 --- a/src/api/tests/unit/extraction/infrastructure/test_sticky_session_container_bootstrap.py +++ b/src/api/tests/unit/extraction/infrastructure/test_sticky_session_container_bootstrap.py @@ -53,9 +53,8 @@ def test_start_runtime_mounts_skills_workspace_and_injects_token() -> None: assert spec.network == "kartograph_kartograph" assert spec.env["KARTOGRAPH_WORKLOAD_TOKEN"] == credentials.token assert "/tmp/skills:/app/skills:ro" in spec.binds + assert "/tmp/session-work:/workspace" in spec.binds assert "/tmp/session-work/repository-files:/workspace/repository-files:ro" in spec.binds - assert "/tmp/session-work/instance_generators:/workspace/instance_generators" in spec.binds - assert 
"/tmp/session-work/sources-index.json:/workspace/sources-index.json:ro" in spec.binds assert "/host/.config/gcloud:/gcloud/config:ro" in spec.binds assert spec.env["CLOUDSDK_CONFIG"] == "/gcloud/config" assert spec.env["GOOGLE_APPLICATION_CREDENTIALS"] == ( diff --git a/src/api/tests/unit/extraction/infrastructure/test_sticky_session_workspace_binds.py b/src/api/tests/unit/extraction/infrastructure/test_sticky_session_workspace_binds.py index 3fee70ac9..db27b940d 100644 --- a/src/api/tests/unit/extraction/infrastructure/test_sticky_session_workspace_binds.py +++ b/src/api/tests/unit/extraction/infrastructure/test_sticky_session_workspace_binds.py @@ -7,15 +7,12 @@ ) -def test_workspace_binds_split_read_only_and_writable_paths() -> None: +def test_workspace_binds_mount_full_workspace_with_read_only_repository_files() -> None: binds = build_sticky_session_workspace_binds( host_session_work_dir="/host/session", container_work_mount="/workspace", ) + assert "/host/session:/workspace" in binds assert "/host/session/repository-files:/workspace/repository-files:ro" in binds - assert "/host/session/ingestion-context:/workspace/ingestion-context:ro" in binds - assert "/host/session/instance_generators:/workspace/instance_generators" in binds - assert "/host/session/sources-index.json:/workspace/sources-index.json:ro" in binds - assert "/host/session/knowledge-graph-id:/workspace/knowledge-graph-id:ro" in binds - assert not any(bind.endswith("/workspace:ro") for bind in binds) + assert len(binds) == 2 diff --git a/src/api/tests/unit/extraction/infrastructure/test_sticky_session_workspace_permissions.py b/src/api/tests/unit/extraction/infrastructure/test_sticky_session_workspace_permissions.py new file mode 100644 index 000000000..94e3b58cb --- /dev/null +++ b/src/api/tests/unit/extraction/infrastructure/test_sticky_session_workspace_permissions.py @@ -0,0 +1,77 @@ +"""Unit tests for sticky session workspace permission normalization.""" + +from __future__ import annotations + 
+import os +import stat +from pathlib import Path + +from extraction.infrastructure.sticky_session_workspace_permissions import ( + ensure_agent_workspace_permissions, +) + + +def test_chown_grants_container_user_write_access_outside_repository_files(tmp_path: Path) -> None: + session_root = tmp_path / "session" + generators = session_root / "instance_generators" / "out" + repo_files = session_root / "repository-files" / "hyperfleet-e2e" + generators.mkdir(parents=True) + repo_files.mkdir(parents=True) + (generators / "test_instances.json").write_text("[]\n", encoding="utf-8") + (repo_files / "example.go").write_text("package main\n", encoding="utf-8") + + target_uid = os.getuid() + target_gid = os.getgid() + ensure_agent_workspace_permissions( + session_root, + container_run_uid=target_uid, + container_run_gid=target_gid, + ) + + assert (generators / "test_instances.json").stat().st_uid == target_uid + assert (generators / "test_instances.json").stat().st_gid == target_gid + assert (session_root / "instance_generators").stat().st_mode & stat.S_IWUSR + + +def test_chmod_fallback_makes_writable_paths_world_writable(tmp_path: Path) -> None: + session_root = tmp_path / "session" + out_dir = session_root / "instance_generators" / "out" + out_dir.mkdir(parents=True) + out_dir.chmod(0o755) + + ensure_agent_workspace_permissions( + session_root, + container_run_uid=None, + container_run_gid=None, + ) + + mode = out_dir.stat().st_mode + assert mode & stat.S_IWOTH + + +def test_materializer_applies_container_user_permissions(tmp_path: Path) -> None: + from extraction.infrastructure.sticky_session_workdir_materializer import ( + StickySessionWorkdirMaterializer, + ) + + target_uid = os.getuid() + target_gid = os.getgid() + materializer = StickySessionWorkdirMaterializer( + job_package_work_dir=tmp_path, + container_run_uid=target_uid, + container_run_gid=target_gid, + ) + + session_root = materializer.prepare( + session_id="session-perms", + knowledge_graph_id="kg-1", + 
job_packages=(), + ) + + scanner_path = session_root / "instance_generators" / "E2ETest.py" + scanner_path.write_text("# scanner\n", encoding="utf-8") + out_path = session_root / "instance_generators" / "out" / "E2ETest_instances.json" + out_path.write_text("[]\n", encoding="utf-8") + + assert scanner_path.stat().st_uid == target_uid + assert out_path.stat().st_uid == target_uid From fa8b3efd3931d2cb7ff3d1802ed48b77e0e0a61f Mon Sep 17 00:00:00 2001 From: aredenba-rh <aredenba@redhat.com> Date: Fri, 5 Jun 2026 14:07:53 -0400 Subject: [PATCH 110/153] feat(extraction): strengthen GMA prepopulation templates and readiness Add workflow docs, scanner helpers/examples, preview_instances, and readiness next_action hints. Preserve case-sensitive ontology labels in JSONL converters. Co-authored-by: Cursor <cursoragent@cursor.com> --- .../kartograph_agent_runtime/agent_prompt.py | 32 ++++- .../kartograph_agent_runtime/executor.py | 7 +- src/agent-runtime/tests/test_agent_prompt.py | 18 ++- .../application/schema_authoring_guide.py | 35 +++-- .../application/skill_resolution_service.py | 33 +++-- .../PREPOPULATION_WORKFLOW.md | 121 ++++++++++++++++ .../instance_generator_templates/README.md | 46 +++--- .../instance_generator_templates/__init__.py | 12 ++ .../_entity_scanner.example.py | 62 +++++--- .../_relationship_scanner.example.py | 61 ++++++++ .../entities_to_jsonl.py | 13 +- .../examples/adapter_scanner.example.py | 64 +++++++++ .../examples/api_endpoint_scanner.example.py | 68 +++++++++ .../examples/resource_scanner.example.py | 63 +++++++++ .../examples/test_scanner.example.py | 80 +++++++++++ .../preview_instances.py | 65 +++++++++ .../relationships_to_jsonl.py | 22 ++- .../scanner_common.py | 67 +++++++++ .../sticky_session_workdir_materializer.py | 8 ++ .../presentation/workload_routes.py | 2 + .../workspace_readiness.py | 133 ++++++++++++++++++ .../infrastructure/test_entities_to_jsonl.py | 20 +++ .../infrastructure/test_preview_instances.py | 40 ++++++ 
.../infrastructure/test_scanner_common.py | 36 +++++ ...est_sticky_session_workdir_materializer.py | 6 + .../presentation/test_workload_routes.py | 3 +- .../test_workspace_readiness.py | 6 + 27 files changed, 1047 insertions(+), 76 deletions(-) create mode 100644 src/api/extraction/infrastructure/instance_generator_templates/PREPOPULATION_WORKFLOW.md create mode 100644 src/api/extraction/infrastructure/instance_generator_templates/_relationship_scanner.example.py create mode 100644 src/api/extraction/infrastructure/instance_generator_templates/examples/adapter_scanner.example.py create mode 100644 src/api/extraction/infrastructure/instance_generator_templates/examples/api_endpoint_scanner.example.py create mode 100644 src/api/extraction/infrastructure/instance_generator_templates/examples/resource_scanner.example.py create mode 100644 src/api/extraction/infrastructure/instance_generator_templates/examples/test_scanner.example.py create mode 100644 src/api/extraction/infrastructure/instance_generator_templates/preview_instances.py create mode 100644 src/api/extraction/infrastructure/instance_generator_templates/scanner_common.py create mode 100644 src/api/tests/unit/extraction/infrastructure/test_preview_instances.py create mode 100644 src/api/tests/unit/extraction/infrastructure/test_scanner_common.py diff --git a/src/agent-runtime/kartograph_agent_runtime/agent_prompt.py b/src/agent-runtime/kartograph_agent_runtime/agent_prompt.py index 6d74c2c14..b25b71699 100644 --- a/src/agent-runtime/kartograph_agent_runtime/agent_prompt.py +++ b/src/agent-runtime/kartograph_agent_runtime/agent_prompt.py @@ -39,14 +39,16 @@ | `Edit` | Update existing workspace files (e.g. 
refine a scanner script) | | `Grep` | Search file contents in `repository-files/<data_source>/` | | `Glob` | List files by pattern for instance generation | -| `Bash` | Run `instance_generators/*.py` against `repository-files/` (workspace only) | +| `Bash` | Run scanners and `preview_instances.py` against `repository-files/` | + +See `instance_generators/PREPOPULATION_WORKFLOW.md` for the numbered prepopulation checklist. ### Quick workflow 1. `kartograph_get_schema_authoring_guide` 2. `kartograph_get_workspace_readiness` 3. `kartograph_get_schema_ontology` -4. Prepopulation: `{label}.py` → `out/{label}_instances.json` → `entities_to_jsonl.py` → apply-from-file +4. Prepopulation: `{Label}.py` (case-sensitive) → `out/{Label}_instances.json` → `preview_instances.py` → `entities_to_jsonl.py` → apply-from-file 5. Model types → `kartograph_save_schema_ontology` 6. Apply CREATE mutations → `kartograph_apply_graph_mutations` (small fixes inline; bulk via generator output) 7. Create relationship edges after entity IDs are known @@ -65,11 +67,16 @@ def _format_workspace_readiness(readiness: dict[str, Any]) -> str: lines = ["## Workspace readiness (live snapshot)"] + next_action = str(readiness.get("next_action") or "").strip() + if next_action: + lines.append(f"- **Next action:** {next_action}") + entity_gaps = readiness.get("prepopulated_entity_types_without_instances_live") or [] rel_gaps = readiness.get("prepopulated_relationship_types_without_instances_live") or [] blocking = readiness.get("blocking_reasons") or [] prepopulated_types = readiness.get("prepopulated_entity_types") or [] prepopulated_relationships = readiness.get("prepopulated_relationship_types") or [] + prepopulation_tasks = readiness.get("prepopulation_tasks") or [] if entity_gaps: lines.append( @@ -85,6 +92,23 @@ def _format_workspace_readiness(readiness: dict[str, Any]) -> str: + ", ".join(f"`{key}`" for key in rel_gaps) ) + if prepopulation_tasks: + lines.append("- Prepopulation tasks:") + for task in 
prepopulation_tasks[:8]: + if not isinstance(task, dict): + continue + kind = str(task.get("kind") or "task") + if kind == "entity": + label = str(task.get("label") or "?") + live = task.get("live_instance_count", 0) + scanner = str(task.get("scanner_path") or "?") + lines.append(f" - `{label}` ({live} live) → create `{scanner}`") + else: + key = str(task.get("key") or "?") + live = task.get("live_instance_count", 0) + scanner = str(task.get("scanner_path") or "?") + lines.append(f" - `{key}` ({live} live) → create `{scanner}`") + if prepopulated_types: lines.append("- Prepopulated entity coverage:") for row in prepopulated_types: @@ -93,7 +117,9 @@ def _format_workspace_readiness(readiness: dict[str, Any]) -> str: label = str(row.get("label") or "?") live = row.get("live_instance_count", 0) metadata = row.get("metadata_instance_count", 0) - lines.append(f" - `{label}`: live={live}, metadata={metadata}") + required = row.get("required_properties") or [] + req_hint = f", required={list(required)}" if required else "" + lines.append(f" - `{label}`: live={live}, metadata={metadata}{req_hint}") if prepopulated_relationships: lines.append("- Prepopulated relationship coverage:") diff --git a/src/agent-runtime/kartograph_agent_runtime/executor.py b/src/agent-runtime/kartograph_agent_runtime/executor.py index 7b7df1b5d..263c6240b 100644 --- a/src/agent-runtime/kartograph_agent_runtime/executor.py +++ b/src/agent-runtime/kartograph_agent_runtime/executor.py @@ -60,9 +60,10 @@ def _build_workspace_prompt_appendix(settings: AgentRuntimeSettings) -> str: ( "Read-only: `repository-files/`. " "Writable: entire workspace except repository snapshots — " - "`instance_generators/` for `{label}.py` and `out/{label}_instances.json(l)`. " - "Platform converters: `entities_to_jsonl.py`, `relationships_to_jsonl.py`. " - "Never `/tmp`. One batch per gap via apply-from-file." + "see `instance_generators/PREPOPULATION_WORKFLOW.md`. 
" + "`instance_generators/{Label}.py` and `out/{Label}_instances.json(l)` (case-sensitive). " + "Platform: `entities_to_jsonl.py`, `relationships_to_jsonl.py`, `preview_instances.py`, " + "`scanner_common.py`. Never `/tmp`. One batch per gap via apply-from-file." ), ] for source in sources[:12]: diff --git a/src/agent-runtime/tests/test_agent_prompt.py b/src/agent-runtime/tests/test_agent_prompt.py index ebf7b1d33..a56fecfb7 100644 --- a/src/agent-runtime/tests/test_agent_prompt.py +++ b/src/agent-runtime/tests/test_agent_prompt.py @@ -46,10 +46,24 @@ def test_build_agent_system_prompt_includes_workspace_readiness() -> None: KARTOGRAPH_KNOWLEDGE_GRAPH_ID="kg-123", ), workspace_readiness={ + "next_action": "Run entity prepopulation for `folder`.", + "prepopulation_tasks": [ + { + "kind": "entity", + "label": "folder", + "live_instance_count": 0, + "scanner_path": "instance_generators/folder.py", + } + ], "prepopulated_entity_types_without_instances_live": ["folder"], "prepopulated_relationship_types_without_instances_live": [], "prepopulated_entity_types": [ - {"label": "folder", "live_instance_count": 0, "metadata_instance_count": 0} + { + "label": "folder", + "live_instance_count": 0, + "metadata_instance_count": 0, + "required_properties": ["name"], + } ], "blocking_reasons": ["Prepopulated entity types require instances before transition: folder"], "transition_eligible": False, @@ -57,6 +71,8 @@ def test_build_agent_system_prompt_includes_workspace_readiness() -> None: ) assert "Workspace readiness" in prompt + assert "Next action" in prompt + assert "instance_generators/folder.py" in prompt assert "`folder`" in prompt assert "kartograph_get_workspace_readiness" in prompt assert "Read" in prompt diff --git a/src/api/extraction/application/schema_authoring_guide.py b/src/api/extraction/application/schema_authoring_guide.py index 3c2ae8fc7..0c6dc190a 100644 --- a/src/api/extraction/application/schema_authoring_guide.py +++ 
b/src/api/extraction/application/schema_authoring_guide.py @@ -13,11 +13,16 @@ | `instance_generators/` | **writable** | `{label}.py` scanners + `out/*_instances.json(l)` | | rest of workspace | **writable** | Session metadata, agent-authored files | -Never write to `/tmp`. Apply-from-file paths must be under `instance_generators/out/` -(e.g. `instance_generators/out/test_instances.jsonl`). +Never write to `/tmp` — files there are outside the sticky workspace and cannot be used with +apply-from-file. If `instance_generators/` is not writable, report the error; do not work around it. -Bundled platform scripts (do not edit): `entities_to_jsonl.py`, `relationships_to_jsonl.py`. -Copy `_entity_scanner.example.py` to `{label}.py` for each prepopulated type. +Read `instance_generators/PREPOPULATION_WORKFLOW.md` for the numbered six-step entity pipeline, +relationship workflow, slug rules, batch sizes, and verification checklist. + +Bundled platform scripts (do not edit): `entities_to_jsonl.py`, `relationships_to_jsonl.py`, +`preview_instances.py`, `scanner_common.py`. +Copy `_entity_scanner.example.py` to `{Label}.py` — **filename must match ontology label exactly** +(case-sensitive: `E2ETest.py`, not `e2etest.py`). Domain references: `instance_generators/examples/`. 
## Bootstrap workflow (6 phases) @@ -35,11 +40,12 @@ **Entities** (all entity gaps before any relationship gap): ```bash -python3 instance_generators/test.py repository-files > instance_generators/out/test_instances.json -python3 instance_generators/entities_to_jsonl.py test \\ +python3 instance_generators/E2ETest.py repository-files > instance_generators/out/E2ETest_instances.json +python3 instance_generators/preview_instances.py E2ETest --limit 5 +python3 instance_generators/entities_to_jsonl.py E2ETest \\ --data-source-id schema-bootstrap \\ - instance_generators/out/test_instances.json > instance_generators/out/test_instances.jsonl -# validate-from-file → apply-from-file path=instance_generators/out/test_instances.jsonl + instance_generators/out/E2ETest_instances.json > instance_generators/out/E2ETest_instances.jsonl +# validate-from-file → apply-from-file path=instance_generators/out/E2ETest_instances.jsonl ``` **Relationships** (after entity slugs exist; name files `{source}_{rel}_{target}_instances.*`): @@ -91,7 +97,16 @@ } ``` -Scanner script convention: `instance_generators/{label}.py` → `out/{label}_instances.json`. +Scanner script convention: `instance_generators/{Label}.py` → `out/{Label}_instances.json` +(case-sensitive `{Label}` matching ontology). + +## Slug and property rules + +- Slugs: lowercase snake_case via `scanner_common.generate_slug()`; dedupe with `dedupe_instances()`. +- Required properties: see `required_properties` on each type in ontology/readiness — include in every instance. +- Optional properties: omit or use empty defaults when source data is incomplete. +- Single deliverable (one entity type): run the full pipeline without stopping. +- Multiple deliverables: one label per turn, then report and continue. ## Relationship type shape @@ -120,5 +135,5 @@ - Every `prepopulated=true` relationship type needs ≥1 live edge. - Prepopulated relationships may only reference prepopulated entity types. 
-Call `kartograph_get_workspace_readiness` for gaps and `blocking_reasons`. +Call `kartograph_get_workspace_readiness` for gaps, `next_action`, `prepopulation_tasks`, and `blocking_reasons`. """.strip() diff --git a/src/api/extraction/application/skill_resolution_service.py b/src/api/extraction/application/skill_resolution_service.py index 24176d756..200dbb51e 100644 --- a/src/api/extraction/application/skill_resolution_service.py +++ b/src/api/extraction/application/skill_resolution_service.py @@ -59,16 +59,22 @@ class ResolvedExtractionSkillPack: "Exception: the user explicitly says to save/apply or continues after reviewing your draft." ), ( - "Prepopulation (prepopulated=true types): write instance_generators/{label}.py → " - "out/{label}_instances.json → entities_to_jsonl.py or relationships_to_jsonl.py → " - "validate/apply instance_generators/out/{label}_instances.jsonl in one batch. " - "Never /tmp, never hand-author CREATE lines. All entity gaps before relationship gaps." + "Prepopulation (prepopulated=true types): copy _entity_scanner.example.py to " + "instance_generators/{Label}.py (case-sensitive — must match ontology label exactly) → " + "out/{Label}_instances.json → preview_instances.py (optional) → entities_to_jsonl.py → " + "validate/apply instance_generators/out/{Label}_instances.jsonl in one batch. " + "Never /tmp (not persisted, invalid for apply-from-file). All entity gaps before relationship gaps." + ), + ( + "Single prepopulation deliverable (one entity or relationship type): execute the full " + "PREPOPULATION_WORKFLOW.md pipeline end-to-end without stopping for permission. " + "Multiple deliverables in one message: one label per turn, summarize, then continue." ), ( "When readiness shows prepopulated gaps after schema save, execute immediately — do not ask " - "permission. One label per turn: copy _entity_scanner.example.py to {label}.py, customize " - "scan(), run pipeline, re-check readiness. 
Only ask when discovery strategy is ambiguous " - "or strict CREATE reports duplicates." + "permission. One label per turn unless the user requested a single type only (then finish fully). " + "Use readiness next_action and prepopulation_tasks for the suggested scanner path. " + "Only ask when discovery strategy is ambiguous or strict CREATE reports duplicates." ), ), }, @@ -122,15 +128,16 @@ class ResolvedExtractionSkillPack: "Read/save ontology via kartograph_get_schema_ontology and kartograph_save_schema_ontology." ), "prepopulation": ( - "Per prepopulated gap: {label}.py scans repository-files/ → out/{label}_instances.json → " - "entities_to_jsonl.py or relationships_to_jsonl.py → out/{label}_instances.jsonl → " - "validate/apply-from-file (one batch). Entities before relationships. Primary edges only." + "Follow instance_generators/PREPOPULATION_WORKFLOW.md. Per gap: {Label}.py (case-sensitive filename) " + "→ out/{Label}_instances.json → preview_instances.py (optional) → entities_to_jsonl.py or " + "relationships_to_jsonl.py → validate/apply-from-file. Use scanner_common.generate_slug() and " + "dedupe_instances(). Entities before relationships. Primary edges only." ), "readiness_reporting": ( "After schema or prepopulation work, call kartograph_get_workspace_readiness and cite " - "blocking_reasons, prepopulated gaps, and transition_eligible. When gaps remain after " - "schema save, state which single prepopulation task you are executing next — do not poll " - "the user for permission to start." + "next_action, prepopulation_tasks, blocking_reasons, and transition_eligible. When gaps remain " + "after schema save, state which single prepopulation task you are executing next — do not poll " + "the user for permission to start unless the user asked for multiple types at once." 
), }, ExtractionSessionMode.EXTRACTION_OPERATIONS: { diff --git a/src/api/extraction/infrastructure/instance_generator_templates/PREPOPULATION_WORKFLOW.md b/src/api/extraction/infrastructure/instance_generator_templates/PREPOPULATION_WORKFLOW.md new file mode 100644 index 000000000..d57cdc876 --- /dev/null +++ b/src/api/extraction/infrastructure/instance_generator_templates/PREPOPULATION_WORKFLOW.md @@ -0,0 +1,121 @@ +# Entity and relationship prepopulation workflow + +Use this checklist for every `prepopulated: true` type after the ontology is saved. + +## Entity prepopulation (six steps) + +### Step 1 — Create scanner + +Copy `_entity_scanner.example.py` to `instance_generators/{Label}.py`. + +- **Filename must match the ontology `label` exactly** (case-sensitive): `E2ETest.py`, not `e2etest.py`. +- Customize `scan()` to discover instances across **all** `repository-files/<data_source>/` folders. +- Import helpers from `scanner_common.py` (`generate_slug`, `dedupe_instances`). + +Domain-specific references live in `instance_generators/examples/`. + +### Step 2 — Run scanner + +```bash +python3 instance_generators/{Label}.py repository-files \ + > instance_generators/out/{Label}_instances.json +``` + +Stdout contract: `[{"slug": "snake_case", "properties": {...}}, ...]` + +### Step 3 — Preview (optional, recommended) + +```bash +python3 instance_generators/preview_instances.py {Label} --limit 5 +``` + +Fix scanner logic before JSONL conversion if slugs or properties look wrong. + +### Step 4 — Convert to JSONL + +```bash +python3 instance_generators/entities_to_jsonl.py {Label} \ + --data-source-id schema-bootstrap \ + instance_generators/out/{Label}_instances.json \ + > instance_generators/out/{Label}_instances.jsonl +``` + +The CLI `{Label}` must match the ontology entity type **exactly** (case-sensitive). +`entities_to_jsonl.py` preserves that casing in CREATE `label` lines.
+ +### Step 5 — Validate (dry run) + +`kartograph_validate_graph_mutations_from_file` with path `instance_generators/out/{Label}_instances.jsonl`. + +CREATE is strict — duplicates fail here, not at apply time. + +### Step 6 — Apply and verify + +`kartograph_apply_graph_mutations_from_file` with the same path, then: + +1. Confirm apply result reports created count. +2. `kartograph_get_workspace_readiness()` — live count should increase; label leaves entity gaps. +3. `kartograph_list_instances_by_type(entity_type="{Label}")` — spot-check slugs. + +## Relationship prepopulation (after all entity gaps) + +Copy `_relationship_scanner.example.py` to `instance_generators/{Source}_{rel}_{Target}.py`. + +```bash +python3 instance_generators/{Source}_{rel}_{Target}.py repository-files \ + > instance_generators/out/{Source}_{rel}_{Target}_instances.json + +python3 instance_generators/relationships_to_jsonl.py {rel} {Source} {Target} \ + instance_generators/out/{Source}_{rel}_{Target}_instances.json \ + > instance_generators/out/{Source}_{rel}_{Target}_instances.jsonl +``` + +Author **primary direction only** — the platform creates inverse twins for bidirectional types. + +## Slug rules + +- Lowercase snake_case (use `generate_slug()` from `scanner_common.py`). +- Unique within the entity type. +- Deduplicate before writing JSON (`dedupe_instances()`). + +## Required vs optional properties + +Check `kartograph_get_schema_ontology` or readiness `prepopulated_entity_types[].required_properties`. + +- **Required:** must appear in every instance `properties` object (use sensible defaults if source data lacks them). +- **Optional:** omit or set `null`/empty when unknown. +- **Description:** auto-generate from slug when missing, e.g. `f"E2E test: {slug}"`. + +## Entity order + +Entities can be prepopulated in any order — relationships run after all entity gaps close. + +Suggested dependency order for clarity: + +1. Resource +2. APIEndpoint +3. Adapter +4.
ComponentTest / E2ETest + +## Batch sizes + +- Recommended: **100–500** instances per JSONL file. +- Above **1000**: split into `{Label}_instances_001.jsonl`, `{Label}_instances_002.jsonl`, apply each batch separately. + +## Never use `/tmp` + +Files under `/tmp` are outside the sticky workspace mount and are not valid for apply-from-file. +If you hit permission errors on `instance_generators/`, report them — do not work around with `/tmp`. + +## Single vs multi deliverable + +- **One entity type requested** (e.g. "prepopulate E2ETest"): run steps 1–6 end-to-end without stopping. +- **Multiple types requested**: complete one label per turn, report, then continue. + +## Success criteria + +Prepopulation for one label is complete when: + +- Apply succeeds with expected instance count. +- `kartograph_get_workspace_readiness()` shows zero live gap for that label. +- Spot-check via `kartograph_list_instances_by_type` looks correct. diff --git a/src/api/extraction/infrastructure/instance_generator_templates/README.md b/src/api/extraction/infrastructure/instance_generator_templates/README.md index ba91fff14..7f7e1db7a 100644 --- a/src/api/extraction/infrastructure/instance_generator_templates/README.md +++ b/src/api/extraction/infrastructure/instance_generator_templates/README.md @@ -4,47 +4,59 @@ Prepopulation for `prepopulated: true` types uses **three kinds of files**: | File | Who writes it | Purpose | |------|---------------|---------| -| `{label}.py` | Agent | Scans `repository-files/` → JSON array on stdout | +| `{Label}.py` | Agent | Scans `repository-files/` → JSON array on stdout | | `entities_to_jsonl.py` | Platform | `{label}_instances.json` → `{label}_instances.jsonl` | | `relationships_to_jsonl.py` | Platform | `{key}_instances.json` → `{key}_instances.jsonl` | -Copy `_entity_scanner.example.py` to `{entity_label}.py` and replace the `scan()` body. 
+**Read `PREPOPULATION_WORKFLOW.md` first** — it documents the full six-step entity pipeline, +relationship workflow, slug rules, batch sizes, and verification. + +## Naming (case-sensitive) + +| Item | Convention | +|------|------------| +| Entity scanner | `instance_generators/{Label}.py` — must match ontology `label` exactly (`E2ETest.py`, not `e2etest.py`) | +| Entity output | `instance_generators/out/{Label}_instances.json` | +| Relationship scanner | `instance_generators/{Source}_{rel}_{Target}.py` | +| Relationship output | `instance_generators/out/{Source}_{rel}_{Target}_instances.json` | + +Copy `_entity_scanner.example.py` to `{Label}.py` or start from `examples/` for domain patterns. ## Entity prepopulation (one type per turn) ```bash -python3 instance_generators/test.py repository-files \ - > instance_generators/out/test_instances.json +python3 instance_generators/E2ETest.py repository-files \ + > instance_generators/out/E2ETest_instances.json -python3 instance_generators/entities_to_jsonl.py test \ +python3 instance_generators/preview_instances.py E2ETest --limit 5 + +python3 instance_generators/entities_to_jsonl.py E2ETest \ --data-source-id schema-bootstrap \ - instance_generators/out/test_instances.json \ - > instance_generators/out/test_instances.jsonl + instance_generators/out/E2ETest_instances.json \ + > instance_generators/out/E2ETest_instances.jsonl ``` Then `kartograph_validate_graph_mutations_from_file` and `kartograph_apply_graph_mutations_from_file` with path -`instance_generators/out/test_instances.jsonl` (one batch for all instances). +`instance_generators/out/E2ETest_instances.jsonl`. ## Relationship prepopulation (after all entity gaps) -Naming: `out/{source}_{relationship}_{target}_instances.json` (e.g. `repository_defines_test_instances.json`). 
- ```bash -python3 instance_generators/repository_defines_test.py repository-files \ - > instance_generators/out/repository_defines_test_instances.json +python3 instance_generators/ComponentTest_exercises_APIEndpoint.py repository-files \ + > instance_generators/out/ComponentTest_exercises_APIEndpoint_instances.json -python3 instance_generators/relationships_to_jsonl.py defines repository test \ - instance_generators/out/repository_defines_test_instances.json \ - > instance_generators/out/repository_defines_test_instances.jsonl +python3 instance_generators/relationships_to_jsonl.py exercises ComponentTest APIEndpoint \ + instance_generators/out/ComponentTest_exercises_APIEndpoint_instances.json \ + > instance_generators/out/ComponentTest_exercises_APIEndpoint_instances.jsonl ``` ## Scanner JSON contract -**Entities:** `[{"slug": "kebab-case", "properties": {...}}]` +**Entities:** `[{"slug": "snake_case", "properties": {...}}]` **Relationships:** `[{"source_slug": "...", "target_slug": "...", "properties": {}}]` -Include `source_path` in `properties` only when you need provenance on that instance. +Use `scanner_common.generate_slug()` and `dedupe_instances()` / `dedupe_relationships()`. Never write output to `/tmp` — only `instance_generators/out/` is valid for apply-from-file.
diff --git a/src/api/extraction/infrastructure/instance_generator_templates/__init__.py b/src/api/extraction/infrastructure/instance_generator_templates/__init__.py index 79bae8585..b300d9bf6 100644 --- a/src/api/extraction/infrastructure/instance_generator_templates/__init__.py +++ b/src/api/extraction/infrastructure/instance_generator_templates/__init__.py @@ -3,10 +3,22 @@ from pathlib import Path TEMPLATES_DIR = Path(__file__).resolve().parent +EXAMPLES_DIR = TEMPLATES_DIR / "examples" TEMPLATE_SCRIPT_NAMES = ( "_entity_scanner.example.py", + "_relationship_scanner.example.py", "entities_to_jsonl.py", "relationships_to_jsonl.py", + "preview_instances.py", + "scanner_common.py", "README.md", + "PREPOPULATION_WORKFLOW.md", +) + +EXAMPLE_SCANNER_NAMES = ( + "test_scanner.example.py", + "api_endpoint_scanner.example.py", + "resource_scanner.example.py", + "adapter_scanner.example.py", ) diff --git a/src/api/extraction/infrastructure/instance_generator_templates/_entity_scanner.example.py b/src/api/extraction/infrastructure/instance_generator_templates/_entity_scanner.example.py index a47d944ab..e13f0142b 100644 --- a/src/api/extraction/infrastructure/instance_generator_templates/_entity_scanner.example.py +++ b/src/api/extraction/infrastructure/instance_generator_templates/_entity_scanner.example.py @@ -1,12 +1,18 @@ #!/usr/bin/env python3 """Example entity scanner — copy to ``{entity_label}.py`` and customize discovery logic. +NAMING CONVENTION: The filename MUST match the ontology entity type ``label`` exactly +(case-sensitive). Examples: ``E2ETest.py``, ``APIEndpoint.py``, ``ComponentTest.py``. +Do not use lowercase variants like ``e2etest.py``. 
+ Contract: - argv[1]: path to ``repository-files/`` (one folder per data source) - stdout: JSON array of ``{"slug": "...", "properties": {...}}`` sorted deterministically -- stderr: optional progress logging only +- stderr: progress logging only Output file convention: ``instance_generators/out/{entity_label}_instances.json`` + +See ``PREPOPULATION_WORKFLOW.md`` for the full six-step pipeline. """ from __future__ import annotations @@ -14,31 +20,43 @@ import json import sys from pathlib import Path +from typing import Any + +from scanner_common import dedupe_instances, generate_slug def scan(repository_files: Path) -> list[dict]: """Find every instance of this entity type across all data sources.""" - instances: list[dict] = [] - for data_source_dir in sorted(repository_files.iterdir()): - if not data_source_dir.is_dir() or data_source_dir.name.startswith("."): - continue - # Example: one instance per *_test.go file — replace with your entity's discovery rules. - for file_path in sorted(data_source_dir.rglob("*_test.go")): - if not file_path.is_file(): - continue - rel = file_path.relative_to(data_source_dir) - slug = f"{data_source_dir.name}-{str(rel).replace('/', '-').replace('_', '-')}".lower() - instances.append( - { - "slug": slug, - "properties": { - "name": file_path.name, - "file_path": str(rel), - "data_source": data_source_dir.name, - }, - } - ) - return sorted(instances, key=lambda row: row["slug"]) + instances: list[dict[str, Any]] = [] + files = sorted(path for path in repository_files.rglob("*_test.go") if path.is_file()) + print(f"Found {len(files)} candidate file(s)...", file=sys.stderr) + + for index, file_path in enumerate(files): + if index > 0 and index % 25 == 0: + print(f"Progress: {index}/{len(files)} files scanned...", file=sys.stderr) + data_source_dir = next( + (parent for parent in file_path.parents if parent.parent == repository_files), + repository_files, + ) + rel = file_path.relative_to(data_source_dir) + slug = 
generate_slug(f"{data_source_dir.name}-{rel.stem}") + instances.append( + { + "slug": slug, + "properties": { + "name": file_path.stem, + "file_path": str(rel), + "data_source": data_source_dir.name, + "description": f"Example instance from {rel}", + }, + } + ) + + unique, skipped = dedupe_instances(instances) + if skipped: + print(f"Skipped {skipped} duplicate slug(s).", file=sys.stderr) + print(f"Scan complete: {len(unique)} instance(s).", file=sys.stderr) + return unique if __name__ == "__main__": diff --git a/src/api/extraction/infrastructure/instance_generator_templates/_relationship_scanner.example.py b/src/api/extraction/infrastructure/instance_generator_templates/_relationship_scanner.example.py new file mode 100644 index 000000000..40338926c --- /dev/null +++ b/src/api/extraction/infrastructure/instance_generator_templates/_relationship_scanner.example.py @@ -0,0 +1,61 @@ +#!/usr/bin/env python3 +"""Example relationship scanner — copy to ``{source}_{rel}_{target}.py``. + +NAMING CONVENTION: Filename uses source entity label, relationship label, and target +entity label from the ontology (case-sensitive), e.g. ``ComponentTest_exercises_APIEndpoint.py``. + +Contract: +- argv[1]: path to ``repository-files/`` (one folder per data source) +- stdout: JSON array of ``{"source_slug": "...", "target_slug": "...", "properties": {...}}`` +- stderr: progress logging only + +Output file convention: +``instance_generators/out/{source}_{rel}_{target}_instances.json`` + +Prerequisites: +- Entity prepopulation finished for both endpoint types (slugs must exist in the graph). +- Use ``kartograph_list_instances_by_type`` or ``kartograph_check_graph_slugs`` when matching. 
+""" + +from __future__ import annotations + +import json +import sys +from pathlib import Path +from typing import Any + +from scanner_common import dedupe_relationships, generate_slug + + +def scan(repository_files: Path) -> list[dict[str, Any]]: + """Discover relationship instances across all data sources.""" + relationships: list[dict[str, Any]] = [] + files = sorted(path for path in repository_files.rglob("*") if path.is_file()) + print(f"Scanning {len(files)} files for relationships...", file=sys.stderr) + + for index, file_path in enumerate(files): + if index > 0 and index % 50 == 0: + print(f"Progress: {index}/{len(files)} files...", file=sys.stderr) + # Replace with logic that maps repository evidence to known entity slugs. + source_slug = generate_slug(file_path.stem) + target_slug = generate_slug(f"{file_path.parent.name}-target") + relationships.append( + { + "source_slug": source_slug, + "target_slug": target_slug, + "properties": { + "source_path": str(file_path.relative_to(repository_files)), + }, + } + ) + + unique, skipped = dedupe_relationships(relationships) + if skipped: + print(f"Skipped {skipped} duplicate relationship row(s).", file=sys.stderr) + print(f"Scan complete: {len(unique)} relationship(s).", file=sys.stderr) + return unique + + +if __name__ == "__main__": + root = Path(sys.argv[1]) if len(sys.argv) > 1 else Path("repository-files") + print(json.dumps(scan(root), indent=2)) diff --git a/src/api/extraction/infrastructure/instance_generator_templates/entities_to_jsonl.py b/src/api/extraction/infrastructure/instance_generator_templates/entities_to_jsonl.py index 0b794ab0c..81fd57c6b 100644 --- a/src/api/extraction/infrastructure/instance_generator_templates/entities_to_jsonl.py +++ b/src/api/extraction/infrastructure/instance_generator_templates/entities_to_jsonl.py @@ -29,12 +29,18 @@ def deterministic_node_id(*, entity_label: str, slug: str, tenant_id: str = "") -> str: + """Stable node id prefix — normalized to lowercase for 
deterministic hashing.""" normalized_type = entity_label.strip().lower() combined = f"{tenant_id}:{normalized_type}:{slug.strip()}" digest = hashlib.sha256(combined.encode()).hexdigest()[:16] return f"{normalized_type}:{digest}" +def mutation_entity_label(entity_label: str) -> str: + """Return the CREATE label — must match ontology ``label`` exactly (case-sensitive).""" + return entity_label.strip() + + def instance_to_create_line( *, entity_label: str, @@ -55,7 +61,7 @@ def instance_to_create_line( slug=slug, tenant_id=tenant_id, ), - "label": entity_label.strip().lower(), + "label": mutation_entity_label(entity_label), "set_properties": set_properties, } @@ -81,7 +87,10 @@ def main() -> int: parser = argparse.ArgumentParser( description="Convert entity scanner JSON to Kartograph node CREATE JSONL.", ) - parser.add_argument("entity_label", help="Entity type label (e.g. test, api_endpoint).") + parser.add_argument( + "entity_label", + help="Entity type label matching ontology exactly (case-sensitive, e.g. APIEndpoint).", + ) parser.add_argument("input", nargs="?", help="Path to JSON file; omit to read stdin.") parser.add_argument("--tenant-id", default="", help="Tenant id for deterministic node ids.") parser.add_argument("--data-source-id", default="schema-bootstrap") diff --git a/src/api/extraction/infrastructure/instance_generator_templates/examples/adapter_scanner.example.py b/src/api/extraction/infrastructure/instance_generator_templates/examples/adapter_scanner.example.py new file mode 100644 index 000000000..088ecef18 --- /dev/null +++ b/src/api/extraction/infrastructure/instance_generator_templates/examples/adapter_scanner.example.py @@ -0,0 +1,64 @@ +#!/usr/bin/env python3 +"""Reference scanner for Adapter entities. + +Looks for adapter registration/config files. Copy to ``instance_generators/Adapter.py``. 
+""" + +from __future__ import annotations + +import json +import re +import sys +from pathlib import Path +from typing import Any + +from scanner_common import dedupe_instances, generate_slug + +_ADAPTER_NAME = re.compile(r"adapter[_-]?name[\"']?\s*[:=]\s*[\"']([^\"']+)[\"']", re.IGNORECASE) + + +def scan(repository_files: Path) -> list[dict[str, Any]]: + instances: list[dict[str, Any]] = [] + patterns = ("**/adapter/**/*.yaml", "**/adapters/**/*.yaml", "**/*adapter*.go") + files: list[Path] = [] + for data_source_dir in sorted(repository_files.iterdir()): + if not data_source_dir.is_dir(): + continue + for pattern in patterns: + files.extend(sorted(data_source_dir.glob(pattern))) + files = sorted({path for path in files if path.is_file()}) + print(f"Found {len(files)} adapter candidate file(s)...", file=sys.stderr) + + for index, file_path in enumerate(files): + if index > 0 and index % 20 == 0: + print(f"Progress: {index}/{len(files)}...", file=sys.stderr) + data_source = file_path.relative_to(repository_files).parts[0] + rel = file_path.relative_to(repository_files / data_source) + content = file_path.read_text(encoding="utf-8", errors="replace") + names = [match.group(1) for match in _ADAPTER_NAME.finditer(content)] + if not names: + names = [file_path.stem] + for name in names: + slug = generate_slug(name) + instances.append( + { + "slug": slug, + "properties": { + "name": name, + "file_path": str(rel), + "data_source": data_source, + "description": f"Adapter: {name}", + }, + } + ) + + unique, skipped = dedupe_instances(instances) + if skipped: + print(f"Skipped {skipped} duplicate slug(s).", file=sys.stderr) + print(f"Scan complete: {len(unique)} adapter(s).", file=sys.stderr) + return unique + + +if __name__ == "__main__": + root = Path(sys.argv[1]) if len(sys.argv) > 1 else Path("repository-files") + print(json.dumps(scan(root), indent=2)) diff --git a/src/api/extraction/infrastructure/instance_generator_templates/examples/api_endpoint_scanner.example.py 
b/src/api/extraction/infrastructure/instance_generator_templates/examples/api_endpoint_scanner.example.py new file mode 100644 index 000000000..5186f7b21 --- /dev/null +++ b/src/api/extraction/infrastructure/instance_generator_templates/examples/api_endpoint_scanner.example.py @@ -0,0 +1,68 @@ +#!/usr/bin/env python3 +"""Reference scanner for APIEndpoint entities. + +Looks for route registrations and OpenAPI-style path declarations. +Copy to ``instance_generators/APIEndpoint.py`` and customize patterns. +""" + +from __future__ import annotations + +import json +import re +import sys +from pathlib import Path +from typing import Any + +from scanner_common import dedupe_instances, generate_slug + +_ROUTE_PATTERNS = ( + re.compile(r"@app\.(get|post|put|patch|delete)\(\s*[\"']([^\"']+)[\"']"), + re.compile(r"router\.(Get|Post|Put|Patch|Delete)\(\s*[\"']([^\"']+)[\"']"), + re.compile(r"HandleFunc\(\s*[\"']([^\"']+)[\"']"), +) + + +def scan(repository_files: Path) -> list[dict[str, Any]]: + instances: list[dict[str, Any]] = [] + files = sorted(path for path in repository_files.rglob("*") if path.is_file()) + print(f"Scanning {len(files)} file(s) for API endpoints...", file=sys.stderr) + + for index, file_path in enumerate(files): + if file_path.suffix not in {".go", ".py", ".ts", ".yaml", ".yml"}: + continue + if index > 0 and index % 40 == 0: + print(f"Progress: {index}/{len(files)}...", file=sys.stderr) + data_source = next( + (parent.name for parent in file_path.parents if parent.parent == repository_files), + "unknown", + ) + rel = file_path.relative_to(repository_files / data_source) + content = file_path.read_text(encoding="utf-8", errors="replace") + for pattern in _ROUTE_PATTERNS: + for match in pattern.finditer(content): + path_value = match.group(match.lastindex or 1) + method = match.group(1).upper() if match.lastindex and match.lastindex > 1 else "GET" + slug = generate_slug(f"{method}-{path_value}") + instances.append( + { + "slug": slug, + "properties": { + 
"name": path_value, + "method": method, + "path": path_value, + "file_path": str(rel), + "description": f"{method} {path_value}", + }, + } + ) + + unique, skipped = dedupe_instances(instances) + if skipped: + print(f"Skipped {skipped} duplicate slug(s).", file=sys.stderr) + print(f"Scan complete: {len(unique)} endpoint(s).", file=sys.stderr) + return unique + + +if __name__ == "__main__": + root = Path(sys.argv[1]) if len(sys.argv) > 1 else Path("repository-files") + print(json.dumps(scan(root), indent=2)) diff --git a/src/api/extraction/infrastructure/instance_generator_templates/examples/resource_scanner.example.py b/src/api/extraction/infrastructure/instance_generator_templates/examples/resource_scanner.example.py new file mode 100644 index 000000000..185838cfb --- /dev/null +++ b/src/api/extraction/infrastructure/instance_generator_templates/examples/resource_scanner.example.py @@ -0,0 +1,63 @@ +#!/usr/bin/env python3 +"""Reference scanner for Resource entities (K8s/custom resources, config files). + +Copy to ``instance_generators/Resource.py`` and adapt resource kinds / globs. 
+""" + +from __future__ import annotations + +import json +import sys +from pathlib import Path +from typing import Any + +from scanner_common import dedupe_instances, generate_slug + +_RESOURCE_GLOBS = ( + "**/*.yaml", + "**/*.yml", + "**/templates/**/*.yaml", + "**/manifests/**/*.yaml", +) + + +def scan(repository_files: Path) -> list[dict[str, Any]]: + instances: list[dict[str, Any]] = [] + files: list[Path] = [] + for data_source_dir in sorted(repository_files.iterdir()): + if not data_source_dir.is_dir(): + continue + for pattern in _RESOURCE_GLOBS: + files.extend(sorted(data_source_dir.glob(pattern))) + files = sorted({path for path in files if path.is_file()}) + print(f"Found {len(files)} resource file(s)...", file=sys.stderr) + + for index, file_path in enumerate(files): + if index > 0 and index % 30 == 0: + print(f"Progress: {index}/{len(files)}...", file=sys.stderr) + data_source = file_path.relative_to(repository_files).parts[0] + rel = file_path.relative_to(repository_files / data_source) + slug = generate_slug(f"{data_source}-{rel.stem}") + instances.append( + { + "slug": slug, + "properties": { + "name": file_path.name, + "kind": "Resource", + "file_path": str(rel), + "data_source": data_source, + "description": f"Resource manifest: {rel}", + }, + } + ) + + unique, skipped = dedupe_instances(instances) + if skipped: + print(f"Skipped {skipped} duplicate slug(s).", file=sys.stderr) + print(f"Scan complete: {len(unique)} resource(s).", file=sys.stderr) + return unique + + +if __name__ == "__main__": + root = Path(sys.argv[1]) if len(sys.argv) > 1 else Path("repository-files") + print(json.dumps(scan(root), indent=2)) diff --git a/src/api/extraction/infrastructure/instance_generator_templates/examples/test_scanner.example.py b/src/api/extraction/infrastructure/instance_generator_templates/examples/test_scanner.example.py new file mode 100644 index 000000000..de86bdb4e --- /dev/null +++ 
b/src/api/extraction/infrastructure/instance_generator_templates/examples/test_scanner.example.py @@ -0,0 +1,80 @@ +#!/usr/bin/env python3 +"""Reference scanner for test-like entities (E2ETest, ComponentTest). + +Copy to ``instance_generators/{Label}.py`` and adapt glob patterns / parsers. +""" + +from __future__ import annotations + +import json +import re +import sys +from pathlib import Path +from typing import Any + +from scanner_common import dedupe_instances, generate_slug + +_E2E_SUITE_DIRS = ("adapter", "cluster", "nodepool", "channel", "version") +_GINKGO_IT = re.compile( + r"^\s*(?:It|PIt|FIt)\(\s*[\"']([^\"']+)[\"']", + re.MULTILINE, +) + + +def _suite_from_path(path: Path, repository_files: Path) -> str: + rel_parts = path.relative_to(repository_files).parts + for part in rel_parts: + if part in _E2E_SUITE_DIRS: + return part + return "unknown" + + +def scan(repository_files: Path) -> list[dict[str, Any]]: + instances: list[dict[str, Any]] = [] + patterns = ("**/e2e/**/*.go", "**/*_test.go", "**/test_*.py") + files: list[Path] = [] + for data_source_dir in sorted(repository_files.iterdir()): + if not data_source_dir.is_dir(): + continue + for pattern in patterns: + files.extend(sorted(data_source_dir.glob(pattern))) + files = sorted({path for path in files if path.is_file()}) + print(f"Found {len(files)} test file(s)...", file=sys.stderr) + + for index, file_path in enumerate(files): + if index > 0 and index % 20 == 0: + print(f"Progress: {index}/{len(files)}...", file=sys.stderr) + data_source = next( + parent.name for parent in file_path.parents if parent.parent == repository_files + ) + rel = file_path.relative_to(repository_files / data_source) + content = file_path.read_text(encoding="utf-8", errors="replace") + for match in _GINKGO_IT.finditer(content): + title = match.group(1) + slug = generate_slug(title) + line_number = content.count("\n", 0, match.start()) + 1 + instances.append( + { + "slug": slug, + "properties": { + "name": title, + 
"suite": _suite_from_path(file_path, repository_files), + "file_path": str(rel), + "line_number": line_number, + "labels": [], + "tier": "medium", + "description": f"Test: {title}", + }, + } + ) + + unique, skipped = dedupe_instances(instances) + if skipped: + print(f"Skipped {skipped} duplicate slug(s).", file=sys.stderr) + print(f"Scan complete: {len(unique)} test instance(s).", file=sys.stderr) + return unique + + +if __name__ == "__main__": + root = Path(sys.argv[1]) if len(sys.argv) > 1 else Path("repository-files") + print(json.dumps(scan(root), indent=2)) diff --git a/src/api/extraction/infrastructure/instance_generator_templates/preview_instances.py b/src/api/extraction/infrastructure/instance_generator_templates/preview_instances.py new file mode 100644 index 000000000..43671ec1f --- /dev/null +++ b/src/api/extraction/infrastructure/instance_generator_templates/preview_instances.py @@ -0,0 +1,65 @@ +#!/usr/bin/env python3 +"""Preview scanner JSON before converting to JSONL. + +Example: + + python3 instance_generators/preview_instances.py E2ETest --limit 5 + python3 instance_generators/preview_instances.py E2ETest \\ + instance_generators/out/E2ETest_instances.json +""" + +from __future__ import annotations + +import argparse +import json +import sys +from pathlib import Path +from typing import Any + + +def _default_input_path(entity_label: str) -> Path: + return Path("instance_generators") / "out" / f"{entity_label}_instances.json" + + +def _format_row(index: int, row: dict[str, Any]) -> str: + slug = str(row.get("slug") or "?") + properties = row.get("properties") or {} + if not isinstance(properties, dict): + properties = {} + lines = [f"{index}. 
{slug}"] + for key in sorted(properties): + lines.append(f" - {key}: {properties[key]!r}") + return "\n".join(lines) + + +def main() -> int: + parser = argparse.ArgumentParser(description="Preview entity scanner JSON output.") + parser.add_argument("entity_label", help="Entity type label (matches scanner filename).") + parser.add_argument( + "input", + nargs="?", + help="Path to JSON file (default: instance_generators/out/{label}_instances.json).", + ) + parser.add_argument("--limit", type=int, default=5, help="Max instances to print.") + args = parser.parse_args() + + input_path = Path(args.input) if args.input else _default_input_path(args.entity_label) + payload = json.loads(input_path.read_text(encoding="utf-8")) + if not isinstance(payload, list): + raise ValueError("Scanner output must be a JSON array") + + total = len(payload) + limit = max(1, args.limit) + preview = payload[:limit] + print(f"Preview of {args.entity_label} instances ({len(preview)} of {total}):\n") + for index, row in enumerate(preview, start=1): + if isinstance(row, dict): + print(_format_row(index, row)) + print() + if total > limit: + print(f"... and {total - limit} more. 
Run entities_to_jsonl.py when ready.") + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/src/api/extraction/infrastructure/instance_generator_templates/relationships_to_jsonl.py b/src/api/extraction/infrastructure/instance_generator_templates/relationships_to_jsonl.py index f91207315..12fe678fd 100644 --- a/src/api/extraction/infrastructure/instance_generator_templates/relationships_to_jsonl.py +++ b/src/api/extraction/infrastructure/instance_generator_templates/relationships_to_jsonl.py @@ -47,6 +47,11 @@ def deterministic_edge_id( return f"{normalized_label}:{digest}" +def mutation_relationship_label(relationship_label: str) -> str: + """Return the CREATE label — must match ontology ``label`` exactly (case-sensitive).""" + return relationship_label.strip() + + def relationship_to_create_line( *, relationship_label: str, @@ -79,7 +84,7 @@ def relationship_to_create_line( end_id=end_id, tenant_id=tenant_id, ), - "label": relationship_label.strip().lower(), + "label": mutation_relationship_label(relationship_label), "start_id": start_id, "end_id": end_id, "set_properties": set_properties, @@ -119,9 +124,18 @@ def main() -> int: parser = argparse.ArgumentParser( description="Convert relationship scanner JSON to Kartograph edge CREATE JSONL.", ) - parser.add_argument("relationship_label", help="Relationship type label in the ontology.") - parser.add_argument("source_entity_type", help="Source endpoint entity type label.") - parser.add_argument("target_entity_type", help="Target endpoint entity type label.") + parser.add_argument( + "relationship_label", + help="Relationship type label matching ontology exactly (case-sensitive).", + ) + parser.add_argument( + "source_entity_type", + help="Source entity type label matching ontology exactly (case-sensitive).", + ) + parser.add_argument( + "target_entity_type", + help="Target entity type label matching ontology exactly (case-sensitive).", + ) parser.add_argument("input", nargs="?", 
help="Path to JSON file; omit to read stdin.") parser.add_argument("--tenant-id", default="", help="Tenant id for deterministic ids.") parser.add_argument("--data-source-id", default="schema-bootstrap") diff --git a/src/api/extraction/infrastructure/instance_generator_templates/scanner_common.py b/src/api/extraction/infrastructure/instance_generator_templates/scanner_common.py new file mode 100644 index 000000000..d53e7ecdb --- /dev/null +++ b/src/api/extraction/infrastructure/instance_generator_templates/scanner_common.py @@ -0,0 +1,67 @@ +"""Shared helpers for entity and relationship scanner scripts.""" + +from __future__ import annotations + +import re +from typing import Any + +_SLUG_PATTERN = re.compile(r"[^a-z0-9]+") + + +def generate_slug(name: str) -> str: + """Build a stable slug from arbitrary source text. + + Rules: + - Lowercase ASCII + - Non-alphanumeric runs become single underscores + - No leading or trailing underscores + """ + slug = _SLUG_PATTERN.sub("_", name.strip().lower()) + return slug.strip("_") + + +def dedupe_instances( + instances: list[dict[str, Any]], + *, + slug_key: str = "slug", +) -> tuple[list[dict[str, Any]], int]: + """Drop duplicate slugs, keeping the first occurrence deterministically.""" + seen: set[str] = set() + unique: list[dict[str, Any]] = [] + skipped = 0 + for row in sorted(instances, key=lambda item: str(item.get(slug_key, ""))): + slug = str(row.get(slug_key) or "").strip() + if not slug or slug in seen: + skipped += 1 + continue + seen.add(slug) + unique.append(row) + return unique, skipped + + +def dedupe_relationships( + relationships: list[dict[str, Any]], +) -> tuple[list[dict[str, Any]], int]: + """Drop duplicate (source_slug, target_slug) pairs, keeping first occurrence.""" + seen: set[tuple[str, str]] = set() + unique: list[dict[str, Any]] = [] + skipped = 0 + for row in sorted( + relationships, + key=lambda item: ( + str(item.get("source_slug") or ""), + str(item.get("target_slug") or ""), + ), + ): + source 
= str(row.get("source_slug") or "").strip() + target = str(row.get("target_slug") or "").strip() + if not source or not target: + skipped += 1 + continue + key = (source, target) + if key in seen: + skipped += 1 + continue + seen.add(key) + unique.append(row) + return unique, skipped diff --git a/src/api/extraction/infrastructure/sticky_session_workdir_materializer.py b/src/api/extraction/infrastructure/sticky_session_workdir_materializer.py index ff4d22195..f45d60a88 100644 --- a/src/api/extraction/infrastructure/sticky_session_workdir_materializer.py +++ b/src/api/extraction/infrastructure/sticky_session_workdir_materializer.py @@ -9,6 +9,8 @@ from extraction.domain.prepared_job_package_source import PreparedJobPackageSource from extraction.infrastructure.instance_generator_templates import ( + EXAMPLES_DIR, + EXAMPLE_SCANNER_NAMES, TEMPLATES_DIR, TEMPLATE_SCRIPT_NAMES, ) @@ -129,6 +131,12 @@ def _materialize_instance_generators(session_root: Path) -> None: source = TEMPLATES_DIR / name if source.is_file(): shutil.copy2(source, target_dir / name) + examples_target = target_dir / "examples" + examples_target.mkdir(parents=True, exist_ok=True) + for name in EXAMPLE_SCANNER_NAMES: + source = EXAMPLES_DIR / name + if source.is_file(): + shutil.copy2(source, examples_target / name) (target_dir / "out").mkdir(parents=True, exist_ok=True) @staticmethod diff --git a/src/api/extraction/presentation/workload_routes.py b/src/api/extraction/presentation/workload_routes.py index 4121977af..6d957aa11 100644 --- a/src/api/extraction/presentation/workload_routes.py +++ b/src/api/extraction/presentation/workload_routes.py @@ -120,6 +120,8 @@ class WorkloadReadinessResponse(BaseModel): prepopulated_types_ready_live: bool = False prepopulated_entity_types: list[dict[str, object]] = Field(default_factory=list) prepopulated_relationship_types: list[dict[str, object]] = Field(default_factory=list) + prepopulation_tasks: list[dict[str, object]] = Field(default_factory=list) + 
next_action: str = "" blocking_reasons: list[str] = Field(default_factory=list) transition_eligible: bool diff --git a/src/api/infrastructure/extraction_workload/workspace_readiness.py b/src/api/infrastructure/extraction_workload/workspace_readiness.py index d36ae3d78..9cf318ef5 100644 --- a/src/api/infrastructure/extraction_workload/workspace_readiness.py +++ b/src/api/infrastructure/extraction_workload/workspace_readiness.py @@ -17,6 +17,108 @@ from management.domain.value_objects import EdgeTypeDefinition, NodeTypeDefinition, OntologyConfig +def _entity_scanner_path(label: str) -> str: + return f"instance_generators/{label}.py" + + +def _relationship_scanner_path(*, source: str, relationship: str, target: str) -> str: + return f"instance_generators/{source}_{relationship}_{target}.py" + + +def _build_prepopulation_tasks( + *, + ontology: OntologyConfig | None, + live_entity_gaps: list[str], + live_relationship_gaps: list[str], + entity_instance_counts: dict[str, int], + relationship_instance_counts: dict[str, int], +) -> list[dict[str, object]]: + tasks: list[dict[str, object]] = [] + if ontology is None: + return tasks + + for label in live_entity_gaps: + node_type = next((nt for nt in ontology.node_types if nt.label == label), None) + live_count = entity_instance_counts.get(label, 0) + tasks.append( + { + "kind": "entity", + "label": label, + "live_instance_count": live_count, + "scanner_path": _entity_scanner_path(label), + "output_json": f"instance_generators/out/{label}_instances.json", + "output_jsonl": f"instance_generators/out/{label}_instances.jsonl", + "required_properties": list(node_type.required_properties) if node_type else [], + "optional_properties": list(node_type.optional_properties) if node_type else [], + "action": ( + f"Copy _entity_scanner.example.py to {_entity_scanner_path(label)} " + f"(filename must match label exactly), run PREPOPULATION_WORKFLOW.md steps 2–6." 
+ ), + } + ) + + for key in live_relationship_gaps: + edge_type = next( + (et for et in ontology.edge_types if relationship_readiness_key(et) == key), + None, + ) + source = edge_type.source_labels[0] if edge_type and edge_type.source_labels else "" + target = edge_type.target_labels[0] if edge_type and edge_type.target_labels else "" + rel = edge_type.label if edge_type else "" + scanner = ( + _relationship_scanner_path(source=source, relationship=rel, target=target) + if source and target and rel + else f"instance_generators/{key}.py" + ) + tasks.append( + { + "kind": "relationship", + "key": key, + "relationship_type": rel, + "source_entity_type": source, + "target_entity_type": target, + "live_instance_count": relationship_instance_counts.get(key, 0), + "scanner_path": scanner, + "output_json": f"instance_generators/out/{key}_instances.json", + "output_jsonl": f"instance_generators/out/{key}_instances.jsonl", + "action": ( + f"Copy _relationship_scanner.example.py to {scanner}, then run " + "relationship steps in PREPOPULATION_WORKFLOW.md." + ), + } + ) + return tasks + + +def _build_next_action( + *, + live_entity_gaps: list[str], + live_relationship_gaps: list[str], + transition_eligible: bool, + blocking_reasons: list[str], +) -> str: + if live_entity_gaps: + label = live_entity_gaps[0] + return ( + f"Run entity prepopulation for `{label}`: create {_entity_scanner_path(label)} " + "from _entity_scanner.example.py (case-sensitive filename), then follow " + "PREPOPULATION_WORKFLOW.md steps 2–6." + ) + if live_relationship_gaps: + key = live_relationship_gaps[0] + return ( + f"Run relationship prepopulation for `{key}` using " + "_relationship_scanner.example.py and PREPOPULATION_WORKFLOW.md." + ) + if transition_eligible: + return ( + "All prepopulated types have live instances. Bootstrap prepopulation is complete." + ) + if blocking_reasons: + return "Resolve blocking_reasons before continuing prepopulation." 
+ return "Review kartograph_get_workspace_readiness and continue schema bootstrap." + + async def build_workload_readiness_snapshot( *, ontology: OntologyConfig | None, @@ -65,6 +167,10 @@ async def build_workload_readiness_snapshot( "label": node_type.label, "metadata_instance_count": node_type.prepopulated_instance_count, "live_instance_count": entity_instance_counts.get(node_type.label, 0), + "required_properties": list(node_type.required_properties), + "optional_properties": list(node_type.optional_properties), + "scanner_path": _entity_scanner_path(node_type.label), + "needs_instances": entity_instance_counts.get(node_type.label, 0) == 0, } for node_type in (ontology.node_types if ontology else ()) if node_type.prepopulated @@ -81,6 +187,17 @@ async def build_workload_readiness_snapshot( relationship_readiness_key(edge_type), 0, ), + "required_properties": list(edge_type.properties), + "scanner_path": _relationship_scanner_path( + source=edge_type.source_labels[0] if edge_type.source_labels else "source", + relationship=edge_type.label, + target=edge_type.target_labels[0] if edge_type.target_labels else "target", + ), + "needs_instances": relationship_instance_counts.get( + relationship_readiness_key(edge_type), + 0, + ) + == 0, } for edge_type in (ontology.edge_types if ontology else ()) if edge_type.prepopulated @@ -151,6 +268,20 @@ async def build_workload_readiness_snapshot( and live_prepopulated_ready ) + prepopulation_tasks = _build_prepopulation_tasks( + ontology=ontology, + live_entity_gaps=list(live_entity_gaps), + live_relationship_gaps=list(live_relationship_gaps), + entity_instance_counts=entity_instance_counts, + relationship_instance_counts=relationship_instance_counts, + ) + next_action = _build_next_action( + live_entity_gaps=list(live_entity_gaps), + live_relationship_gaps=list(live_relationship_gaps), + transition_eligible=transition_eligible, + blocking_reasons=blocking_reasons, + ) + return { "knowledge_graph_id": knowledge_graph_id, 
"has_minimum_entity_types": metadata_readiness.has_minimum_entity_types, @@ -167,6 +298,8 @@ async def build_workload_readiness_snapshot( "prepopulated_relationship_types_without_instances_live": list(live_relationship_gaps), "prepopulated_entity_types": prepopulated_entity_types, "prepopulated_relationship_types": prepopulated_relationship_types, + "prepopulation_tasks": prepopulation_tasks, + "next_action": next_action, "blocking_reasons": blocking_reasons, "transition_eligible": transition_eligible, } diff --git a/src/api/tests/unit/extraction/infrastructure/test_entities_to_jsonl.py b/src/api/tests/unit/extraction/infrastructure/test_entities_to_jsonl.py index f9c646407..34b8be0bf 100644 --- a/src/api/tests/unit/extraction/infrastructure/test_entities_to_jsonl.py +++ b/src/api/tests/unit/extraction/infrastructure/test_entities_to_jsonl.py @@ -48,6 +48,7 @@ def test_entities_to_jsonl_emits_sorted_create_lines(tmp_path: Path) -> None: assert second["set_properties"]["slug"] == "b-entity" assert first["label"] == "test" assert first["set_properties"]["data_source_id"] == "schema-bootstrap" + assert first["id"].startswith("test:") rerun = subprocess.run( [sys.executable, str(SCRIPT), "test", str(instances_path)], @@ -94,3 +95,22 @@ def test_entities_to_jsonl_preserves_source_path_from_scanner_properties(tmp_pat line = json.loads(proc.stdout.strip()) assert line["set_properties"]["source_path"] == "pkg/a_test.go" + + +def test_entities_to_jsonl_preserves_pascal_case_entity_label(tmp_path: Path) -> None: + instances_path = tmp_path / "APIEndpoint_instances.json" + instances_path.write_text( + json.dumps([{"slug": "get-healthz", "properties": {"method": "GET", "path": "/healthz"}}]), + encoding="utf-8", + ) + + proc = subprocess.run( + [sys.executable, str(SCRIPT), "APIEndpoint", str(instances_path)], + check=True, + capture_output=True, + text=True, + ) + + line = json.loads(proc.stdout.strip()) + assert line["label"] == "APIEndpoint" + assert 
line["id"].startswith("apiendpoint:") diff --git a/src/api/tests/unit/extraction/infrastructure/test_preview_instances.py b/src/api/tests/unit/extraction/infrastructure/test_preview_instances.py new file mode 100644 index 000000000..c6b21e857 --- /dev/null +++ b/src/api/tests/unit/extraction/infrastructure/test_preview_instances.py @@ -0,0 +1,40 @@ +"""Unit tests for preview_instances helper script.""" + +from __future__ import annotations + +import json +import subprocess +import sys +from pathlib import Path + +SCRIPT = ( + Path(__file__).resolve().parents[4] + / "extraction/infrastructure/instance_generator_templates/preview_instances.py" +) + + +def test_preview_instances_prints_limited_rows(tmp_path: Path, capsys) -> None: + input_path = tmp_path / "E2ETest_instances.json" + input_path.write_text( + json.dumps( + [ + {"slug": "alpha", "properties": {"suite": "adapter"}}, + {"slug": "beta", "properties": {"suite": "cluster"}}, + {"slug": "gamma", "properties": {"suite": "channel"}}, + ] + ), + encoding="utf-8", + ) + + proc = subprocess.run( + [sys.executable, str(SCRIPT), "E2ETest", str(input_path), "--limit", "2"], + check=True, + capture_output=True, + text=True, + ) + + assert "Preview of E2ETest instances (2 of 3)" in proc.stdout + assert "alpha" in proc.stdout + assert "beta" in proc.stdout + assert "gamma" not in proc.stdout + assert "... 
and 1 more" in proc.stdout diff --git a/src/api/tests/unit/extraction/infrastructure/test_scanner_common.py b/src/api/tests/unit/extraction/infrastructure/test_scanner_common.py new file mode 100644 index 000000000..9d7833e63 --- /dev/null +++ b/src/api/tests/unit/extraction/infrastructure/test_scanner_common.py @@ -0,0 +1,36 @@ +"""Unit tests for scanner_common helpers.""" + +from __future__ import annotations + +from extraction.infrastructure.instance_generator_templates.scanner_common import ( + dedupe_instances, + dedupe_relationships, + generate_slug, +) + + +def test_generate_slug_normalizes_text() -> None: + assert generate_slug("Basic Workflow Validation!") == "basic_workflow_validation" + assert generate_slug(" Foo--Bar ") == "foo_bar" + + +def test_dedupe_instances_keeps_first_slug() -> None: + rows = [ + {"slug": "b", "properties": {}}, + {"slug": "a", "properties": {}}, + {"slug": "a", "properties": {"name": "dup"}}, + ] + unique, skipped = dedupe_instances(rows) + assert [row["slug"] for row in unique] == ["a", "b"] + assert skipped == 1 + + +def test_dedupe_relationships_keeps_first_pair() -> None: + rows = [ + {"source_slug": "a", "target_slug": "b"}, + {"source_slug": "a", "target_slug": "b"}, + {"source_slug": "a", "target_slug": "c"}, + ] + unique, skipped = dedupe_relationships(rows) + assert len(unique) == 2 + assert skipped == 1 diff --git a/src/api/tests/unit/extraction/infrastructure/test_sticky_session_workdir_materializer.py b/src/api/tests/unit/extraction/infrastructure/test_sticky_session_workdir_materializer.py index 9436cf91c..fa5cf4230 100644 --- a/src/api/tests/unit/extraction/infrastructure/test_sticky_session_workdir_materializer.py +++ b/src/api/tests/unit/extraction/infrastructure/test_sticky_session_workdir_materializer.py @@ -169,11 +169,17 @@ def test_materializer_copies_instance_generator_templates(tmp_path: Path) -> Non assert generators_dir.is_dir() for name in ( "_entity_scanner.example.py", + 
"_relationship_scanner.example.py", "entities_to_jsonl.py", "relationships_to_jsonl.py", + "preview_instances.py", + "scanner_common.py", "README.md", + "PREPOPULATION_WORKFLOW.md", ): assert (generators_dir / name).is_file() + assert (generators_dir / "examples" / "test_scanner.example.py").is_file() + assert (generators_dir / "examples" / "api_endpoint_scanner.example.py").is_file() readme = (generators_dir / "README.md").read_text(encoding="utf-8") assert "repository-files" in readme assert (generators_dir / "out").is_dir() diff --git a/src/api/tests/unit/extraction/presentation/test_workload_routes.py b/src/api/tests/unit/extraction/presentation/test_workload_routes.py index 58a60f9d1..7b2c5aa69 100644 --- a/src/api/tests/unit/extraction/presentation/test_workload_routes.py +++ b/src/api/tests/unit/extraction/presentation/test_workload_routes.py @@ -166,7 +166,8 @@ def test_workload_get_schema_authoring_guide(workload_client: tuple[TestClient, ) assert response.status_code == 200 assert "kartograph_get_schema_ontology" in response.json()["guide"] - assert "Instance generation cookbook" in response.json()["guide"] + assert "PREPOPULATION_WORKFLOW.md" in response.json()["guide"] + assert "case-sensitive" in response.json()["guide"] def test_workload_get_workspace_readiness(workload_client: tuple[TestClient, _FakeSchemaService, str]) -> None: diff --git a/src/api/tests/unit/infrastructure/extraction_workload/test_workspace_readiness.py b/src/api/tests/unit/infrastructure/extraction_workload/test_workspace_readiness.py index fb0c7c65a..f60b415ae 100644 --- a/src/api/tests/unit/infrastructure/extraction_workload/test_workspace_readiness.py +++ b/src/api/tests/unit/infrastructure/extraction_workload/test_workspace_readiness.py @@ -48,6 +48,12 @@ async def test_build_workload_readiness_snapshot_reports_live_relationship_gaps( assert "folder" in snapshot["prepopulated_entity_types_without_instances_live"] assert snapshot["prepopulated_types_ready_live"] is False assert 
snapshot["prepopulated_relationship_types"][0]["live_instance_count"] == 1 + assert snapshot["next_action"] + assert "folder" in snapshot["next_action"] + assert snapshot["prepopulation_tasks"] + assert snapshot["prepopulation_tasks"][0]["kind"] == "entity" + assert snapshot["prepopulation_tasks"][0]["scanner_path"] == "instance_generators/folder.py" + assert "required_properties" in snapshot["prepopulated_entity_types"][0] @pytest.mark.asyncio From f9cff145a62726d4e8f14a04f4eeb7564dd902ba Mon Sep 17 00:00:00 2001 From: aredenba-rh <aredenba@redhat.com> Date: Fri, 5 Jun 2026 17:20:50 -0400 Subject: [PATCH 111/153] fix(management): report true design artifact instance counts Per-type instance_count was derived from a globally truncated node sample, under-reporting large types like ComponentTest. Count from the full graph while keeping browsable instances capped, with truncation hints in the UI. Co-authored-by: Cursor <cursoragent@cursor.com> --- .../application/design_artifacts.py | 90 +++++++++++++------ .../presentation/knowledge_graphs/models.py | 4 + .../application/test_design_artifacts.py | 33 +++++++ .../GraphDesignEntitiesPanel.vue | 12 ++- .../GraphDesignRelationshipsPanel.vue | 12 ++- src/dev-ui/app/utils/kgDesignArtifacts.ts | 4 + 6 files changed, 123 insertions(+), 32 deletions(-) diff --git a/src/api/management/application/design_artifacts.py b/src/api/management/application/design_artifacts.py index 6724f4f00..1a03b19eb 100644 --- a/src/api/management/application/design_artifacts.py +++ b/src/api/management/application/design_artifacts.py @@ -58,6 +58,24 @@ def build_design_artifacts( node_by_age_id = {str(node.get("id")): node for node in nodes if node.get("id")} + def _node_instance(node: dict[str, Any]) -> dict[str, Any]: + slug = str(node.get("slug") or node.get("domainId") or node.get("id") or "") + return { + "slug": slug, + "properties": _instance_properties(node), + } + + full_instances_by_type: dict[str, list[dict[str, Any]]] = 
defaultdict(list) + for node in sorted( + nodes, + key=lambda item: ( + str(item.get("type") or ""), + str(item.get("slug") or item.get("domainId") or item.get("id") or ""), + ), + ): + entity_type = str(node.get("type") or "unknown") + full_instances_by_type[entity_type].append(_node_instance(node)) + instances_by_type: dict[str, list[dict[str, Any]]] = defaultdict(list) sorted_nodes = sorted( nodes, @@ -70,13 +88,7 @@ def build_design_artifacts( for node in truncated_nodes: entity_type = str(node.get("type") or "unknown") - slug = str(node.get("slug") or node.get("domainId") or node.get("id") or "") - instances_by_type[entity_type].append( - { - "slug": slug, - "properties": _instance_properties(node), - } - ) + instances_by_type[entity_type].append(_node_instance(node)) entities: dict[str, dict[str, Any]] = {} if ontology is not None: @@ -88,6 +100,7 @@ def build_design_artifacts( for prop in (*required, *optional) } type_instances = instances_by_type.get(node_type.label, []) + total_instances = len(full_instances_by_type.get(node_type.label, [])) entities[node_type.label] = { "type": node_type.label, "description": node_type.description, @@ -95,13 +108,16 @@ def build_design_artifacts( "optional_properties": optional, "property_definitions": property_definitions, "prepopulated_instances": node_type.prepopulated, - "instance_count": len(instances_by_type.get(node_type.label, [])), + "instance_count": total_instances, + "instances_returned": len(type_instances), + "instances_truncated": total_instances > len(type_instances), "instances": type_instances, } for entity_type, type_instances in instances_by_type.items(): if entity_type in entities: continue + total_instances = len(full_instances_by_type.get(entity_type, [])) entities[entity_type] = { "type": entity_type, "description": "", @@ -109,32 +125,27 @@ def build_design_artifacts( "optional_properties": [], "property_definitions": {}, "prepopulated_instances": False, - "instance_count": len(type_instances), + 
"instance_count": total_instances, + "instances_returned": len(type_instances), + "instances_truncated": total_instances > len(type_instances), "instances": type_instances, } relationship_instances: dict[str, list[dict[str, Any]]] = defaultdict(list) - sorted_edges = sorted( - edges, - key=lambda edge: ( - str(edge.get("type") or ""), - str(edge.get("source") or ""), - str(edge.get("target") or ""), - ), - ) - truncated_edges = sorted_edges[:limit] + full_relationship_instances: dict[str, list[dict[str, Any]]] = defaultdict(list) - for edge in truncated_edges: + def _edge_instance(edge: dict[str, Any]) -> dict[str, Any] | None: source_node = node_by_age_id.get(str(edge.get("source"))) target_node = node_by_age_id.get(str(edge.get("target"))) if source_node is None or target_node is None: - continue + return None source_type = str(source_node.get("type") or "unknown") target_type = str(target_node.get("type") or "unknown") relationship_type = str(edge.get("type") or "unknown") composite_key = f"{source_type}|{relationship_type}|{target_type}" - relationship_instances[composite_key].append( - { + return { + "composite_key": composite_key, + "instance": { "source_slug": str( source_node.get("slug") or source_node.get("domainId") @@ -148,8 +159,29 @@ def build_design_artifacts( or "" ), "properties": _instance_properties(edge), - } - ) + }, + } + + sorted_edges = sorted( + edges, + key=lambda edge: ( + str(edge.get("type") or ""), + str(edge.get("source") or ""), + str(edge.get("target") or ""), + ), + ) + for edge in sorted_edges: + parsed = _edge_instance(edge) + if parsed is None: + continue + full_relationship_instances[parsed["composite_key"]].append(parsed["instance"]) + + truncated_edges = sorted_edges[:limit] + for edge in truncated_edges: + parsed = _edge_instance(edge) + if parsed is None: + continue + relationship_instances[parsed["composite_key"]].append(parsed["instance"]) relationships: list[dict[str, Any]] = [] if ontology is not None: @@ -166,6 +198,7 
@@ def build_design_artifacts( type_instances = instances break reverse_label = _reverse_relationship_label(edge_type) + total_instances = len(full_relationship_instances.get(composite_key, [])) relationships.append( { "key": composite_key, @@ -178,7 +211,9 @@ def build_design_artifacts( ), "prepopulated_instances": edge_type.prepopulated, "description": edge_type.description or None, - "instance_count": len(type_instances), + "instance_count": total_instances, + "instances_returned": len(type_instances), + "instances_truncated": total_instances > len(type_instances), "instances": type_instances, "required_parameters": list(edge_type.properties), "optional_parameters": [], @@ -196,6 +231,7 @@ def build_design_artifacts( parts = composite_key.split("|") if len(parts) != 3: continue + total_instances = len(full_relationship_instances.get(composite_key, [])) relationships.append( { "key": composite_key, @@ -206,7 +242,9 @@ def build_design_artifacts( "reverse_relationship_description": None, "prepopulated_instances": False, "description": None, - "instance_count": len(type_instances), + "instance_count": total_instances, + "instances_returned": len(type_instances), + "instances_truncated": total_instances > len(type_instances), "instances": type_instances, "required_parameters": [], "optional_parameters": [], diff --git a/src/api/management/presentation/knowledge_graphs/models.py b/src/api/management/presentation/knowledge_graphs/models.py index 5d19e2127..25c373795 100644 --- a/src/api/management/presentation/knowledge_graphs/models.py +++ b/src/api/management/presentation/knowledge_graphs/models.py @@ -462,6 +462,8 @@ class DesignArtifactEntityTypeModel(BaseModel): property_definitions: dict[str, str] = Field(default_factory=dict) prepopulated_instances: bool | str = False instance_count: int = 0 + instances_returned: int = 0 + instances_truncated: bool = False instances: list[DesignArtifactInstanceModel] = Field(default_factory=list) @@ -477,6 +479,8 @@ class 
DesignArtifactRelationshipTypeModel(BaseModel): prepopulated_instances: bool | str = False description: str | None = None instance_count: int = 0 + instances_returned: int = 0 + instances_truncated: bool = False instances: list[DesignArtifactInstanceModel] = Field(default_factory=list) required_parameters: list[str] = Field(default_factory=list) optional_parameters: list[str] = Field(default_factory=list) diff --git a/src/api/tests/unit/management/application/test_design_artifacts.py b/src/api/tests/unit/management/application/test_design_artifacts.py index 64ae79634..86da3482a 100644 --- a/src/api/tests/unit/management/application/test_design_artifacts.py +++ b/src/api/tests/unit/management/application/test_design_artifacts.py @@ -63,6 +63,39 @@ def test_build_design_artifacts_merges_ontology_with_graph_instances() -> None: assert payload["relationships"][0]["instances"][0]["source_slug"] == "api-gateway" +def test_build_design_artifacts_reports_true_instance_count_when_payload_truncated() -> None: + graph_data = { + "nodes": [ + { + "id": f"age-{index}", + "type": "service", + "slug": f"service-{index:04d}", + "knowledge_graph_id": "kg-1", + } + for index in range(600) + ], + "edges": [], + } + + payload = build_design_artifacts( + knowledge_graph_id="kg-1", + ontology=OntologyConfig( + node_types=( + NodeTypeDefinition(label="service", description="Service", prepopulated=True), + ), + ), + graph_data=graph_data, + limit=500, + ) + + service = payload["entities"]["service"] + assert service["instance_count"] == 600 + assert service["instances_returned"] == 500 + assert service["instances_truncated"] is True + assert len(service["instances"]) == 500 + assert payload["limits"]["entity_instances_truncated"] is True + + def test_build_design_artifacts_filters_other_knowledge_graphs() -> None: payload = build_design_artifacts( knowledge_graph_id="kg-1", diff --git a/src/dev-ui/app/components/graph-management/GraphDesignEntitiesPanel.vue 
b/src/dev-ui/app/components/graph-management/GraphDesignEntitiesPanel.vue index 0e4f72363..3e6f266fb 100644 --- a/src/dev-ui/app/components/graph-management/GraphDesignEntitiesPanel.vue +++ b/src/dev-ui/app/components/graph-management/GraphDesignEntitiesPanel.vue @@ -215,6 +215,10 @@ defineExpose({ refresh: fetchEntities }) class="size-3.5 shrink-0 text-muted-foreground transition-transform group-open/inst:rotate-180" /> Instances + <span v-if="row.instances_truncated" class="font-normal text-muted-foreground"> + (showing {{ row.instances_returned ?? row.instances?.length ?? 0 }} of + {{ row.instance_count }}) + </span> </summary> <div class="space-y-2 border-t p-2"> <ul class="space-y-1 text-xs"> @@ -248,7 +252,7 @@ defineExpose({ refresh: fetchEntities }) <span class="text-xs text-muted-foreground"> Page {{ pageSlice(instancePage, row.type, row.instances || []).page + 1 }} / {{ pageSlice(instancePage, row.type, row.instances || []).totalPages }} - ({{ pageSlice(instancePage, row.type, row.instances || []).total }} total) + ({{ row.instances_truncated ? `${row.instances_returned ?? row.instances?.length ?? 0} loaded of ${row.instance_count}` : pageSlice(instancePage, row.type, row.instances || []).total }} total) </span> <Button variant="outline" @@ -274,8 +278,10 @@ defineExpose({ refresh: fetchEntities }) v-if="data.limits.entity_instances_truncated" class="text-xs text-muted-foreground" > - Showing the first {{ data.limits.entity_instances_returned }} of - {{ data.counts.entity_instances }} entity instances. Increase the API limit to inspect more. + Instance counts reflect the full graph. The browsable instance list is capped at + {{ data.limits.entity_instances_returned }} of {{ data.counts.entity_instances }} + total instances across all types (API limit {{ data.limits.requested }}). Per-type badges + still show true totals. 
</p> </template> </template> diff --git a/src/dev-ui/app/components/graph-management/GraphDesignRelationshipsPanel.vue b/src/dev-ui/app/components/graph-management/GraphDesignRelationshipsPanel.vue index 07c0c164c..48feb57d7 100644 --- a/src/dev-ui/app/components/graph-management/GraphDesignRelationshipsPanel.vue +++ b/src/dev-ui/app/components/graph-management/GraphDesignRelationshipsPanel.vue @@ -208,6 +208,10 @@ defineExpose({ refresh: fetchRelationships }) class="size-3.5 shrink-0 text-muted-foreground transition-transform group-open/inst:rotate-180" /> Instances + <span v-if="rel.instances_truncated" class="font-normal text-muted-foreground"> + (showing {{ rel.instances_returned ?? rel.instances.length }} of + {{ rel.instance_count }}) + </span> </summary> <div class="space-y-2 border-t p-2"> <ul class="space-y-1 text-xs"> @@ -243,7 +247,7 @@ defineExpose({ refresh: fetchRelationships }) <span class="text-xs text-muted-foreground"> Page {{ pageSlice(instancePage, rel.key, rel.instances).page + 1 }} / {{ pageSlice(instancePage, rel.key, rel.instances).totalPages }} - ({{ pageSlice(instancePage, rel.key, rel.instances).total }} total) + ({{ rel.instances_truncated ? `${rel.instances_returned ?? rel.instances.length} loaded of ${rel.instance_count}` : pageSlice(instancePage, rel.key, rel.instances).total }} total) </span> <Button variant="outline" @@ -271,8 +275,10 @@ defineExpose({ refresh: fetchRelationships }) v-if="data.limits.relationship_instances_truncated" class="text-xs text-muted-foreground" > - Showing the first {{ data.limits.relationship_instances_returned }} of - {{ data.counts.relationship_instances }} relationship instances. + Relationship counts reflect the full graph. The browsable instance list is capped at + {{ data.limits.relationship_instances_returned }} of + {{ data.counts.relationship_instances }} total instances (API limit + {{ data.limits.requested }}). 
</p> </template> </template> diff --git a/src/dev-ui/app/utils/kgDesignArtifacts.ts b/src/dev-ui/app/utils/kgDesignArtifacts.ts index b3edbd6ef..57ad1cba6 100644 --- a/src/dev-ui/app/utils/kgDesignArtifacts.ts +++ b/src/dev-ui/app/utils/kgDesignArtifacts.ts @@ -46,6 +46,8 @@ export interface DesignArtifactEntityType { property_definitions?: Record<string, string> prepopulated_instances?: string | boolean instance_count: number + instances_returned?: number + instances_truncated?: boolean instances?: DesignArtifactInstance[] } @@ -59,6 +61,8 @@ export interface DesignArtifactRelationshipType { prepopulated_instances?: string | boolean description: string | null instance_count: number + instances_returned?: number + instances_truncated?: boolean instances: DesignArtifactInstance[] required_parameters: string[] optional_parameters: string[] From ba8f1d68ee2d2766d1c498d31854ea025cede591 Mon Sep 17 00:00:00 2001 From: aredenba-rh <aredenba@redhat.com> Date: Sat, 6 Jun 2026 17:47:50 -0400 Subject: [PATCH 112/153] fix(management): show bidirectional relationships as one design-artifact row Hide auto-generated inverse edge types in design artifacts and readiness while keeping primary/inverse labels on a single row; dedupe manually authored inverse duplicates on save and clarify GMA schema guidance. 
Co-authored-by: Cursor <cursoragent@cursor.com> --- .../application/schema_authoring_guide.py | 20 ++++++---- .../application/skill_resolution_service.py | 9 +++-- .../workspace_readiness.py | 3 +- .../application/design_artifacts.py | 15 ++++++- .../application/workspace_readiness.py | 6 ++- .../management/domain/relationship_pairing.py | 39 +++++++++++++++++++ .../test_design_artifacts_pairing.py | 37 ++++++++++++++++++ .../domain/test_relationship_pairing.py | 26 +++++++++++++ .../GraphDesignRelationshipsPanel.vue | 2 +- 9 files changed, 142 insertions(+), 15 deletions(-) diff --git a/src/api/extraction/application/schema_authoring_guide.py b/src/api/extraction/application/schema_authoring_guide.py index 0c6dc190a..c9f1c8c36 100644 --- a/src/api/extraction/application/schema_authoring_guide.py +++ b/src/api/extraction/application/schema_authoring_guide.py @@ -65,8 +65,11 @@ ## Schema modeling rules - **Property vs entity:** categorize → property; track instances/relationships → entity + edges. -- **Bidirectional relationships** default on — author primary direction only; platform creates inverse + twins. -- Set `bidirectional: false` for asymmetric edges (`depends_on`, `created_by`). +- **Bidirectional relationships** default on — author **one primary direction only** in `edge_types`. + Set optional `inverse_label` (default `{label}_inverse`). Never add a separate inverse type; + the platform auto-generates it and twin edge instances. Design artifacts show + `primary / inverse` on a single row. +- Set `bidirectional: false` only for asymmetric edges (`depends_on`, `created_by`). 
## Workspace discovery patterns @@ -112,15 +115,18 @@ ```json { - "label": "defines", - "source_labels": ["repository"], - "target_labels": ["api_endpoint"], + "label": "exercises", + "source_labels": ["ComponentTest"], + "target_labels": ["APIEndpoint"], "prepopulated": true, - "bidirectional": true + "bidirectional": true, + "inverse_label": "exercises_inverse" } ``` -Relationship scanner convention: `out/{source}_{label}_{target}_instances.json`. +Do **not** also add `exercises_inverse` as its own `edge_types` entry — that inverse is auto-generated on save. + +Relationship scanner convention: `out/{source}_{label}_{target}_instances.json` (primary direction only). ## Instance mutations (JSONL) diff --git a/src/api/extraction/application/skill_resolution_service.py b/src/api/extraction/application/skill_resolution_service.py index 200dbb51e..afb724881 100644 --- a/src/api/extraction/application/skill_resolution_service.py +++ b/src/api/extraction/application/skill_resolution_service.py @@ -119,9 +119,10 @@ class ResolvedExtractionSkillPack: "schema_modeling": ( "Property vs entity: distinguish/categorize → property on an existing type; " "track which/what or needs relationships → entity type + edges. " - "Relationships default bidirectional — author primary direction only; platform creates " - "inverse type + twin instances. Set bidirectional=false for asymmetric edges " - "(depends_on, created_by). For asymmetric edges, confirm X → rel → Y direction explicitly." + "Relationships default bidirectional — author one primary direction in edge_types with " + "optional inverse_label; never add a separate inverse edge type (platform auto-generates " + "it and twin instances). Design artifacts show primary/inverse on one row. " + "Set bidirectional=false only for asymmetric edges (depends_on, created_by)." ), "schema_workflow": ( "Call kartograph_get_schema_authoring_guide when you need shapes, phases, or mutation rules. 
" @@ -131,7 +132,7 @@ class ResolvedExtractionSkillPack: "Follow instance_generators/PREPOPULATION_WORKFLOW.md. Per gap: {Label}.py (case-sensitive filename) " "→ out/{Label}_instances.json → preview_instances.py (optional) → entities_to_jsonl.py or " "relationships_to_jsonl.py → validate/apply-from-file. Use scanner_common.generate_slug() and " - "dedupe_instances(). Entities before relationships. Primary edges only." + "dedupe_instances(). Entities before relationships. Primary relationship direction only." ), "readiness_reporting": ( "After schema or prepopulation work, call kartograph_get_workspace_readiness and cite " diff --git a/src/api/infrastructure/extraction_workload/workspace_readiness.py b/src/api/infrastructure/extraction_workload/workspace_readiness.py index 9cf318ef5..9c840b9df 100644 --- a/src/api/infrastructure/extraction_workload/workspace_readiness.py +++ b/src/api/infrastructure/extraction_workload/workspace_readiness.py @@ -11,6 +11,7 @@ from management.domain.ontology_prepopulation import relationship_readiness_key from management.domain.relationship_pairing import ( bidirectional_pair_key, + is_primary_relationship_for_display, resolve_inverse_label_for_primary, twin_validation_errors, ) @@ -200,7 +201,7 @@ async def build_workload_readiness_snapshot( == 0, } for edge_type in (ontology.edge_types if ontology else ()) - if edge_type.prepopulated + if edge_type.prepopulated and is_primary_relationship_for_display(edge_type) ] live_entity_gaps = live_gaps["entity_types_without_instances"] diff --git a/src/api/management/application/design_artifacts.py b/src/api/management/application/design_artifacts.py index 1a03b19eb..e561219d8 100644 --- a/src/api/management/application/design_artifacts.py +++ b/src/api/management/application/design_artifacts.py @@ -5,7 +5,11 @@ from collections import defaultdict from typing import Any -from management.domain.relationship_pairing import resolve_inverse_label_for_primary +from 
management.domain.relationship_pairing import ( + is_primary_relationship_for_display, + is_secondary_bidirectional_edge, + resolve_inverse_label_for_primary, +) from management.domain.value_objects import EdgeTypeDefinition, OntologyConfig _SYSTEM_NODE_PROPERTIES = frozenset( @@ -186,6 +190,8 @@ def _edge_instance(edge: dict[str, Any]) -> dict[str, Any] | None: relationships: list[dict[str, Any]] = [] if ontology is not None: for edge_type in ontology.edge_types: + if not is_primary_relationship_for_display(edge_type): + continue source_label = edge_type.source_labels[0] if edge_type.source_labels else "" target_label = edge_type.target_labels[0] if edge_type.target_labels else "" composite_key = f"{source_label}|{edge_type.label}|{target_label}" @@ -231,6 +237,13 @@ def _edge_instance(edge: dict[str, Any]) -> dict[str, Any] | None: parts = composite_key.split("|") if len(parts) != 3: continue + relationship_label = parts[1] + if any( + is_secondary_bidirectional_edge(edge) + for edge in (ontology.edge_types if ontology else ()) + if edge.label == relationship_label + ): + continue total_instances = len(full_relationship_instances.get(composite_key, [])) relationships.append( { diff --git a/src/api/management/application/workspace_readiness.py b/src/api/management/application/workspace_readiness.py index 0ced02c13..2892f00f1 100644 --- a/src/api/management/application/workspace_readiness.py +++ b/src/api/management/application/workspace_readiness.py @@ -3,6 +3,7 @@ from __future__ import annotations from management.domain.ontology_prepopulation import relationship_readiness_key +from management.domain.relationship_pairing import is_primary_relationship_for_display from management.domain.value_objects import OntologyConfig, WorkspaceReadinessStatus @@ -22,7 +23,9 @@ def evaluate_workspace_readiness(ontology: OntologyConfig | None) -> WorkspaceRe prepopulated_relationships_without_instances = tuple( relationship_readiness_key(edge_type) for edge_type in 
ontology.edge_types - if edge_type.prepopulated and edge_type.prepopulated_instance_count <= 0 + if edge_type.prepopulated + and edge_type.prepopulated_instance_count <= 0 + and is_primary_relationship_for_display(edge_type) ) has_min_entities = node_type_count >= 1 @@ -84,6 +87,7 @@ def prepopulated_gaps_from_live_counts( for edge_type in ontology.edge_types if edge_type.prepopulated and relationship_instance_counts.get(relationship_readiness_key(edge_type), 0) <= 0 + and is_primary_relationship_for_display(edge_type) ) return { "entity_types_without_instances": entity_gaps, diff --git a/src/api/management/domain/relationship_pairing.py b/src/api/management/domain/relationship_pairing.py index 8d3bf3445..623b8e41f 100644 --- a/src/api/management/domain/relationship_pairing.py +++ b/src/api/management/domain/relationship_pairing.py @@ -76,8 +76,47 @@ def _is_primary_bidirectional_edge(edge_type: EdgeTypeDefinition) -> bool: return edge_type.bidirectional and not edge_type.auto_generated and not edge_type.inverse_of +def is_secondary_bidirectional_edge(edge_type: EdgeTypeDefinition) -> bool: + """True for inverse edge types that should not appear as separate design-artifact rows.""" + return bool(edge_type.auto_generated or edge_type.inverse_of) + + +def is_primary_relationship_for_display(edge_type: EdgeTypeDefinition) -> bool: + """Relationship types rendered as a single primary row in design artifacts.""" + return not is_secondary_bidirectional_edge(edge_type) + + +def dedupe_manual_inverse_edge_types(config: OntologyConfig) -> OntologyConfig: + """Drop manually-authored inverse duplicates so pairing can recreate metadata.""" + edge_types = list(config.edge_types) + by_label = {edge.label: edge for edge in edge_types} + labels_to_drop: set[str] = set() + + for primary in edge_types: + if not _is_primary_bidirectional_edge(primary): + continue + if not primary.source_labels or not primary.target_labels: + continue + inverse_label = 
resolve_inverse_label_for_primary(primary) + existing = by_label.get(inverse_label) + if existing is None or is_secondary_bidirectional_edge(existing): + continue + labels_to_drop.add(inverse_label) + + if not labels_to_drop: + return config + + filtered = tuple(edge for edge in edge_types if edge.label not in labels_to_drop) + return OntologyConfig( + node_types=config.node_types, + edge_types=filtered, + approved_at=config.approved_at, + ) + + def expand_ontology_bidirectional_pairs(config: OntologyConfig) -> OntologyConfig: """Ensure every primary bidirectional edge type has a linked inverse type definition.""" + config = dedupe_manual_inverse_edge_types(config) edge_types = list(config.edge_types) by_label = {edge.label: edge for edge in edge_types} diff --git a/src/api/tests/unit/management/application/test_design_artifacts_pairing.py b/src/api/tests/unit/management/application/test_design_artifacts_pairing.py index 4e3ea479c..d0edd1002 100644 --- a/src/api/tests/unit/management/application/test_design_artifacts_pairing.py +++ b/src/api/tests/unit/management/application/test_design_artifacts_pairing.py @@ -32,3 +32,40 @@ def test_design_artifacts_exposes_reverse_relationship_type() -> None: row for row in artifacts["relationships"] if row["relationship_type"] == "contains" ) assert contains["reverse_relationship_type"] == "contained_in" + inverse_labels = {row["relationship_type"] for row in artifacts["relationships"]} + assert "contained_in" not in inverse_labels + assert len(artifacts["relationships"]) == 1 + + +def test_design_artifacts_hides_auto_generated_inverse_rows() -> None: + ontology = expand_ontology_bidirectional_pairs( + OntologyConfig( + edge_types=( + EdgeTypeDefinition( + label="exercises", + source_labels=("ComponentTest",), + target_labels=("APIEndpoint",), + bidirectional=True, + inverse_label="exercises_inverse", + ), + EdgeTypeDefinition( + label="covered_by", + source_labels=("Feature",), + target_labels=("ComponentTest",), + 
bidirectional=True, + ), + ) + ) + ) + + artifacts = build_design_artifacts( + knowledge_graph_id="kg-1", + ontology=ontology, + graph_data={"nodes": [], "edges": []}, + limit=100, + ) + + labels = {row["relationship_type"] for row in artifacts["relationships"]} + assert labels == {"exercises", "covered_by"} + exercises = next(row for row in artifacts["relationships"] if row["relationship_type"] == "exercises") + assert exercises["reverse_relationship_type"] == "exercises_inverse" diff --git a/src/api/tests/unit/management/domain/test_relationship_pairing.py b/src/api/tests/unit/management/domain/test_relationship_pairing.py index b25a75c24..8bad08987 100644 --- a/src/api/tests/unit/management/domain/test_relationship_pairing.py +++ b/src/api/tests/unit/management/domain/test_relationship_pairing.py @@ -100,6 +100,32 @@ def test_legacy_edge_without_bidirectional_flag_is_unchanged(self) -> None: assert len(expanded.edge_types) == 1 + def test_dedupe_drops_manual_inverse_before_expanding(self) -> None: + config = OntologyConfig( + edge_types=( + EdgeTypeDefinition( + label="exercises", + source_labels=("ComponentTest",), + target_labels=("APIEndpoint",), + bidirectional=True, + inverse_label="exercises_inverse", + ), + EdgeTypeDefinition( + label="exercises_inverse", + source_labels=("APIEndpoint",), + target_labels=("ComponentTest",), + bidirectional=True, + ), + ) + ) + + expanded = expand_ontology_bidirectional_pairs(config) + + inverse_rows = [edge for edge in expanded.edge_types if edge.label == "exercises_inverse"] + assert len(inverse_rows) == 1 + assert inverse_rows[0].auto_generated is True + assert inverse_rows[0].inverse_of == "exercises" + class TestExpandTwinEdgeCreates: def test_primary_create_expands_to_inverse(self) -> None: diff --git a/src/dev-ui/app/components/graph-management/GraphDesignRelationshipsPanel.vue b/src/dev-ui/app/components/graph-management/GraphDesignRelationshipsPanel.vue index 48feb57d7..90ba6d09a 100644 --- 
a/src/dev-ui/app/components/graph-management/GraphDesignRelationshipsPanel.vue +++ b/src/dev-ui/app/components/graph-management/GraphDesignRelationshipsPanel.vue @@ -77,7 +77,7 @@ defineExpose({ refresh: fetchRelationships }) <div class="min-w-0"> <h2 class="text-sm font-semibold tracking-tight">Relationship ontology</h2> <p class="text-[11px] leading-snug text-muted-foreground"> - Relationship types and instances for this knowledge graph. + Primary relationship types (forward / inverse on one row). Inverse types are stored but not listed separately. </p> </div> <div class="flex items-center gap-2"> From d632287feba3d2b46c18901399f9a0119e75722e Mon Sep 17 00:00:00 2001 From: Austin Redenbaugh <aredenba@redhat.com> Date: Sat, 6 Jun 2026 18:26:15 -0400 Subject: [PATCH 113/153] feat(extraction): add job sets UI, orchestration, and phase3 workspace (#772) Introduce extraction job set configuration on knowledge graphs with materialized jobs, run orchestration (start/pause/halt), and management APIs. Restructure Extraction Jobs mode to k-extract phase3 layout: job sets left, ontology schema right, run extraction and job status below. 
Co-authored-by: Cursor <cursoragent@cursor.com> --- .../application/skill_resolution_service.py | 17 +- src/api/extraction/domain/extraction_job.py | 122 +++++ .../infrastructure/extraction_job_executor.py | 33 ++ .../extraction_run_orchestrator.py | 268 +++++++++ .../infrastructure/models/extraction_job.py | 55 ++ .../repositories/extraction_job_repository.py | 420 +++++++++++++++ .../workload_runtime_settings.py | 1 + .../management/extraction_job_materializer.py | 134 +++++ .../extraction_jobs_dependencies.py | 49 ++ .../management/extraction_jobs_service.py | 333 ++++++++++++ ...h1i2j3k4l5m6_add_extraction_jobs_tables.py | 105 ++++ .../domain/extraction_job_config.py | 130 +++++ .../infrastructure/models/knowledge_graph.py | 3 + .../knowledge_graph_repository.py | 26 + .../presentation/knowledge_graphs/__init__.py | 11 +- .../extraction_jobs_routes.py | 306 +++++++++++ .../test_extraction_job_materializer.py | 54 ++ .../domain/test_extraction_job_config.py | 42 ++ .../GraphExtractionJobSetsPanel.vue | 319 +++++++++++ .../GraphExtractionJobsWorkspace.vue | 507 ++++++++++++++++++ .../pages/knowledge-graphs/[kgId]/manage.vue | 14 +- .../knowledge-graph-manage-workspace.test.ts | 10 +- src/dev-ui/app/utils/kgGraphManagement.ts | 2 +- 23 files changed, 2947 insertions(+), 14 deletions(-) create mode 100644 src/api/extraction/domain/extraction_job.py create mode 100644 src/api/extraction/infrastructure/extraction_job_executor.py create mode 100644 src/api/extraction/infrastructure/extraction_run_orchestrator.py create mode 100644 src/api/extraction/infrastructure/models/extraction_job.py create mode 100644 src/api/extraction/infrastructure/repositories/extraction_job_repository.py create mode 100644 src/api/infrastructure/management/extraction_job_materializer.py create mode 100644 src/api/infrastructure/management/extraction_jobs_dependencies.py create mode 100644 src/api/infrastructure/management/extraction_jobs_service.py create mode 100644 
src/api/infrastructure/migrations/versions/h1i2j3k4l5m6_add_extraction_jobs_tables.py create mode 100644 src/api/management/domain/extraction_job_config.py create mode 100644 src/api/management/presentation/knowledge_graphs/extraction_jobs_routes.py create mode 100644 src/api/tests/unit/infrastructure/management/test_extraction_job_materializer.py create mode 100644 src/api/tests/unit/management/domain/test_extraction_job_config.py create mode 100644 src/dev-ui/app/components/graph-management/GraphExtractionJobSetsPanel.vue create mode 100644 src/dev-ui/app/components/graph-management/GraphExtractionJobsWorkspace.vue diff --git a/src/api/extraction/application/skill_resolution_service.py b/src/api/extraction/application/skill_resolution_service.py index afb724881..70cdeb6f3 100644 --- a/src/api/extraction/application/skill_resolution_service.py +++ b/src/api/extraction/application/skill_resolution_service.py @@ -143,8 +143,15 @@ class ResolvedExtractionSkillPack: }, ExtractionSessionMode.EXTRACTION_OPERATIONS: { "job_setup": ( - "Prioritize extraction job setup, file-targeting strategy, and " - "safe incremental mutation planning." + "Prioritize extraction job set authoring: by_instances batches with required " + "per-instance extraction descriptions (no separate extraction_plan.md). " + "Each description tells the extraction worker what to enrich for assigned entity slugs." + ), + "job_set_contract": ( + "Job sets are saved via extraction-jobs API on the knowledge graph. Each set needs: " + "name, strategy (by_instances primary), entity_type, instances_per_job, and description. " + "Saving regenerates pending jobs from live graph instances. Job sets run sequentially; " + "jobs within a set run concurrently up to worker count." 
), "minor_edits": ( "Allow focused direct graph edits while preserving mutation-log " @@ -168,8 +175,10 @@ class ResolvedExtractionSkillPack: }, GraphManagementUiMode.EXTRACTION_JOBS: { "ui_mode_framing": ( - "Focus on extraction job setup, JobPackage-aware file targeting, and " - "incremental sync planning." + "Focus on extraction job set setup: define by_instances batches with per-instance " + "extraction descriptions, save to regenerate pending jobs, then guide the operator " + "to Run extraction. Use ontology schema panels for context. JobPackage readiness " + "still applies when file-backed context is required." ), }, GraphManagementUiMode.ONE_OFF_MUTATIONS: { diff --git a/src/api/extraction/domain/extraction_job.py b/src/api/extraction/domain/extraction_job.py new file mode 100644 index 000000000..24f7e5ae2 --- /dev/null +++ b/src/api/extraction/domain/extraction_job.py @@ -0,0 +1,122 @@ +"""Domain types for materialized extraction jobs and runs.""" + +from __future__ import annotations + +from dataclasses import dataclass, field +from datetime import datetime +from enum import StrEnum +from typing import Any + + +class ExtractionJobStatus(StrEnum): + """Lifecycle status for one materialized extraction job.""" + + PENDING = "pending" + IN_PROGRESS = "in_progress" + COMPLETED = "completed" + FAILED = "failed" + + +class ExtractionRunStatus(StrEnum): + """Orchestrator state for one knowledge graph extraction run.""" + + IDLE = "idle" + RUNNING = "running" + PAUSING = "pausing" + PAUSED = "paused" + HALTED = "halted" + + +@dataclass(frozen=True) +class ExtractionTargetInstance: + """One entity instance assigned to an extraction job.""" + + slug: str + entity_type: str + properties: dict[str, Any] = field(default_factory=dict) + + def to_dict(self) -> dict[str, Any]: + return { + "slug": self.slug, + "entity_type": self.entity_type, + "properties": dict(self.properties), + } + + @classmethod + def from_dict(cls, data: dict[str, Any]) -> ExtractionTargetInstance: 
+ return cls( + slug=str(data.get("slug") or ""), + entity_type=str(data.get("entity_type") or ""), + properties=dict(data.get("properties") or {}), + ) + + +@dataclass(frozen=True) +class ExtractionJobRecord: + """One persisted extraction job row.""" + + id: str + knowledge_graph_id: str + job_id: str + job_set_name: str + strategy: str + status: ExtractionJobStatus + order_index: int + description: str + target_instances: tuple[ExtractionTargetInstance, ...] = field(default_factory=tuple) + worker_id: str | None = None + started_at: datetime | None = None + completed_at: datetime | None = None + error_message: str | None = None + attempt: int = 0 + input_tokens: int = 0 + output_tokens: int = 0 + cache_read_tokens: int = 0 + cache_creation_tokens: int = 0 + cost_usd: float = 0.0 + entities_created: int = 0 + entities_modified: int = 0 + relationships_created: int = 0 + + def to_dict(self) -> dict[str, Any]: + return { + "id": self.id, + "knowledge_graph_id": self.knowledge_graph_id, + "job_id": self.job_id, + "job_set": self.job_set_name, + "job_set_name": self.job_set_name, + "strategy": self.strategy, + "status": self.status.value, + "order_index": self.order_index, + "description": self.description, + "target_instances": [instance.to_dict() for instance in self.target_instances], + "worker_id": self.worker_id, + "started_at": self.started_at.isoformat() if self.started_at else None, + "completed_at": self.completed_at.isoformat() if self.completed_at else None, + "error_message": self.error_message, + "attempt": self.attempt, + "input_tokens": self.input_tokens, + "output_tokens": self.output_tokens, + "cache_read_tokens": self.cache_read_tokens, + "cache_creation_tokens": self.cache_creation_tokens, + "cost_usd": self.cost_usd, + "entities_created": self.entities_created, + "entities_modified": self.entities_modified, + "relationships_created": self.relationships_created, + "instance_count": len(self.target_instances), + "file_count": 0, + } + + 
@dataclass(frozen=True)
class ExtractionRunRecord:
    """Immutable view of the orchestrator run metadata kept per knowledge graph."""

    id: str
    knowledge_graph_id: str
    status: ExtractionRunStatus
    worker_count: int
    # Everything below is optional and unset on a freshly created record.
    started_at: datetime | None = None
    completed_at: datetime | None = None
    pause_requested: bool = False
    orchestrator_pid: int | None = None


class ExtractionJobExecutor:
    """Runs one extraction job using per-instance description guidance."""

    # Simulated usage reported per target instance while the real agent turn
    # is not wired in; the order here is the key order of the returned metrics.
    _SIMULATED_RATE_PER_INSTANCE: tuple[tuple[str, int | float], ...] = (
        ("input_tokens", 100),
        ("output_tokens", 50),
        ("cache_read_tokens", 0),
        ("cache_creation_tokens", 0),
        ("cost_usd", 0.001),
        ("entities_created", 0),
        ("entities_modified", 1),
        ("relationships_created", 0),
    )

    async def execute(self, job: ExtractionJobRecord) -> dict[str, Any]:
        """Process target instances for one job.

        This is a stub: the sticky extraction agent container path is meant to
        replace it with a full Claude Agent SDK turn scoped to
        ``job.description`` and the assigned instance slugs. Until then it
        waits briefly and reports a successful run whose metrics scale with the
        number of target instances, so orchestration, status APIs and the UI
        can be exercised end-to-end.
        """
        await asyncio.sleep(0.05)
        instance_total = len(job.target_instances)
        return {
            metric: rate * instance_total
            for metric, rate in self._SIMULATED_RATE_PER_INSTANCE
        }
__init__( + self, + *, + session_factory: async_sessionmaker[AsyncSession], + job_executor: ExtractionJobExecutor | None = None, + ) -> None: + self._session_factory = session_factory + self._job_executor = job_executor or ExtractionJobExecutor() + self._active: dict[str, _OrchestratorState] = {} + self._lock = asyncio.Lock() + + async def start( + self, + *, + tenant_id: str, + knowledge_graph_id: str, + worker_count: int, + ) -> None: + async with self._lock: + existing = self._active.get(knowledge_graph_id) + if existing and not existing.stop_event.is_set(): + return + + state = _OrchestratorState( + knowledge_graph_id=knowledge_graph_id, + tenant_id=tenant_id, + worker_count=max(1, worker_count), + ) + self._active[knowledge_graph_id] = state + + async with self._session_factory() as session: + repo = ExtractionJobRepository(session) + await repo.upsert_run( + knowledge_graph_id=knowledge_graph_id, + status=ExtractionRunStatus.RUNNING, + worker_count=state.worker_count, + pause_requested=False, + orchestrator_pid=os.getpid(), + started_at=datetime.now(UTC), + completed_at=None, + ) + await session.commit() + + runtime_settings = get_extraction_workload_runtime_settings() + if runtime_settings.backend == "container": + for index in range(state.worker_count): + state.tasks.append( + asyncio.create_task( + self._container_worker_loop(state, worker_index=index + 1) + ) + ) + else: + for index in range(state.worker_count): + state.tasks.append( + asyncio.create_task( + self._in_process_worker_loop(state, worker_index=index + 1) + ) + ) + + async def request_pause(self, *, knowledge_graph_id: str) -> None: + async with self._session_factory() as session: + repo = ExtractionJobRepository(session) + await repo.set_pause_requested(knowledge_graph_id=knowledge_graph_id, pause_requested=True) + await repo.upsert_run( + knowledge_graph_id=knowledge_graph_id, + status=ExtractionRunStatus.PAUSING, + worker_count=1, + pause_requested=True, + ) + await session.commit() + + 
async def halt(self, *, knowledge_graph_id: str) -> None: + state = self._active.get(knowledge_graph_id) + if state is not None: + state.stop_event.set() + for task in state.tasks: + task.cancel() + self._active.pop(knowledge_graph_id, None) + + async with self._session_factory() as session: + repo = ExtractionJobRepository(session) + await repo.mark_in_progress_failed( + knowledge_graph_id=knowledge_graph_id, + error_message="Extraction halted by operator", + ) + await repo.upsert_run( + knowledge_graph_id=knowledge_graph_id, + status=ExtractionRunStatus.HALTED, + worker_count=1, + pause_requested=False, + completed_at=datetime.now(UTC), + ) + await session.commit() + + async def _in_process_worker_loop(self, state: _OrchestratorState, *, worker_index: int) -> None: + worker_id = f"worker-{worker_index:02d}" + try: + while not state.stop_event.is_set(): + async with self._session_factory() as session: + repo = ExtractionJobRepository(session) + if await repo.is_pause_requested(knowledge_graph_id=state.knowledge_graph_id): + await repo.upsert_run( + knowledge_graph_id=state.knowledge_graph_id, + status=ExtractionRunStatus.PAUSED, + worker_count=state.worker_count, + pause_requested=True, + completed_at=datetime.now(UTC), + ) + await session.commit() + state.stop_event.set() + break + + job = await repo.claim_next_pending_job( + knowledge_graph_id=state.knowledge_graph_id, + worker_id=worker_id, + ) + if job is None: + await session.commit() + await self._maybe_finish_run(state) + break + await session.commit() + + try: + metrics = await self._job_executor.execute(job) + except Exception as exc: + async with self._session_factory() as session: + repo = ExtractionJobRepository(session) + await repo.mark_job_failed( + knowledge_graph_id=state.knowledge_graph_id, + job_id=job.job_id, + error_message=str(exc), + ) + await session.commit() + continue + + async with self._session_factory() as session: + repo = ExtractionJobRepository(session) + await 
repo.mark_job_completed( + knowledge_graph_id=state.knowledge_graph_id, + job_id=job.job_id, + metrics=metrics, + ) + await session.commit() + except asyncio.CancelledError: + return + + async def _container_worker_loop(self, state: _OrchestratorState, *, worker_index: int) -> None: + worker_id = f"worker-{worker_index:02d}" + launcher = create_ephemeral_extraction_worker_launcher() + credential_issuer = get_workload_credential_issuer() + runtime_settings = get_extraction_workload_runtime_settings() + + try: + while not state.stop_event.is_set(): + async with self._session_factory() as session: + repo = ExtractionJobRepository(session) + if await repo.is_pause_requested(knowledge_graph_id=state.knowledge_graph_id): + await session.commit() + state.stop_event.set() + break + job = await repo.claim_next_pending_job( + knowledge_graph_id=state.knowledge_graph_id, + worker_id=worker_id, + ) + if job is None: + await session.commit() + await self._maybe_finish_run(state) + break + await session.commit() + + credentials = credential_issuer.issue( + tenant_id=state.tenant_id, + knowledge_graph_id=state.knowledge_graph_id, + ) + launch_result = launcher.launch( + request=EphemeralWorkerLaunchRequest( + tenant_id=state.tenant_id, + knowledge_graph_id=state.knowledge_graph_id, + session_id=f"extraction-job:{job.job_id}", + sync_run_id=job.job_id, + job_package_id=job.id, + ), + credentials=credentials, + ) + logger.info( + "Launched extraction worker %s for job %s (container backend)", + launch_result.worker_id, + job.job_id, + ) + # Container worker is responsible for marking completion via workload API. 
+ await asyncio.sleep(runtime_settings.worker_poll_seconds) + except asyncio.CancelledError: + return + + async def _maybe_finish_run(self, state: _OrchestratorState) -> None: + async with self._session_factory() as session: + repo = ExtractionJobRepository(session) + counts = await repo.count_by_status(knowledge_graph_id=state.knowledge_graph_id) + pending = counts.get("pending", 0) + in_progress = counts.get("in_progress", 0) + if pending == 0 and in_progress == 0: + await repo.upsert_run( + knowledge_graph_id=state.knowledge_graph_id, + status=ExtractionRunStatus.IDLE, + worker_count=state.worker_count, + pause_requested=False, + completed_at=datetime.now(UTC), + ) + await session.commit() + state.stop_event.set() + self._active.pop(state.knowledge_graph_id, None) + + def is_live(self, *, knowledge_graph_id: str) -> bool: + state = self._active.get(knowledge_graph_id) + return state is not None and not state.stop_event.is_set() + + +_orchestrator_singleton: ExtractionRunOrchestrator | None = None + + +def get_extraction_run_orchestrator( + *, + session_factory: async_sessionmaker[AsyncSession], +) -> ExtractionRunOrchestrator: + global _orchestrator_singleton + if _orchestrator_singleton is None: + _orchestrator_singleton = ExtractionRunOrchestrator(session_factory=session_factory) + return _orchestrator_singleton diff --git a/src/api/extraction/infrastructure/models/extraction_job.py b/src/api/extraction/infrastructure/models/extraction_job.py new file mode 100644 index 000000000..099c458f0 --- /dev/null +++ b/src/api/extraction/infrastructure/models/extraction_job.py @@ -0,0 +1,55 @@ +"""SQLAlchemy ORM models for extraction jobs and runs.""" + +from __future__ import annotations + +from datetime import datetime + +import sqlalchemy as sa +from sqlalchemy.dialects.postgresql import JSONB +from sqlalchemy.orm import Mapped, mapped_column + +from infrastructure.database.models import Base, TimestampMixin + + +class ExtractionJobModel(Base, TimestampMixin): + 
"""Materialized extraction job assigned to one knowledge graph.""" + + __tablename__ = "extraction_jobs" + + id: Mapped[str] = mapped_column(sa.String(26), primary_key=True) + knowledge_graph_id: Mapped[str] = mapped_column(sa.String(26), nullable=False) + job_id: Mapped[str] = mapped_column(sa.String(128), nullable=False) + job_set_name: Mapped[str] = mapped_column(sa.String(128), nullable=False) + strategy: Mapped[str] = mapped_column(sa.String(32), nullable=False) + status: Mapped[str] = mapped_column(sa.String(32), nullable=False) + order_index: Mapped[int] = mapped_column(sa.Integer(), nullable=False, default=0) + description: Mapped[str] = mapped_column(sa.Text(), nullable=False, default="") + target_instances: Mapped[list[dict]] = mapped_column(JSONB, nullable=False, default=list) + worker_id: Mapped[str | None] = mapped_column(sa.String(64), nullable=True) + started_at: Mapped[datetime | None] = mapped_column(sa.DateTime(timezone=True), nullable=True) + completed_at: Mapped[datetime | None] = mapped_column(sa.DateTime(timezone=True), nullable=True) + error_message: Mapped[str | None] = mapped_column(sa.Text(), nullable=True) + attempt: Mapped[int] = mapped_column(sa.Integer(), nullable=False, default=0) + input_tokens: Mapped[int] = mapped_column(sa.Integer(), nullable=False, default=0) + output_tokens: Mapped[int] = mapped_column(sa.Integer(), nullable=False, default=0) + cache_read_tokens: Mapped[int] = mapped_column(sa.Integer(), nullable=False, default=0) + cache_creation_tokens: Mapped[int] = mapped_column(sa.Integer(), nullable=False, default=0) + cost_usd: Mapped[float] = mapped_column(sa.Float(), nullable=False, default=0.0) + entities_created: Mapped[int] = mapped_column(sa.Integer(), nullable=False, default=0) + entities_modified: Mapped[int] = mapped_column(sa.Integer(), nullable=False, default=0) + relationships_created: Mapped[int] = mapped_column(sa.Integer(), nullable=False, default=0) + + +class ExtractionRunModel(Base, TimestampMixin): + 
"""Orchestrator run state for one knowledge graph.""" + + __tablename__ = "extraction_runs" + + id: Mapped[str] = mapped_column(sa.String(26), primary_key=True) + knowledge_graph_id: Mapped[str] = mapped_column(sa.String(26), nullable=False, unique=True) + status: Mapped[str] = mapped_column(sa.String(32), nullable=False) + worker_count: Mapped[int] = mapped_column(sa.Integer(), nullable=False, default=1) + pause_requested: Mapped[bool] = mapped_column(sa.Boolean(), nullable=False, default=False) + started_at: Mapped[datetime | None] = mapped_column(sa.DateTime(timezone=True), nullable=True) + completed_at: Mapped[datetime | None] = mapped_column(sa.DateTime(timezone=True), nullable=True) + orchestrator_pid: Mapped[int | None] = mapped_column(sa.Integer(), nullable=True) diff --git a/src/api/extraction/infrastructure/repositories/extraction_job_repository.py b/src/api/extraction/infrastructure/repositories/extraction_job_repository.py new file mode 100644 index 000000000..32d1403c8 --- /dev/null +++ b/src/api/extraction/infrastructure/repositories/extraction_job_repository.py @@ -0,0 +1,420 @@ +"""PostgreSQL repository for materialized extraction jobs and runs.""" + +from __future__ import annotations + +from datetime import UTC, datetime +from typing import Any + +from sqlalchemy import delete, func, select, update +from sqlalchemy.ext.asyncio import AsyncSession +from ulid import ULID + +from extraction.domain.extraction_job import ( + ExtractionJobRecord, + ExtractionJobStatus, + ExtractionRunRecord, + ExtractionRunStatus, + ExtractionTargetInstance, +) +from extraction.infrastructure.models.extraction_job import ExtractionJobModel, ExtractionRunModel + + +def _job_model_to_record(model: ExtractionJobModel) -> ExtractionJobRecord: + return ExtractionJobRecord( + id=model.id, + knowledge_graph_id=model.knowledge_graph_id, + job_id=model.job_id, + job_set_name=model.job_set_name, + strategy=model.strategy, + status=ExtractionJobStatus(model.status), + 
# Columns copied one-to-one from the ORM job row onto the domain record.
_JOB_PASSTHROUGH_FIELDS = (
    "id",
    "knowledge_graph_id",
    "job_id",
    "job_set_name",
    "strategy",
    "order_index",
    "description",
    "worker_id",
    "started_at",
    "completed_at",
    "error_message",
    "attempt",
    "input_tokens",
    "output_tokens",
    "cache_read_tokens",
    "cache_creation_tokens",
    "cost_usd",
    "entities_created",
    "entities_modified",
    "relationships_created",
)


def _job_model_to_record(model: ExtractionJobModel) -> ExtractionJobRecord:
    """Convert an ORM job row into its immutable domain record."""
    status = ExtractionJobStatus(model.status)
    targets = tuple(
        ExtractionTargetInstance.from_dict(raw) for raw in (model.target_instances or [])
    )
    copied = {name: getattr(model, name) for name in _JOB_PASSTHROUGH_FIELDS}
    return ExtractionJobRecord(status=status, target_instances=targets, **copied)


def _run_model_to_record(model: ExtractionRunModel) -> ExtractionRunRecord:
    """Convert an ORM run row into its immutable domain record."""
    status = ExtractionRunStatus(model.status)
    return ExtractionRunRecord(
        id=model.id,
        knowledge_graph_id=model.knowledge_graph_id,
        status=status,
        worker_count=model.worker_count,
        started_at=model.started_at,
        completed_at=model.completed_at,
        pause_requested=model.pause_requested,
        orchestrator_pid=model.orchestrator_pid,
    )


class ExtractionJobRepository:
    """Persistence for extraction jobs and orchestrator runs.

    Methods flush where they need generated state but never commit; the caller
    owns the transaction on the session passed to the constructor.
    """

    def __init__(self, session: AsyncSession) -> None:
        self._session = session

    async def replace_pending_jobs(
        self,
        *,
        knowledge_graph_id: str,
        jobs: list[ExtractionJobRecord],
    ) -> int:
        """Delete the graph's pending jobs, insert ``jobs``, return ``len(jobs)``."""
        purge_pending = delete(ExtractionJobModel).where(
            ExtractionJobModel.knowledge_graph_id == knowledge_graph_id,
            ExtractionJobModel.status == ExtractionJobStatus.PENDING.value,
        )
        await self._session.execute(purge_pending)
        rows = [
            ExtractionJobModel(
                id=job.id,
                knowledge_graph_id=job.knowledge_graph_id,
                job_id=job.job_id,
                job_set_name=job.job_set_name,
                strategy=job.strategy,
                status=job.status.value,
                order_index=job.order_index,
                description=job.description,
                target_instances=[target.to_dict() for target in job.target_instances],
            )
            for job in jobs
        ]
        self._session.add_all(rows)
        await self._session.flush()
        return len(jobs)

    async def count_by_status(self, *, knowledge_graph_id: str) -> dict[str, int]:
        """Count the graph's jobs per status; every known status is present."""
        query = (
            select(ExtractionJobModel.status, func.count())
            .where(ExtractionJobModel.knowledge_graph_id == knowledge_graph_id)
            .group_by(ExtractionJobModel.status)
        )
        rows = await self._session.execute(query)
        tally = dict.fromkeys((member.value for member in ExtractionJobStatus), 0)
        tally.update((str(status), int(count)) for status, count in rows.all())
        return tally

    async def count_by_job_set(self, *, knowledge_graph_id: str) -> dict[str, dict[str, int]]:
        """Count the graph's jobs per job set and status, with a ``total`` per set."""
        query = (
            select(
                ExtractionJobModel.job_set_name,
                ExtractionJobModel.status,
                func.count(),
            )
            .where(ExtractionJobModel.knowledge_graph_id == knowledge_graph_id)
            .group_by(ExtractionJobModel.job_set_name, ExtractionJobModel.status)
        )
        rows = await self._session.execute(query)
        grouped: dict[str, dict[str, int]] = {}
        for job_set_name, status, count in rows.all():
            per_set = grouped.get(job_set_name)
            if per_set is None:
                per_set = {
                    "pending": 0,
                    "in_progress": 0,
                    "completed": 0,
                    "failed": 0,
                    "total": 0,
                }
                grouped[job_set_name] = per_set
            occurrences = int(count)
            per_set[str(status)] = occurrences
            per_set["total"] += occurrences
        return grouped

    async def has_in_progress_jobs(self, *, knowledge_graph_id: str) -> bool:
        """Tell whether the graph has at least one in-progress job."""
        query = select(func.count()).where(
            ExtractionJobModel.knowledge_graph_id == knowledge_graph_id,
            ExtractionJobModel.status == ExtractionJobStatus.IN_PROGRESS.value,
        )
        in_progress = int((await self._session.execute(query)).scalar_one())
        return in_progress > 0

    async def list_recent_jobs(
        self,
        *,
        knowledge_graph_id: str,
        limit: int = 20,
    ) -> list[ExtractionJobRecord]:
        """Return up to ``limit`` jobs, most recently updated first."""
        query = (
            select(ExtractionJobModel)
            .where(ExtractionJobModel.knowledge_graph_id == knowledge_graph_id)
            .order_by(
                ExtractionJobModel.updated_at.desc(),
                ExtractionJobModel.order_index.asc(),
            )
            .limit(limit)
        )
        rows = await self._session.execute(query)
        return [_job_model_to_record(row) for row in rows.scalars().all()]

    async def list_active_workers(self, *, knowledge_graph_id: str) -> list[dict[str, Any]]:
        """Describe each in-progress job as a camelCase worker summary."""
        query = select(ExtractionJobModel).where(
            ExtractionJobModel.knowledge_graph_id == knowledge_graph_id,
            ExtractionJobModel.status == ExtractionJobStatus.IN_PROGRESS.value,
        )
        rows = await self._session.execute(query)
        return [
            {
                "workerId": row.worker_id,
                "jobId": row.job_id,
                "jobSet": row.job_set_name,
                "strategy": row.strategy,
                "fileCount": 0,
                "instanceCount": len(row.target_instances or []),
                "startedAt": row.started_at.isoformat() if row.started_at else None,
            }
            for row in rows.scalars().all()
        ]

    async def claim_next_pending_job(
        self,
        *,
        knowledge_graph_id: str,
        worker_id: str,
    ) -> ExtractionJobRecord | None:
        """Lock and claim the next pending job for ``worker_id``.

        The candidate is the first pending row by ``order_index`` then
        ``job_id``, selected ``FOR UPDATE SKIP LOCKED``. Returns ``None`` when
        no row is selected.
        """
        query = (
            select(ExtractionJobModel)
            .where(
                ExtractionJobModel.knowledge_graph_id == knowledge_graph_id,
                ExtractionJobModel.status == ExtractionJobStatus.PENDING.value,
            )
            .order_by(ExtractionJobModel.order_index.asc(), ExtractionJobModel.job_id.asc())
            .limit(1)
            .with_for_update(skip_locked=True)
        )
        claimed = (await self._session.execute(query)).scalar_one_or_none()
        if claimed is None:
            return None
        claimed.status = ExtractionJobStatus.IN_PROGRESS.value
        claimed.worker_id = worker_id
        claimed.started_at = datetime.now(UTC)
        claimed.attempt = int(claimed.attempt) + 1
        await self._session.flush()
        return _job_model_to_record(claimed)

    async def mark_job_completed(
        self,
        *,
        knowledge_graph_id: str,
        job_id: str,
        metrics: dict[str, Any] | None = None,
    ) -> None:
        """Mark the job completed and store its metrics (missing ones become 0)."""
        payload = metrics or {}
        changes: dict[str, Any] = {
            "status": ExtractionJobStatus.COMPLETED.value,
            "completed_at": datetime.now(UTC),
        }
        for name in (
            "input_tokens",
            "output_tokens",
            "cache_read_tokens",
            "cache_creation_tokens",
        ):
            changes[name] = int(payload.get(name, 0))
        changes["cost_usd"] = float(payload.get("cost_usd", 0.0))
        for name in ("entities_created", "entities_modified", "relationships_created"):
            changes[name] = int(payload.get(name, 0))
        statement = (
            update(ExtractionJobModel)
            .where(
                ExtractionJobModel.knowledge_graph_id == knowledge_graph_id,
                ExtractionJobModel.job_id == job_id,
            )
            .values(**changes)
        )
        await self._session.execute(statement)

    async def mark_job_failed(
        self,
        *,
        knowledge_graph_id: str,
        job_id: str,
        error_message: str,
    ) -> None:
        """Mark the job failed and record ``error_message``."""
        statement = (
            update(ExtractionJobModel)
            .where(
                ExtractionJobModel.knowledge_graph_id == knowledge_graph_id,
                ExtractionJobModel.job_id == job_id,
            )
            .values(
                status=ExtractionJobStatus.FAILED.value,
                completed_at=datetime.now(UTC),
                error_message=error_message,
            )
        )
        await self._session.execute(statement)

    async def mark_in_progress_failed(
        self,
        *,
        knowledge_graph_id: str,
        error_message: str,
    ) -> int:
        """Fail every in-progress job of the graph; return the affected row count."""
        statement = (
            update(ExtractionJobModel)
            .where(
                ExtractionJobModel.knowledge_graph_id == knowledge_graph_id,
                ExtractionJobModel.status == ExtractionJobStatus.IN_PROGRESS.value,
            )
            .values(
                status=ExtractionJobStatus.FAILED.value,
                completed_at=datetime.now(UTC),
                error_message=error_message,
            )
        )
        outcome = await self._session.execute(statement)
        return int(outcome.rowcount or 0)

    async def reset_jobs_by_status(
        self,
        *,
        knowledge_graph_id: str,
        from_status: ExtractionJobStatus,
        to_status: ExtractionJobStatus = ExtractionJobStatus.PENDING,
    ) -> int:
        """Move jobs from ``from_status`` to ``to_status`` and clear run bookkeeping.

        Worker id, timestamps and error message are reset; metric and attempt
        columns are not part of the update. Returns the affected row count.
        """
        statement = (
            update(ExtractionJobModel)
            .where(
                ExtractionJobModel.knowledge_graph_id == knowledge_graph_id,
                ExtractionJobModel.status == from_status.value,
            )
            .values(
                status=to_status.value,
                worker_id=None,
                started_at=None,
                completed_at=None,
                error_message=None,
            )
        )
        outcome = await self._session.execute(statement)
        return int(outcome.rowcount or 0)

    async def reset_all_non_pending(
        self,
        *,
        knowledge_graph_id: str,
    ) -> int:
        """Reset in-progress, completed and failed jobs to pending; return the total."""
        resettable = (
            ExtractionJobStatus.IN_PROGRESS,
            ExtractionJobStatus.COMPLETED,
            ExtractionJobStatus.FAILED,
        )
        affected = [
            await self.reset_jobs_by_status(
                knowledge_graph_id=knowledge_graph_id,
                from_status=status,
            )
            for status in resettable
        ]
        return sum(affected)

    async def aggregate_token_metrics(self, *, knowledge_graph_id: str) -> dict[str, float | int]:
        """Sum token and cost columns over all jobs of the graph (0 when none)."""
        query = select(
            func.coalesce(func.sum(ExtractionJobModel.input_tokens), 0),
            func.coalesce(func.sum(ExtractionJobModel.output_tokens), 0),
            func.coalesce(func.sum(ExtractionJobModel.cache_read_tokens), 0),
            func.coalesce(func.sum(ExtractionJobModel.cache_creation_tokens), 0),
            func.coalesce(func.sum(ExtractionJobModel.cost_usd), 0.0),
        ).where(ExtractionJobModel.knowledge_graph_id == knowledge_graph_id)
        input_sum, output_sum, cache_read_sum, cache_creation_sum, cost_sum = (
            await self._session.execute(query)
        ).one()
        return {
            "totalInputTokens": int(input_sum),
            "totalOutputTokens": int(output_sum),
            "totalCacheReadTokens": int(cache_read_sum),
            "totalCacheCreationTokens": int(cache_creation_sum),
            "totalCostUsd": float(cost_sum),
        }

    async def avg_completed_job_seconds(self, *, knowledge_graph_id: str) -> float | None:
        """Average duration in seconds of completed jobs, or ``None``.

        ``None`` is returned when there is no completed job with both
        timestamps, and also when the average is not strictly positive.
        """
        duration_seconds = func.extract(
            "epoch",
            ExtractionJobModel.completed_at - ExtractionJobModel.started_at,
        )
        query = select(func.avg(duration_seconds)).where(
            ExtractionJobModel.knowledge_graph_id == knowledge_graph_id,
            ExtractionJobModel.status == ExtractionJobStatus.COMPLETED.value,
            ExtractionJobModel.started_at.is_not(None),
            ExtractionJobModel.completed_at.is_not(None),
        )
        average = (await self._session.execute(query)).scalar_one_or_none()
        if average is None:
            return None
        seconds = float(average)
        if seconds > 0:
            return seconds
        return None

    async def get_run(self, *, knowledge_graph_id: str) -> ExtractionRunRecord | None:
        """Load the run record of the graph, or ``None`` when there is none."""
        query = select(ExtractionRunModel).where(
            ExtractionRunModel.knowledge_graph_id == knowledge_graph_id
        )
        row = (await self._session.execute(query)).scalar_one_or_none()
        if not row:
            return None
        return _run_model_to_record(row)

    async def upsert_run(
        self,
        *,
        knowledge_graph_id: str,
        status: ExtractionRunStatus,
        worker_count: int,
        pause_requested: bool = False,
        orchestrator_pid: int | None = None,
        started_at: datetime | None = None,
        completed_at: datetime | None = None,
    ) -> ExtractionRunRecord:
        """Create or update the run row of the graph and return it as a record.

        On update, status, worker count, pause flag and orchestrator pid are
        always overwritten (so an omitted pid clears the stored one), while
        ``started_at`` and ``completed_at`` are only written when given.
        """
        query = select(ExtractionRunModel).where(
            ExtractionRunModel.knowledge_graph_id == knowledge_graph_id
        )
        row = (await self._session.execute(query)).scalar_one_or_none()
        if row is not None:
            row.status = status.value
            row.worker_count = worker_count
            row.pause_requested = pause_requested
            row.orchestrator_pid = orchestrator_pid
            if started_at is not None:
                row.started_at = started_at
            if completed_at is not None:
                row.completed_at = completed_at
        else:
            row = ExtractionRunModel(
                id=str(ULID()),
                knowledge_graph_id=knowledge_graph_id,
                status=status.value,
                worker_count=worker_count,
                pause_requested=pause_requested,
                orchestrator_pid=orchestrator_pid,
                started_at=started_at,
                completed_at=completed_at,
            )
            self._session.add(row)
        await self._session.flush()
        return _run_model_to_record(row)

    async def set_pause_requested(self, *, knowledge_graph_id: str, pause_requested: bool) -> None:
        """Write the pause flag on the graph's run row, if one exists."""
        statement = (
            update(ExtractionRunModel)
            .where(ExtractionRunModel.knowledge_graph_id == knowledge_graph_id)
            .values(pause_requested=pause_requested)
        )
        await self._session.execute(statement)

    async def is_pause_requested(self, *, knowledge_graph_id: str) -> bool:
        """Read the pause flag of the graph's run; ``False`` when there is no run."""
        query = select(ExtractionRunModel.pause_requested).where(
            ExtractionRunModel.knowledge_graph_id == knowledge_graph_id
        )
        flag = (await self._session.execute(query)).scalar_one_or_none()
        return bool(flag)
def _batch_items(items: list[Any], batch_size: int) -> list[list[Any]]:
    """Split ``items`` into consecutive chunks of at most ``batch_size`` elements.

    Raises:
        ValueError: if ``batch_size`` is smaller than 1. A negative size would
            otherwise produce no batches at all and silently drop every item.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size!r}")
    return [items[start : start + batch_size] for start in range(0, len(items), batch_size)]


def _generate_job_id(job_set_name: str, batch_idx: int, content_hash: str) -> str:
    """Build a job id from the job set name, batch index and batch content.

    The suffix is the first 8 hex characters of the SHA-256 of
    ``content_hash``, so the same inputs always give the same id.
    """
    hash_suffix = hashlib.sha256(content_hash.encode()).hexdigest()[:8]
    return f"{job_set_name}_batch_{batch_idx:04d}_{hash_suffix}"


def entity_instance_counts_from_graph(
    *,
    knowledge_graph_id: str,
    graph_data: dict[str, Any],
) -> dict[str, int]:
    """Count the graph's nodes per entity type.

    Only nodes whose ``knowledge_graph_id`` matches and that are not flagged
    ``_redacted`` are counted; nodes without a ``type`` are counted under
    ``"unknown"``. A missing or ``None`` ``"nodes"`` entry yields an empty
    result.
    """
    counts: dict[str, int] = {}
    for node in graph_data.get("nodes") or []:
        if node.get("knowledge_graph_id") != knowledge_graph_id or node.get("_redacted"):
            continue
        entity_type = str(node.get("type") or "unknown")
        counts[entity_type] = counts.get(entity_type, 0) + 1
    return counts
# Node keys that describe the node itself and are never copied into the
# per-instance ``properties`` mapping.
_NON_PROPERTY_NODE_KEYS = frozenset(
    {
        "id",
        "slug",
        "data_source_id",
        "source_path",
        "knowledge_graph_id",
        "graph_id",
        "name",
        "type",
        "domainId",
    }
)


def _node_slug(node: dict[str, Any]) -> str:
    """Pick the node's slug: ``slug``, else ``domainId``, else ``id``, else ``""``."""
    return str(node.get("slug") or node.get("domainId") or node.get("id") or "")


def entity_instances_by_type_from_graph(
    *,
    knowledge_graph_id: str,
    graph_data: dict[str, Any],
) -> dict[str, list[ExtractionTargetInstance]]:
    """Group the graph's nodes into target instances keyed by entity type.

    Nodes are visited in slug order. Nodes of another knowledge graph and
    nodes flagged ``_redacted`` are skipped; nodes without a ``type`` go under
    ``"unknown"``. Properties exclude the reserved node keys and every key
    starting with an underscore.
    """
    by_type: dict[str, list[ExtractionTargetInstance]] = {}
    for node in sorted(graph_data.get("nodes", []), key=_node_slug):
        in_graph = node.get("knowledge_graph_id") == knowledge_graph_id
        if not in_graph or node.get("_redacted"):
            continue
        entity_type = str(node.get("type") or "unknown")
        slug = _node_slug(node)
        properties = {}
        for key, value in node.items():
            if key in _NON_PROPERTY_NODE_KEYS or str(key).startswith("_"):
                continue
            properties[key] = value
        instance = ExtractionTargetInstance(
            slug=slug, entity_type=entity_type, properties=properties
        )
        by_type.setdefault(entity_type, []).append(instance)
    return by_type


def materialize_jobs_from_config(
    *,
    knowledge_graph_id: str,
    config: ExtractionJobConfigDocument,
    graph_data: dict[str, Any],
) -> list[ExtractionJobRecord]:
    """Build pending extraction jobs from job set definitions and live graph instances.

    Only ``BY_INSTANCES`` job sets produce jobs. Each one is split into batches
    of ``instances_per_job`` instances (1 when unset); a job set with no
    matching instances or a batch size below 1 is skipped. ``order_index``
    runs consecutively from 0 across all job sets.
    """
    instances_by_type = entity_instances_by_type_from_graph(
        knowledge_graph_id=knowledge_graph_id,
        graph_data=graph_data,
    )
    materialized: list[ExtractionJobRecord] = []

    instance_job_sets = (
        candidate
        for candidate in config.job_sets
        if not candidate.strategy != ExtractionJobSetStrategy.BY_INSTANCES
    )
    for job_set in instance_job_sets:
        instances = instances_by_type.get(job_set.entity_type or "", [])
        per_job = int(job_set.instances_per_job or 1)
        if per_job < 1 or not instances:
            continue
        description = (job_set.description or "").strip()
        batches = _batch_items(instances, per_job)
        for batch_number, batch in enumerate(batches, start=1):
            slugs = "|".join(member.slug for member in batch)
            record = ExtractionJobRecord(
                id=str(ULID()),
                knowledge_graph_id=knowledge_graph_id,
                job_id=_generate_job_id(job_set.name, batch_number, slugs),
                job_set_name=job_set.name,
                strategy=job_set.strategy.value,
                status=ExtractionJobStatus.PENDING,
                # One job per append, so the running index is the list length.
                order_index=len(materialized),
                description=description,
                target_instances=tuple(batch),
            )
            materialized.append(record)

    return materialized


def projected_job_count(
    job_set: ExtractionJobSetDefinition,
    *,
    entity_instance_counts: dict[str, int],
) -> int | None:
    """Estimate how many jobs a job set would produce.

    Returns ``None`` for job sets that are not ``BY_INSTANCES``, ``0`` when
    the entity type has no instances, and ``None`` when the batch size is
    unset or below 1.

    NOTE(review): ``materialize_jobs_from_config`` treats an unset batch size
    as 1 while this function reports ``None`` for it — confirm which behaviour
    is intended.
    """
    if job_set.strategy != ExtractionJobSetStrategy.BY_INSTANCES:
        return None
    total = entity_instance_counts.get(job_set.entity_type or "", 0)
    if total == 0:
        return 0
    per_job = job_set.instances_per_job
    if total < 0 or per_job is None or per_job < 1:
        return None
    return math.ceil(total / per_job)
import get_knowledge_graph_service +from management.infrastructure.repositories.knowledge_graph_repository import ( + KnowledgeGraphRepository, +) +from extraction.infrastructure.repositories.extraction_job_repository import ( + ExtractionJobRepository, +) + + +def get_write_sessionmaker(request: Request): + return request.app.state.write_sessionmaker + + +def get_extraction_jobs_service( + request: Request, + kg_service: Annotated[KnowledgeGraphService, Depends(get_knowledge_graph_service)], + session: Annotated[AsyncSession, Depends(get_write_session)], + pool: Annotated[ConnectionPool, Depends(get_age_connection_pool)], + current_user: Annotated[CurrentUser, Depends(get_current_user)], +) -> ExtractionJobsService: + outbox = OutboxRepository(session=session) + kg_repo = KnowledgeGraphRepository(session=session, outbox=outbox) + job_repo = ExtractionJobRepository(session=session) + return ExtractionJobsService( + knowledge_graph_service=kg_service, + knowledge_graph_repository=kg_repo, + extraction_job_repository=job_repo, + connection_pool=pool, + tenant_id=current_user.tenant_id.value, + session=session, + session_factory=get_write_sessionmaker(request), + ) diff --git a/src/api/infrastructure/management/extraction_jobs_service.py b/src/api/infrastructure/management/extraction_jobs_service.py new file mode 100644 index 000000000..a7a30a2db --- /dev/null +++ b/src/api/infrastructure/management/extraction_jobs_service.py @@ -0,0 +1,333 @@ +"""Application service for extraction job configuration and execution.""" + +from __future__ import annotations + +from typing import Any + +from starlette.concurrency import run_in_threadpool +from sqlalchemy.ext.asyncio import AsyncSession + +from infrastructure.management.extraction_job_materializer import ( + entity_instance_counts_from_graph, + materialize_jobs_from_config, + projected_job_count, +) +from extraction.infrastructure.extraction_run_orchestrator import get_extraction_run_orchestrator +from 
extraction.domain.extraction_job import ExtractionJobStatus, ExtractionRunStatus +from extraction.infrastructure.repositories.extraction_job_repository import ExtractionJobRepository +from graph.infrastructure.bulk_data_reader import fetch_bulk_graph_data +from infrastructure.database.connection_pool import ConnectionPool +from management.application.services.knowledge_graph_service import KnowledgeGraphService +from management.domain.extraction_job_config import ( + ExtractionJobConfigDocument, + ExtractionJobSetDefinition, +) +from management.infrastructure.repositories.knowledge_graph_repository import ( + KnowledgeGraphRepository, +) + + +class ExtractionJobsService: + """Coordinate extraction job sets, materialization, and run orchestration.""" + + def __init__( + self, + *, + knowledge_graph_service: KnowledgeGraphService, + knowledge_graph_repository: KnowledgeGraphRepository, + extraction_job_repository: ExtractionJobRepository, + connection_pool: ConnectionPool, + tenant_id: str, + session: AsyncSession, + session_factory: Any, + ) -> None: + self._knowledge_graph_service = knowledge_graph_service + self._knowledge_graph_repository = knowledge_graph_repository + self._extraction_job_repository = extraction_job_repository + self._connection_pool = connection_pool + self._tenant_id = tenant_id + self._session = session + self._session_factory = session_factory + + async def _load_graph_data(self) -> dict[str, Any]: + graph_name = f"tenant_{self._tenant_id}" + return await run_in_threadpool( + fetch_bulk_graph_data, + self._connection_pool, + graph_name, + ) + + async def get_extraction_jobs_document( + self, + *, + user_id: str, + kg_id: str, + ) -> dict[str, Any] | None: + kg = await self._knowledge_graph_service.get(user_id=user_id, kg_id=kg_id) + if kg is None: + return None + + config = await self._knowledge_graph_repository.get_extraction_job_config(kg_id) + document = config or ExtractionJobConfigDocument.empty() + graph_data = await 
self._load_graph_data() + counts = entity_instance_counts_from_graph( + knowledge_graph_id=kg_id, + graph_data=graph_data, + ) + entity_types = [ + {"name": name, "instance_count": count} + for name, count in sorted(counts.items(), key=lambda item: item[0]) + ] + return { + **document.to_dict(), + "entity_types": entity_types, + } + + async def save_extraction_jobs_document( + self, + *, + user_id: str, + kg_id: str, + payload: dict[str, Any], + ) -> dict[str, Any]: + kg = await self._knowledge_graph_service.get(user_id=user_id, kg_id=kg_id) + if kg is None: + raise ValueError(f"Knowledge graph '{kg_id}' not found") + + document = ExtractionJobConfigDocument( + version=str(payload.get("version") or "1.0"), + job_sets=tuple( + ExtractionJobSetDefinition.from_dict(row) + for row in (payload.get("job_sets") or []) + ), + ) + graph_data = await self._load_graph_data() + counts = entity_instance_counts_from_graph( + knowledge_graph_id=kg_id, + graph_data=graph_data, + ) + errors = document.validation_errors(entity_instance_counts=counts) + if errors: + raise ValueError("; ".join(errors)) + + await self._knowledge_graph_repository.save_extraction_job_config(kg_id, document) + await self._session.commit() + return document.to_dict() + + async def regenerate_jobs( + self, + *, + user_id: str, + kg_id: str, + ) -> dict[str, Any]: + kg = await self._knowledge_graph_service.get(user_id=user_id, kg_id=kg_id) + if kg is None: + raise ValueError(f"Knowledge graph '{kg_id}' not found") + + if await self._extraction_job_repository.has_in_progress_jobs(knowledge_graph_id=kg_id): + raise ValueError("Cannot regenerate jobs while extraction jobs are in progress.") + + config = await self._knowledge_graph_repository.get_extraction_job_config(kg_id) + document = config or ExtractionJobConfigDocument.empty() + graph_data = await self._load_graph_data() + jobs = materialize_jobs_from_config( + knowledge_graph_id=kg_id, + config=document, + graph_data=graph_data, + ) + generated = await 
self._extraction_job_repository.replace_pending_jobs( + knowledge_graph_id=kg_id, + jobs=jobs, + ) + await self._session.commit() + return {"success": True, "generated_jobs": generated} + + async def get_database_status( + self, + *, + user_id: str, + kg_id: str, + ) -> dict[str, Any] | None: + kg = await self._knowledge_graph_service.get(user_id=user_id, kg_id=kg_id) + if kg is None: + return None + + counts = await self._extraction_job_repository.count_by_status(knowledge_graph_id=kg_id) + jobs_by_set = await self._extraction_job_repository.count_by_job_set( + knowledge_graph_id=kg_id + ) + recent_jobs = await self._extraction_job_repository.list_recent_jobs( + knowledge_graph_id=kg_id, + limit=20, + ) + active_workers = await self._extraction_job_repository.list_active_workers( + knowledge_graph_id=kg_id + ) + token_metrics = await self._extraction_job_repository.aggregate_token_metrics( + knowledge_graph_id=kg_id + ) + avg_completed = await self._extraction_job_repository.avg_completed_job_seconds( + knowledge_graph_id=kg_id + ) + graph_data = await self._load_graph_data() + entity_counts = entity_instance_counts_from_graph( + knowledge_graph_id=kg_id, + graph_data=graph_data, + ) + return { + "exists": True, + "jobsByStatus": { + "pending": counts.get("pending", 0), + "in_progress": counts.get("in_progress", 0), + "completed": counts.get("completed", 0), + "failed": counts.get("failed", 0), + }, + "jobsBySet": jobs_by_set, + "recentJobs": [ + { + "jobId": job.job_id, + "jobSet": job.job_set_name, + "status": job.status.value, + "workerId": job.worker_id, + "startedAt": job.started_at.isoformat() if job.started_at else None, + "completedAt": job.completed_at.isoformat() if job.completed_at else None, + "inputTokens": job.input_tokens, + "outputTokens": job.output_tokens, + "writeOps": job.entities_created + job.entities_modified + job.relationships_created, + "assistantPreview": job.description[:120] if job.description else None, + } + for job in recent_jobs + 
], + "activeWorkers": active_workers, + "avgCompletedJobSeconds": avg_completed, + "entitiesByType": entity_counts, + "entitiesTotal": sum(entity_counts.values()), + **token_metrics, + } + + async def get_extraction_run_state( + self, + *, + user_id: str, + kg_id: str, + ) -> dict[str, Any] | None: + kg = await self._knowledge_graph_service.get(user_id=user_id, kg_id=kg_id) + if kg is None: + return None + + run = await self._extraction_job_repository.get_run(knowledge_graph_id=kg_id) + orchestrator = get_extraction_run_orchestrator(session_factory=self._session_factory) + live = orchestrator.is_live(knowledge_graph_id=kg_id) + if run is None: + return { + "live": live, + "status": ExtractionRunStatus.IDLE.value, + "workerCount": 0, + "pauseRequested": False, + } + return { + "live": live or run.status in {ExtractionRunStatus.RUNNING, ExtractionRunStatus.PAUSING}, + "status": run.status.value, + "workerCount": run.worker_count, + "pauseRequested": run.pause_requested, + "startedAt": run.started_at.isoformat() if run.started_at else None, + "completedAt": run.completed_at.isoformat() if run.completed_at else None, + "orchestratorPid": run.orchestrator_pid, + } + + async def get_extraction_plan_summary( + self, + *, + user_id: str, + kg_id: str, + ) -> dict[str, Any] | None: + payload = await self.get_extraction_jobs_document(user_id=user_id, kg_id=kg_id) + if payload is None: + return None + counts = { + row["name"]: row["instance_count"] for row in payload.get("entity_types", []) + } + job_sets = [] + for raw in payload.get("job_sets", []): + job_set = ExtractionJobSetDefinition.from_dict(raw) + job_sets.append( + { + **raw, + "projected_jobs": projected_job_count(job_set, entity_instance_counts=counts), + } + ) + return {"job_sets": job_sets, "entity_types": payload.get("entity_types", [])} + + async def start_extraction( + self, + *, + user_id: str, + kg_id: str, + workers: int, + ) -> dict[str, Any]: + kg = await 
self._knowledge_graph_service.get(user_id=user_id, kg_id=kg_id) + if kg is None: + raise ValueError(f"Knowledge graph '{kg_id}' not found") + + orchestrator = get_extraction_run_orchestrator(session_factory=self._session_factory) + await orchestrator.start( + tenant_id=self._tenant_id, + knowledge_graph_id=kg_id, + worker_count=max(1, workers), + ) + await self._session.commit() + return { + "success": True, + "message": f"Started extraction with {max(1, workers)} worker(s).", + } + + async def pause_extraction(self, *, user_id: str, kg_id: str) -> dict[str, Any]: + kg = await self._knowledge_graph_service.get(user_id=user_id, kg_id=kg_id) + if kg is None: + raise ValueError(f"Knowledge graph '{kg_id}' not found") + orchestrator = get_extraction_run_orchestrator(session_factory=self._session_factory) + await orchestrator.request_pause(knowledge_graph_id=kg_id) + await self._session.commit() + return {"success": True, "message": "Pause requested; in-flight jobs will finish first."} + + async def halt_extraction(self, *, user_id: str, kg_id: str) -> dict[str, Any]: + kg = await self._knowledge_graph_service.get(user_id=user_id, kg_id=kg_id) + if kg is None: + raise ValueError(f"Knowledge graph '{kg_id}' not found") + orchestrator = get_extraction_run_orchestrator(session_factory=self._session_factory) + await orchestrator.halt(knowledge_graph_id=kg_id) + await self._session.commit() + return {"success": True, "message": "Extraction halted and incomplete jobs marked failed."} + + async def reset_stale_jobs(self, *, user_id: str, kg_id: str) -> dict[str, Any]: + _ = await self._knowledge_graph_service.get(user_id=user_id, kg_id=kg_id) + reset = await self._extraction_job_repository.reset_jobs_by_status( + knowledge_graph_id=kg_id, + from_status=ExtractionJobStatus.IN_PROGRESS, + ) + await self._session.commit() + return {"success": True, "reset_count": reset} + + async def reset_completed_jobs(self, *, user_id: str, kg_id: str) -> dict[str, Any]: + _ = await 
self._knowledge_graph_service.get(user_id=user_id, kg_id=kg_id) + reset = await self._extraction_job_repository.reset_jobs_by_status( + knowledge_graph_id=kg_id, + from_status=ExtractionJobStatus.COMPLETED, + ) + await self._session.commit() + return {"success": True, "reset_count": reset} + + async def reset_failed_jobs(self, *, user_id: str, kg_id: str) -> dict[str, Any]: + _ = await self._knowledge_graph_service.get(user_id=user_id, kg_id=kg_id) + reset = await self._extraction_job_repository.reset_jobs_by_status( + knowledge_graph_id=kg_id, + from_status=ExtractionJobStatus.FAILED, + ) + await self._session.commit() + return {"success": True, "reset_count": reset} + + async def reset_extraction(self, *, user_id: str, kg_id: str) -> dict[str, Any]: + _ = await self._knowledge_graph_service.get(user_id=user_id, kg_id=kg_id) + reset = await self._extraction_job_repository.reset_all_non_pending(knowledge_graph_id=kg_id) + await self._session.commit() + return {"success": True, "reset_count": reset} diff --git a/src/api/infrastructure/migrations/versions/h1i2j3k4l5m6_add_extraction_jobs_tables.py b/src/api/infrastructure/migrations/versions/h1i2j3k4l5m6_add_extraction_jobs_tables.py new file mode 100644 index 000000000..ba639191f --- /dev/null +++ b/src/api/infrastructure/migrations/versions/h1i2j3k4l5m6_add_extraction_jobs_tables.py @@ -0,0 +1,105 @@ +"""Add extraction job config and materialized extraction jobs tables. 
+ +Revision ID: h1i2j3k4l5m6 +Revises: g9h0i1j2k3l4 +Create Date: 2026-06-05 +""" + +from typing import Sequence, Union + +import sqlalchemy as sa +from alembic import op +from sqlalchemy.dialects.postgresql import JSONB + +revision: str = "h1i2j3k4l5m6" +down_revision: Union[str, Sequence[str], None] = "g9h0i1j2k3l4" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + op.add_column( + "knowledge_graphs", + sa.Column("extraction_job_config", JSONB(), nullable=True), + ) + + op.create_table( + "extraction_jobs", + sa.Column("id", sa.String(length=26), primary_key=True), + sa.Column("knowledge_graph_id", sa.String(length=26), nullable=False), + sa.Column("job_id", sa.String(length=128), nullable=False), + sa.Column("job_set_name", sa.String(length=128), nullable=False), + sa.Column("strategy", sa.String(length=32), nullable=False), + sa.Column("status", sa.String(length=32), nullable=False), + sa.Column("order_index", sa.Integer(), nullable=False, server_default="0"), + sa.Column("description", sa.Text(), nullable=False, server_default=""), + sa.Column("target_instances", JSONB(), nullable=False, server_default="[]"), + sa.Column("worker_id", sa.String(length=64), nullable=True), + sa.Column("started_at", sa.DateTime(timezone=True), nullable=True), + sa.Column("completed_at", sa.DateTime(timezone=True), nullable=True), + sa.Column("error_message", sa.Text(), nullable=True), + sa.Column("attempt", sa.Integer(), nullable=False, server_default="0"), + sa.Column("input_tokens", sa.Integer(), nullable=False, server_default="0"), + sa.Column("output_tokens", sa.Integer(), nullable=False, server_default="0"), + sa.Column("cache_read_tokens", sa.Integer(), nullable=False, server_default="0"), + sa.Column("cache_creation_tokens", sa.Integer(), nullable=False, server_default="0"), + sa.Column("cost_usd", sa.Float(), nullable=False, server_default="0"), + sa.Column("entities_created", 
sa.Integer(), nullable=False, server_default="0"), + sa.Column("entities_modified", sa.Integer(), nullable=False, server_default="0"), + sa.Column("relationships_created", sa.Integer(), nullable=False, server_default="0"), + sa.Column( + "created_at", + sa.DateTime(timezone=True), + nullable=False, + server_default=sa.text("now()"), + ), + sa.Column( + "updated_at", + sa.DateTime(timezone=True), + nullable=False, + server_default=sa.text("now()"), + ), + sa.UniqueConstraint( + "knowledge_graph_id", + "job_id", + name="uq_extraction_jobs_kg_job_id", + ), + ) + op.create_index("idx_extraction_jobs_kg_id", "extraction_jobs", ["knowledge_graph_id"]) + op.create_index("idx_extraction_jobs_status", "extraction_jobs", ["status"]) + + op.create_table( + "extraction_runs", + sa.Column("id", sa.String(length=26), primary_key=True), + sa.Column("knowledge_graph_id", sa.String(length=26), nullable=False), + sa.Column("status", sa.String(length=32), nullable=False), + sa.Column("worker_count", sa.Integer(), nullable=False, server_default="1"), + sa.Column("pause_requested", sa.Boolean(), nullable=False, server_default="false"), + sa.Column("started_at", sa.DateTime(timezone=True), nullable=True), + sa.Column("completed_at", sa.DateTime(timezone=True), nullable=True), + sa.Column("orchestrator_pid", sa.Integer(), nullable=True), + sa.Column( + "created_at", + sa.DateTime(timezone=True), + nullable=False, + server_default=sa.text("now()"), + ), + sa.Column( + "updated_at", + sa.DateTime(timezone=True), + nullable=False, + server_default=sa.text("now()"), + ), + sa.UniqueConstraint( + "knowledge_graph_id", + name="uq_extraction_runs_kg_id", + ), + ) + + +def downgrade() -> None: + op.drop_table("extraction_runs") + op.drop_index("idx_extraction_jobs_status", table_name="extraction_jobs") + op.drop_index("idx_extraction_jobs_kg_id", table_name="extraction_jobs") + op.drop_table("extraction_jobs") + op.drop_column("knowledge_graphs", "extraction_job_config") diff --git 
a/src/api/management/domain/extraction_job_config.py b/src/api/management/domain/extraction_job_config.py new file mode 100644 index 000000000..cff6445d0 --- /dev/null +++ b/src/api/management/domain/extraction_job_config.py @@ -0,0 +1,130 @@ +"""Extraction job set configuration value objects.""" + +from __future__ import annotations + +from dataclasses import dataclass, field +from enum import StrEnum +from typing import Any + + +class ExtractionJobSetStrategy(StrEnum): + """Batching strategy for an extraction job set.""" + + BY_INSTANCES = "by_instances" + BY_FILES = "by_files" + + +@dataclass(frozen=True) +class ExtractionJobSetDefinition: + """One job set describing how to batch extraction work.""" + + name: str + strategy: ExtractionJobSetStrategy + description: str | None = None + entity_type: str | None = None + instances_per_job: int | None = None + file_patterns: tuple[str, ...] = field(default_factory=tuple) + files_per_job: int | None = None + + def __post_init__(self) -> None: + if not self.name or not self.name.strip(): + raise ValueError("Job set name must not be empty") + + def validation_errors(self, *, entity_instance_counts: dict[str, int]) -> tuple[str, ...]: + """Return human-readable validation errors for this job set.""" + errors: list[str] = [] + if self.strategy == ExtractionJobSetStrategy.BY_INSTANCES: + if not self.entity_type or not self.entity_type.strip(): + errors.append(f"{self.name}: entity type is required for by_instances.") + else: + count = entity_instance_counts.get(self.entity_type, 0) + if count <= 0: + errors.append( + f"{self.name}: selected entity type '{self.entity_type}' has 0 instances." + ) + per_job = self.instances_per_job + if per_job is None or not isinstance(per_job, int) or per_job < 1: + errors.append(f"{self.name}: instances_per_job must be an integer >= 1.") + if not self.description or not self.description.strip(): + errors.append( + f"{self.name}: per-instance extraction description is required." 
+ ) + elif self.strategy == ExtractionJobSetStrategy.BY_FILES: + if not self.file_patterns: + errors.append(f"{self.name}: at least one file pattern is required for by_files.") + per_job = self.files_per_job + if per_job is None or not isinstance(per_job, int) or per_job < 1: + errors.append(f"{self.name}: files_per_job must be an integer >= 1.") + return tuple(errors) + + def to_dict(self) -> dict[str, Any]: + payload: dict[str, Any] = { + "name": self.name, + "strategy": self.strategy.value, + } + if self.description: + payload["description"] = self.description + if self.strategy == ExtractionJobSetStrategy.BY_INSTANCES: + if self.entity_type: + payload["entity_type"] = self.entity_type + if self.instances_per_job is not None: + payload["instances_per_job"] = self.instances_per_job + else: + payload["file_patterns"] = list(self.file_patterns) + if self.files_per_job is not None: + payload["files_per_job"] = self.files_per_job + return payload + + @classmethod + def from_dict(cls, data: dict[str, Any]) -> ExtractionJobSetDefinition: + strategy = ExtractionJobSetStrategy(str(data["strategy"])) + raw_patterns = data.get("file_patterns") or [] + return cls( + name=str(data["name"]), + strategy=strategy, + description=str(data["description"]).strip() if data.get("description") else None, + entity_type=str(data["entity_type"]).strip() if data.get("entity_type") else None, + instances_per_job=int(data["instances_per_job"]) + if data.get("instances_per_job") is not None + else None, + file_patterns=tuple(str(pattern) for pattern in raw_patterns), + files_per_job=int(data["files_per_job"]) if data.get("files_per_job") is not None else None, + ) + + +@dataclass(frozen=True) +class ExtractionJobConfigDocument: + """Persisted extraction job configuration for one knowledge graph.""" + + version: str + job_sets: tuple[ExtractionJobSetDefinition, ...] 
= field(default_factory=tuple) + + def validation_errors(self, *, entity_instance_counts: dict[str, int]) -> tuple[str, ...]: + errors: list[str] = [] + seen_names: set[str] = set() + for job_set in self.job_sets: + if job_set.name in seen_names: + errors.append(f"Duplicate job set name '{job_set.name}'.") + seen_names.add(job_set.name) + errors.extend(job_set.validation_errors(entity_instance_counts=entity_instance_counts)) + return tuple(errors) + + def to_dict(self) -> dict[str, Any]: + return { + "version": self.version, + "job_sets": [job_set.to_dict() for job_set in self.job_sets], + } + + @classmethod + def from_dict(cls, data: dict[str, Any] | None) -> ExtractionJobConfigDocument | None: + if not data: + return None + raw_sets = data.get("job_sets") or [] + return cls( + version=str(data.get("version") or "1.0"), + job_sets=tuple(ExtractionJobSetDefinition.from_dict(row) for row in raw_sets), + ) + + @classmethod + def empty(cls) -> ExtractionJobConfigDocument: + return cls(version="1.0", job_sets=()) diff --git a/src/api/management/infrastructure/models/knowledge_graph.py b/src/api/management/infrastructure/models/knowledge_graph.py index e6eec1dd4..c8a64c07b 100644 --- a/src/api/management/infrastructure/models/knowledge_graph.py +++ b/src/api/management/infrastructure/models/knowledge_graph.py @@ -47,6 +47,9 @@ class KnowledgeGraphModel(Base, TimestampMixin): String(26), nullable=True ) ontology: Mapped[dict | None] = mapped_column(JSONB, nullable=True, default=None) + extraction_job_config: Mapped[dict | None] = mapped_column( + JSONB, nullable=True, default=None + ) maintenance_schedule: Mapped[dict | None] = mapped_column( JSONB, nullable=True, default=None ) diff --git a/src/api/management/infrastructure/repositories/knowledge_graph_repository.py b/src/api/management/infrastructure/repositories/knowledge_graph_repository.py index e9e12ab97..ec086af06 100644 --- a/src/api/management/infrastructure/repositories/knowledge_graph_repository.py +++ 
b/src/api/management/infrastructure/repositories/knowledge_graph_repository.py @@ -13,6 +13,7 @@ from sqlalchemy.ext.asyncio import AsyncSession from management.domain.aggregates import KnowledgeGraph +from management.domain.extraction_job_config import ExtractionJobConfigDocument from management.domain.value_objects import ( KnowledgeGraphMaintenanceRunRecord, KnowledgeGraphMaintenanceSchedule, @@ -246,6 +247,31 @@ async def get_ontology(self, kg_id: str) -> OntologyConfig | None: return OntologyConfig.from_dict(model.ontology) + async def save_extraction_job_config( + self, + kg_id: str, + config: ExtractionJobConfigDocument, + ) -> None: + from sqlalchemy import update + + stmt = ( + update(KnowledgeGraphModel) + .where(KnowledgeGraphModel.id == kg_id) + .values(extraction_job_config=config.to_dict()) + ) + result = await self._session.execute(stmt) + await self._session.flush() + if result.rowcount == 0: # type: ignore[attr-defined] + raise KnowledgeGraphNotFoundError(f"Knowledge graph '{kg_id}' not found") + + async def get_extraction_job_config(self, kg_id: str) -> ExtractionJobConfigDocument | None: + stmt = select(KnowledgeGraphModel).where(KnowledgeGraphModel.id == kg_id) + result = await self._session.execute(stmt) + model = result.scalar_one_or_none() + if model is None: + return None + return ExtractionJobConfigDocument.from_dict(model.extraction_job_config) + def _to_domain(self, model: KnowledgeGraphModel) -> KnowledgeGraph: """Reconstitute aggregate from database state without generating events.""" ontology: OntologyConfig | None = None diff --git a/src/api/management/presentation/knowledge_graphs/__init__.py b/src/api/management/presentation/knowledge_graphs/__init__.py index 54b023c90..d82e940c0 100644 --- a/src/api/management/presentation/knowledge_graphs/__init__.py +++ b/src/api/management/presentation/knowledge_graphs/__init__.py @@ -2,6 +2,15 @@ from __future__ import annotations -from management.presentation.knowledge_graphs.routes import 
router +from fastapi import APIRouter + +from management.presentation.knowledge_graphs.extraction_jobs_routes import ( + router as extraction_jobs_router, +) +from management.presentation.knowledge_graphs.routes import router as knowledge_graphs_router + +router = APIRouter() +router.include_router(knowledge_graphs_router) +router.include_router(extraction_jobs_router) __all__ = ["router"] diff --git a/src/api/management/presentation/knowledge_graphs/extraction_jobs_routes.py b/src/api/management/presentation/knowledge_graphs/extraction_jobs_routes.py new file mode 100644 index 000000000..4758fcde0 --- /dev/null +++ b/src/api/management/presentation/knowledge_graphs/extraction_jobs_routes.py @@ -0,0 +1,306 @@ +"""HTTP routes for extraction job configuration and execution.""" + +from __future__ import annotations + +from typing import Annotated, Any + +from fastapi import APIRouter, Depends, HTTPException, status +from pydantic import BaseModel, Field + +from iam.application.value_objects import CurrentUser +from iam.dependencies.user import get_current_user +from infrastructure.management.extraction_jobs_service import ExtractionJobsService +from infrastructure.management.extraction_jobs_dependencies import get_extraction_jobs_service +from management.ports.exceptions import UnauthorizedError + +router = APIRouter(tags=["extraction-jobs"]) + + +class ExtractionJobSetModel(BaseModel): + name: str + strategy: str + description: str | None = None + entity_type: str | None = None + instances_per_job: int | None = None + file_patterns: list[str] = Field(default_factory=list) + files_per_job: int | None = None + + +class ExtractionJobsDocumentRequest(BaseModel): + version: str = "1.0" + job_sets: list[ExtractionJobSetModel] = Field(default_factory=list) + + +class ExtractionJobsDocumentResponse(BaseModel): + version: str + job_sets: list[dict[str, Any]] + entity_types: list[dict[str, Any]] = Field(default_factory=list) + + +class StartExtractionRequest(BaseModel): + 
workers: int = Field(default=2, ge=1, le=32) + + +class ActionResponse(BaseModel): + success: bool + message: str | None = None + generated_jobs: int | None = None + reset_count: int | None = None + + +def _handle_value_error(exc: ValueError) -> HTTPException: + return HTTPException(status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, detail=str(exc)) + + +@router.get( + "/knowledge-graphs/{kg_id}/extraction-jobs", + response_model=ExtractionJobsDocumentResponse, +) +async def get_extraction_jobs( + kg_id: str, + service: Annotated[ExtractionJobsService, Depends(get_extraction_jobs_service)], + current_user: Annotated[CurrentUser, Depends(get_current_user)], +) -> ExtractionJobsDocumentResponse: + try: + payload = await service.get_extraction_jobs_document( + user_id=current_user.user_id.value, + kg_id=kg_id, + ) + except UnauthorizedError: + raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Forbidden") + if payload is None: + raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Knowledge graph not found") + return ExtractionJobsDocumentResponse.model_validate(payload) + + +@router.put( + "/knowledge-graphs/{kg_id}/extraction-jobs", + response_model=ExtractionJobsDocumentResponse, +) +async def save_extraction_jobs( + kg_id: str, + body: ExtractionJobsDocumentRequest, + service: Annotated[ExtractionJobsService, Depends(get_extraction_jobs_service)], + current_user: Annotated[CurrentUser, Depends(get_current_user)], +) -> ExtractionJobsDocumentResponse: + try: + saved = await service.save_extraction_jobs_document( + user_id=current_user.user_id.value, + kg_id=kg_id, + payload=body.model_dump(), + ) + regenerated = await service.regenerate_jobs( + user_id=current_user.user_id.value, + kg_id=kg_id, + ) + except UnauthorizedError: + raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Forbidden") + except ValueError as exc: + raise _handle_value_error(exc) + payload = await service.get_extraction_jobs_document( + 
user_id=current_user.user_id.value, + kg_id=kg_id, + ) + if payload is None: + raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Knowledge graph not found") + payload["last_regenerated_jobs"] = regenerated.get("generated_jobs") + return ExtractionJobsDocumentResponse.model_validate(payload) + + +@router.post( + "/knowledge-graphs/{kg_id}/extraction-jobs/regenerate", + response_model=ActionResponse, +) +async def regenerate_extraction_jobs( + kg_id: str, + service: Annotated[ExtractionJobsService, Depends(get_extraction_jobs_service)], + current_user: Annotated[CurrentUser, Depends(get_current_user)], +) -> ActionResponse: + try: + result = await service.regenerate_jobs( + user_id=current_user.user_id.value, + kg_id=kg_id, + ) + except UnauthorizedError: + raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Forbidden") + except ValueError as exc: + raise _handle_value_error(exc) + return ActionResponse( + success=True, + message=f"Regenerated {result.get('generated_jobs', 0)} jobs", + generated_jobs=int(result.get("generated_jobs") or 0), + ) + + +@router.get("/knowledge-graphs/{kg_id}/extraction-jobs/database-status") +async def get_extraction_database_status( + kg_id: str, + service: Annotated[ExtractionJobsService, Depends(get_extraction_jobs_service)], + current_user: Annotated[CurrentUser, Depends(get_current_user)], +) -> dict[str, Any]: + try: + payload = await service.get_database_status( + user_id=current_user.user_id.value, + kg_id=kg_id, + ) + except UnauthorizedError: + raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Forbidden") + if payload is None: + raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Knowledge graph not found") + return payload + + +@router.get("/knowledge-graphs/{kg_id}/extraction-jobs/run-state") +async def get_extraction_run_state( + kg_id: str, + service: Annotated[ExtractionJobsService, Depends(get_extraction_jobs_service)], + current_user: Annotated[CurrentUser, 
Depends(get_current_user)], +) -> dict[str, Any]: + try: + payload = await service.get_extraction_run_state( + user_id=current_user.user_id.value, + kg_id=kg_id, + ) + except UnauthorizedError: + raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Forbidden") + if payload is None: + raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Knowledge graph not found") + return payload + + +@router.get("/knowledge-graphs/{kg_id}/extraction-jobs/plan-summary") +async def get_extraction_plan_summary( + kg_id: str, + service: Annotated[ExtractionJobsService, Depends(get_extraction_jobs_service)], + current_user: Annotated[CurrentUser, Depends(get_current_user)], +) -> dict[str, Any]: + try: + payload = await service.get_extraction_plan_summary( + user_id=current_user.user_id.value, + kg_id=kg_id, + ) + except UnauthorizedError: + raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Forbidden") + if payload is None: + raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Knowledge graph not found") + return payload + + +@router.post("/knowledge-graphs/{kg_id}/extraction-jobs/start", response_model=ActionResponse) +async def start_extraction( + kg_id: str, + body: StartExtractionRequest, + service: Annotated[ExtractionJobsService, Depends(get_extraction_jobs_service)], + current_user: Annotated[CurrentUser, Depends(get_current_user)], +) -> ActionResponse: + try: + result = await service.start_extraction( + user_id=current_user.user_id.value, + kg_id=kg_id, + workers=body.workers, + ) + except UnauthorizedError: + raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Forbidden") + except ValueError as exc: + raise _handle_value_error(exc) + return ActionResponse(success=True, message=result.get("message")) + + +@router.post("/knowledge-graphs/{kg_id}/extraction-jobs/pause", response_model=ActionResponse) +async def pause_extraction( + kg_id: str, + service: Annotated[ExtractionJobsService, 
Depends(get_extraction_jobs_service)], + current_user: Annotated[CurrentUser, Depends(get_current_user)], +) -> ActionResponse: + try: + result = await service.pause_extraction( + user_id=current_user.user_id.value, + kg_id=kg_id, + ) + except UnauthorizedError: + raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Forbidden") + except ValueError as exc: + raise _handle_value_error(exc) + return ActionResponse(success=True, message=result.get("message")) + + +@router.post("/knowledge-graphs/{kg_id}/extraction-jobs/halt", response_model=ActionResponse) +async def halt_extraction( + kg_id: str, + service: Annotated[ExtractionJobsService, Depends(get_extraction_jobs_service)], + current_user: Annotated[CurrentUser, Depends(get_current_user)], +) -> ActionResponse: + try: + result = await service.halt_extraction( + user_id=current_user.user_id.value, + kg_id=kg_id, + ) + except UnauthorizedError: + raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Forbidden") + except ValueError as exc: + raise _handle_value_error(exc) + return ActionResponse(success=True, message=result.get("message")) + + +@router.post("/knowledge-graphs/{kg_id}/extraction-jobs/reset-stale", response_model=ActionResponse) +async def reset_stale_jobs( + kg_id: str, + service: Annotated[ExtractionJobsService, Depends(get_extraction_jobs_service)], + current_user: Annotated[CurrentUser, Depends(get_current_user)], +) -> ActionResponse: + try: + result = await service.reset_stale_jobs( + user_id=current_user.user_id.value, + kg_id=kg_id, + ) + except UnauthorizedError: + raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Forbidden") + return ActionResponse(success=True, reset_count=int(result.get("reset_count") or 0)) + + +@router.post("/knowledge-graphs/{kg_id}/extraction-jobs/reset-completed", response_model=ActionResponse) +async def reset_completed_jobs( + kg_id: str, + service: Annotated[ExtractionJobsService, Depends(get_extraction_jobs_service)], + 
current_user: Annotated[CurrentUser, Depends(get_current_user)], +) -> ActionResponse: + try: + result = await service.reset_completed_jobs( + user_id=current_user.user_id.value, + kg_id=kg_id, + ) + except UnauthorizedError: + raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Forbidden") + return ActionResponse(success=True, reset_count=int(result.get("reset_count") or 0)) + + +@router.post("/knowledge-graphs/{kg_id}/extraction-jobs/reset-failed", response_model=ActionResponse) +async def reset_failed_jobs( + kg_id: str, + service: Annotated[ExtractionJobsService, Depends(get_extraction_jobs_service)], + current_user: Annotated[CurrentUser, Depends(get_current_user)], +) -> ActionResponse: + try: + result = await service.reset_failed_jobs( + user_id=current_user.user_id.value, + kg_id=kg_id, + ) + except UnauthorizedError: + raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Forbidden") + return ActionResponse(success=True, reset_count=int(result.get("reset_count") or 0)) + + +@router.post("/knowledge-graphs/{kg_id}/extraction-jobs/reset", response_model=ActionResponse) +async def reset_extraction_jobs( + kg_id: str, + service: Annotated[ExtractionJobsService, Depends(get_extraction_jobs_service)], + current_user: Annotated[CurrentUser, Depends(get_current_user)], +) -> ActionResponse: + try: + result = await service.reset_extraction( + user_id=current_user.user_id.value, + kg_id=kg_id, + ) + except UnauthorizedError: + raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Forbidden") + return ActionResponse(success=True, reset_count=int(result.get("reset_count") or 0)) diff --git a/src/api/tests/unit/infrastructure/management/test_extraction_job_materializer.py b/src/api/tests/unit/infrastructure/management/test_extraction_job_materializer.py new file mode 100644 index 000000000..b77f3827d --- /dev/null +++ b/src/api/tests/unit/infrastructure/management/test_extraction_job_materializer.py @@ -0,0 +1,54 @@ +"""Unit 
tests for extraction job materialization.""" + +from infrastructure.management.extraction_job_materializer import materialize_jobs_from_config +from management.domain.extraction_job_config import ( + ExtractionJobConfigDocument, + ExtractionJobSetDefinition, + ExtractionJobSetStrategy, +) + + +def test_materialize_by_instances_batches_graph_nodes() -> None: + config = ExtractionJobConfigDocument( + version="1.0", + job_sets=( + ExtractionJobSetDefinition( + name="features", + strategy=ExtractionJobSetStrategy.BY_INSTANCES, + entity_type="Feature", + instances_per_job=2, + description="Extract acceptance criteria for each feature instance.", + ), + ), + ) + graph_data = { + "nodes": [ + { + "knowledge_graph_id": "kg-1", + "type": "Feature", + "slug": "feature-a", + }, + { + "knowledge_graph_id": "kg-1", + "type": "Feature", + "slug": "feature-b", + }, + { + "knowledge_graph_id": "kg-1", + "type": "Feature", + "slug": "feature-c", + }, + ], + "edges": [], + } + + jobs = materialize_jobs_from_config( + knowledge_graph_id="kg-1", + config=config, + graph_data=graph_data, + ) + + assert len(jobs) == 2 + assert jobs[0].target_instances[0].slug == "feature-a" + assert jobs[0].description.startswith("Extract acceptance") + assert all(job.status.value == "pending" for job in jobs) diff --git a/src/api/tests/unit/management/domain/test_extraction_job_config.py b/src/api/tests/unit/management/domain/test_extraction_job_config.py new file mode 100644 index 000000000..3e505b1f3 --- /dev/null +++ b/src/api/tests/unit/management/domain/test_extraction_job_config.py @@ -0,0 +1,42 @@ +"""Unit tests for extraction job set configuration.""" + +from management.domain.extraction_job_config import ( + ExtractionJobConfigDocument, + ExtractionJobSetDefinition, + ExtractionJobSetStrategy, +) + + +def test_by_instances_requires_description_and_entity_type() -> None: + job_set = ExtractionJobSetDefinition( + name="component_tests", + strategy=ExtractionJobSetStrategy.BY_INSTANCES, + 
entity_type="ComponentTest", + instances_per_job=4, + ) + errors = job_set.validation_errors(entity_instance_counts={"ComponentTest": 10}) + assert any("description" in err.lower() for err in errors) + + +def test_document_rejects_duplicate_job_set_names() -> None: + document = ExtractionJobConfigDocument( + version="1.0", + job_sets=( + ExtractionJobSetDefinition( + name="set_a", + strategy=ExtractionJobSetStrategy.BY_INSTANCES, + entity_type="Feature", + instances_per_job=2, + description="Extract feature details", + ), + ExtractionJobSetDefinition( + name="set_a", + strategy=ExtractionJobSetStrategy.BY_INSTANCES, + entity_type="Feature", + instances_per_job=2, + description="Duplicate name", + ), + ), + ) + errors = document.validation_errors(entity_instance_counts={"Feature": 3}) + assert any("Duplicate" in err for err in errors) diff --git a/src/dev-ui/app/components/graph-management/GraphExtractionJobSetsPanel.vue b/src/dev-ui/app/components/graph-management/GraphExtractionJobSetsPanel.vue new file mode 100644 index 000000000..161efcb17 --- /dev/null +++ b/src/dev-ui/app/components/graph-management/GraphExtractionJobSetsPanel.vue @@ -0,0 +1,319 @@ +<script setup lang="ts"> +import { computed, ref, watch } from 'vue' +import { toast } from 'vue-sonner' +import { Loader2, Save, Layers, FolderSearch, Network, Sparkles } from 'lucide-vue-next' +import { Card, CardHeader, CardTitle, CardDescription, CardContent, CardFooter } from '@/components/ui/card' +import { Button } from '@/components/ui/button' +import { Badge } from '@/components/ui/badge' + +const props = withDefaults( + defineProps<{ + kgId: string + reloadNonce?: number + embedded?: boolean + }>(), + { reloadNonce: 0, embedded: true }, +) + +const emit = defineEmits<{ + saved: [] +}>() + +const { apiFetch } = useApiClient() + +const inputClass = + 'flex h-10 w-full rounded-lg border border-border bg-background px-3 py-2 text-sm text-foreground shadow-sm transition-colors focus-visible:outline-none 
focus-visible:ring-2 focus-visible:ring-primary/40' + +interface ExtractionJobSet { + name: string + description?: string + strategy: 'by_instances' | 'by_files' + entity_type?: string + instances_per_job?: number + file_patterns?: string[] + files_per_job?: number +} + +interface EntityTypeOption { + name: string + instance_count: number +} + +interface ExtractionJobsDocument { + version: string + job_sets: ExtractionJobSet[] +} + +interface ExtractionJobsGetResponse extends ExtractionJobsDocument { + entity_types?: EntityTypeOption[] +} + +const loading = ref(true) +const saving = ref(false) +const doc = ref<ExtractionJobsDocument | null>(null) +const entityTypeOptions = ref<EntityTypeOption[]>([]) + +function cloneDoc(d: ExtractionJobsDocument): ExtractionJobsDocument { + return JSON.parse(JSON.stringify(d)) as ExtractionJobsDocument +} + +async function load() { + loading.value = true + try { + const data = await apiFetch<ExtractionJobsGetResponse>( + `/management/knowledge-graphs/${encodeURIComponent(props.kgId)}/extraction-jobs`, + ) + entityTypeOptions.value = Array.isArray(data.entity_types) + ? [...data.entity_types].sort((a, b) => a.name.localeCompare(b.name)) + : [] + doc.value = cloneDoc({ + version: data.version || '1.0', + job_sets: Array.isArray(data.job_sets) ? data.job_sets : [], + }) + } catch (e: unknown) { + const msg = e instanceof Error ? e.message : String(e) + toast.error('Failed to load extraction jobs', { description: msg }) + doc.value = { version: '1.0', job_sets: [] } + } finally { + loading.value = false + } +} + +function onStrategyChange(js: ExtractionJobSet, strategy: 'by_instances' | 'by_files') { + if (js.strategy === strategy) return + js.strategy = strategy + if (strategy === 'by_instances') { + delete js.file_patterns + delete js.files_per_job + if (!js.entity_type?.trim()) js.entity_type = entityTypeOptions.value[0]?.name ?? 
'' + if (js.instances_per_job === undefined) js.instances_per_job = 4 + } else { + delete js.entity_type + delete js.instances_per_job + if (!js.file_patterns?.length) js.file_patterns = ['**/*'] + if (js.files_per_job === undefined) js.files_per_job = 10 + } +} + +function addJobSet() { + if (!doc.value) return + const index = doc.value.job_sets.length + 1 + doc.value.job_sets.push({ + name: `job_set_${index}`, + strategy: 'by_instances', + entity_type: entityTypeOptions.value[0]?.name ?? '', + instances_per_job: 4, + description: '', + }) +} + +function buildPayload(): ExtractionJobsDocument { + if (!doc.value) throw new Error('No document loaded') + return { + version: doc.value.version || '1.0', + job_sets: doc.value.job_sets.map((js) => { + const base = { name: js.name, strategy: js.strategy } as ExtractionJobSet + if (typeof js.description === 'string' && js.description.trim()) { + base.description = js.description.trim() + } + if (js.strategy === 'by_instances') { + base.entity_type = js.entity_type ?? '' + const n = Number(js.instances_per_job) + if (Number.isFinite(n) && n >= 1) base.instances_per_job = Math.floor(n) + return base + } + base.file_patterns = Array.isArray(js.file_patterns) ? [...js.file_patterns] : [] + const f = Number(js.files_per_job) + if (Number.isFinite(f) && f >= 1) base.files_per_job = Math.floor(f) + return base + }), + } +} + +function getEntityTypeInstanceCount(entityType?: string): number | null { + if (!entityType?.trim()) return null + const hit = entityTypeOptions.value.find((x) => x.name === entityType) + return hit ? 
hit.instance_count : null +} + +function jobSetErrors(js: ExtractionJobSet): string[] { + const errs: string[] = [] + if (js.strategy === 'by_instances') { + if (!js.entity_type?.trim()) errs.push('Entity type is required for by_instances.') + if (getEntityTypeInstanceCount(js.entity_type) === 0) { + errs.push('Selected entity type has 0 instances.') + } + const n = Number(js.instances_per_job) + if (!Number.isInteger(n) || n < 1) errs.push('instances_per_job must be an integer greater than 0.') + if (!js.description?.trim()) errs.push('Per-instance extraction description is required.') + } + return errs +} + +function projectedJobCount(js: ExtractionJobSet): number | null { + if (js.strategy !== 'by_instances') return null + const total = getEntityTypeInstanceCount(js.entity_type) + const perJob = Number(js.instances_per_job) + if (total === null || !Number.isInteger(perJob) || perJob < 1) return null + if (total <= 0) return 0 + return Math.ceil(total / perJob) +} + +const hasValidationErrors = computed(() => { + if (!doc.value) return false + return doc.value.job_sets.some((js) => jobSetErrors(js).length > 0) +}) + +async function save() { + if (!doc.value) return + saving.value = true + try { + await apiFetch( + `/management/knowledge-graphs/${encodeURIComponent(props.kgId)}/extraction-jobs`, + { method: 'PUT', body: buildPayload() }, + ) + toast.success('Saved job sets and regenerated pending jobs') + emit('saved') + await load() + } catch (e: unknown) { + const msg = e instanceof Error ? 
e.message : String(e) + toast.error('Save failed', { description: msg }) + } finally { + saving.value = false + } +} + +watch( + () => [props.kgId, props.reloadNonce] as const, + () => { void load() }, + { immediate: true }, +) + +defineExpose({ refresh: load }) +</script> + +<template> + <div class="space-y-4"> + <div class="flex flex-wrap items-start gap-3 border-b pb-3"> + <div class="flex size-9 shrink-0 items-center justify-center rounded-full bg-primary font-bold text-primary-foreground"> + <Layers class="size-4" /> + </div> + <div> + <h2 class="text-lg font-semibold tracking-tight">Extraction Job — Job Sets</h2> + <p class="text-xs text-muted-foreground"> + Define how extraction work is batched. Each job set runs to completion before the next begins. + Use per-instance descriptions to guide extraction agents (no separate extraction plan document). + </p> + </div> + </div> + + <div v-if="loading" class="flex items-center justify-center py-12"> + <Loader2 class="size-8 animate-spin text-muted-foreground" /> + </div> + + <template v-else-if="doc"> + <Card> + <CardHeader> + <CardTitle class="flex items-center gap-2 text-base"> + <Sparkles class="size-4 text-primary" /> + Job sets + </CardTitle> + <CardDescription> + Author with the assistant above or edit directly. Save regenerates pending jobs from live graph instances. + </CardDescription> + </CardHeader> + <CardContent class="space-y-6"> + <div v-if="doc.job_sets.length === 0" class="text-sm text-muted-foreground"> + No job sets yet. Add one below or ask the assistant to define extraction batches. 
+ </div> + + <div + v-for="(js, idx) in doc.job_sets" + :key="`${js.name}-${idx}`" + class="space-y-4 rounded-xl border border-cyan-500/30 bg-gradient-to-br from-cyan-500/10 via-card to-card p-4 md:p-5" + > + <div class="flex flex-wrap items-start justify-between gap-3"> + <div class="space-y-1"> + <input v-model="js.name" :class="inputClass" placeholder="Job set name" /> + </div> + <Badge variant="outline" class="text-[11px]">#{{ idx + 1 }}</Badge> + </div> + + <div class="grid gap-2 sm:grid-cols-2"> + <button + type="button" + class="flex items-start gap-2 rounded-lg border px-3 py-2 text-left text-sm" + :class="js.strategy === 'by_instances' ? 'border-primary/40 bg-primary/15' : 'border-border'" + @click="onStrategyChange(js, 'by_instances')" + > + <Network class="mt-0.5 size-4 shrink-0 text-cyan-500" /> + <span> + <span class="block font-medium">By instances</span> + <span class="block text-[11px] text-muted-foreground">Enrich known entities</span> + </span> + </button> + <button + type="button" + class="flex items-start gap-2 rounded-lg border px-3 py-2 text-left text-sm" + :class="js.strategy === 'by_files' ? 
'border-primary/40 bg-primary/15' : 'border-border'" + @click="onStrategyChange(js, 'by_files')" + > + <FolderSearch class="mt-0.5 size-4 shrink-0 text-violet-500" /> + <span> + <span class="block font-medium">By files</span> + <span class="block text-[11px] text-muted-foreground">Discover from file patterns</span> + </span> + </button> + </div> + + <template v-if="js.strategy === 'by_instances'"> + <div class="space-y-1.5"> + <label class="text-xs font-medium">Entity type</label> + <select v-model="js.entity_type" :class="inputClass"> + <option value="" disabled>Select entity type</option> + <option v-for="opt in entityTypeOptions" :key="opt.name" :value="opt.name"> + {{ opt.name }} ({{ opt.instance_count }} instances) + </option> + </select> + </div> + <div class="grid gap-3 sm:grid-cols-2"> + <div class="space-y-1.5"> + <label class="text-xs font-medium">Instances per job</label> + <input v-model.number="js.instances_per_job" type="number" min="1" :class="inputClass" /> + </div> + <div class="space-y-1.5"> + <label class="text-xs font-medium">Projected jobs</label> + <div class="flex h-10 items-center rounded-lg border border-dashed bg-muted/30 px-3 font-mono text-sm"> + {{ projectedJobCount(js) ?? '—' }} + </div> + </div> + </div> + </template> + + <div class="space-y-1.5"> + <label class="text-xs font-medium">Per-instance extraction description</label> + <textarea + v-model="js.description" + rows="3" + class="w-full rounded-lg border border-border bg-background px-3 py-2 text-sm" + placeholder="Describe what to extract for each instance in this job set." 
+ /> + </div> + + <ul v-if="jobSetErrors(js).length" class="list-disc space-y-1 pl-4 text-xs text-destructive"> + <li v-for="(err, ei) in jobSetErrors(js)" :key="`${idx}-err-${ei}`">{{ err }}</li> + </ul> + </div> + </CardContent> + <CardFooter class="flex flex-wrap gap-2"> + <Button size="sm" variant="outline" @click="addJobSet">Add job set</Button> + <Button size="sm" :disabled="saving || hasValidationErrors" @click="save"> + <Loader2 v-if="saving" class="mr-1.5 size-3.5 animate-spin" /> + <Save v-else class="mr-1.5 size-3.5" /> + Save job sets + </Button> + </CardFooter> + </Card> + </template> + </div> +</template> diff --git a/src/dev-ui/app/components/graph-management/GraphExtractionJobsWorkspace.vue b/src/dev-ui/app/components/graph-management/GraphExtractionJobsWorkspace.vue new file mode 100644 index 000000000..6ce4494c4 --- /dev/null +++ b/src/dev-ui/app/components/graph-management/GraphExtractionJobsWorkspace.vue @@ -0,0 +1,507 @@ +<script setup lang="ts"> +import { computed, onMounted, onUnmounted, ref, watch } from 'vue' +import { toast } from 'vue-sonner' +import { + Loader2, + RefreshCw, + Play, + Settings, + ClipboardList, + AlertCircle, +} from 'lucide-vue-next' +import { Card, CardHeader, CardTitle, CardDescription, CardContent } from '@/components/ui/card' +import { Button } from '@/components/ui/button' +import { Badge } from '@/components/ui/badge' +import { Separator } from '@/components/ui/separator' +import GraphExtractionJobSetsPanel from '@/components/graph-management/GraphExtractionJobSetsPanel.vue' +import GraphDesignEntitiesPanel from '@/components/graph-management/GraphDesignEntitiesPanel.vue' +import GraphDesignRelationshipsPanel from '@/components/graph-management/GraphDesignRelationshipsPanel.vue' + +const props = defineProps<{ + kgId: string + reloadNonce?: number +}>() + +const { apiFetch } = useApiClient() + +type OntologyTab = 'entities' | 'relationships' + +interface DbStatus { + jobsByStatus: Record<string, number> + 
jobsBySet?: Record<string, { pending: number; in_progress: number; completed: number; failed: number; total: number }> + avgCompletedJobSeconds?: number | null + totalInputTokens: number + totalOutputTokens: number + totalCacheReadTokens: number + totalCacheCreationTokens: number + totalCostUsd: number + recentJobs: Array<{ + jobId: string + jobSet: string + status: string + workerId: string | null + startedAt: string | null + completedAt: string | null + inputTokens: number + outputTokens: number + writeOps: number + assistantPreview: string | null + }> + activeWorkers?: Array<{ + workerId: string + jobId: string + jobSet: string + strategy: string + instanceCount: number + startedAt: string | null + }> +} + +interface ExtractionRunState { + live: boolean + status: string + workerCount: number + pauseRequested: boolean +} + +interface PlanSummary { + job_sets: Array<{ + name: string + strategy: string + entity_type?: string + instances_per_job?: number + projected_jobs?: number | null + }> +} + +const selectedOntologyTab = ref<OntologyTab>('entities') +const jobSetsReloadNonce = ref(0) +const dbStatus = ref<DbStatus | null>(null) +const dbLoading = ref(true) +const dbError = ref<string | null>(null) +const extractionRunState = ref<ExtractionRunState | null>(null) +const planSummary = ref<PlanSummary | null>(null) +const workers = ref(2) +const startingExtraction = ref(false) +const pausingExtraction = ref(false) +const killingExtraction = ref(false) +const regeneratingJobs = ref(false) +const resettingRunning = ref(false) +const resettingCompleted = ref(false) +const resettingFailed = ref(false) +const resettingAll = ref(false) +const optimisticLiveUntilMs = ref<number | null>(null) +const nowMs = ref(Date.now()) + +let autoRefreshInterval: ReturnType<typeof setInterval> | null = null +let clockInterval: ReturnType<typeof setInterval> | null = null + +const basePath = computed(() => `/management/knowledge-graphs/${encodeURIComponent(props.kgId)}/extraction-jobs`) 
+ +async function loadDatabaseStatus() { + dbLoading.value = true + dbError.value = null + try { + dbStatus.value = await apiFetch<DbStatus>(`${basePath.value}/database-status`) + } catch (e: unknown) { + dbError.value = e instanceof Error ? e.message : 'Failed to load status' + } finally { + dbLoading.value = false + } +} + +async function loadExtractionRunState() { + extractionRunState.value = await apiFetch<ExtractionRunState>(`${basePath.value}/run-state`) +} + +async function loadPlanSummary() { + planSummary.value = await apiFetch<PlanSummary>(`${basePath.value}/plan-summary`) +} + +async function refreshAll() { + await Promise.all([loadDatabaseStatus(), loadExtractionRunState(), loadPlanSummary()]) +} + +const workerCount = computed(() => Math.max(1, Math.floor(Number(workers.value) || 1))) +const pendingJobsCount = computed(() => Number(dbStatus.value?.jobsByStatus?.pending || 0)) +const inProgressJobsCount = computed(() => Number(dbStatus.value?.jobsByStatus?.in_progress || 0)) +const completedJobsCount = computed(() => Number(dbStatus.value?.jobsByStatus?.completed || 0)) +const failedJobsCount = computed(() => Number(dbStatus.value?.jobsByStatus?.failed || 0)) +const remainingJobsCount = computed(() => pendingJobsCount.value + inProgressJobsCount.value) +const materializedJobsTotal = computed( + () => pendingJobsCount.value + inProgressJobsCount.value + failedJobsCount.value + completedJobsCount.value, +) +const extractionRunLive = computed(() => { + if (optimisticLiveUntilMs.value && nowMs.value < optimisticLiveUntilMs.value) return true + return Boolean(extractionRunState.value?.live) +}) +const hasRunningJobs = computed(() => inProgressJobsCount.value > 0) +const extractionProgressPercent = computed(() => { + const total = materializedJobsTotal.value + if (total <= 0) return 0 + return Math.round(((completedJobsCount.value + failedJobsCount.value) / total) * 100) +}) +const plannedKnownTotalJobs = computed(() => { + const sets = 
planSummary.value?.job_sets || [] + return sets.reduce((sum, set) => sum + (Number(set.projected_jobs) || 0), 0) +}) +const plannedVsMaterializedMismatch = computed(() => { + const planned = plannedKnownTotalJobs.value + if (planned <= 0) return false + return planned !== materializedJobsTotal.value +}) + +async function startExtraction() { + startingExtraction.value = true + optimisticLiveUntilMs.value = Date.now() + 30000 + try { + const res = await apiFetch<{ message?: string }>(`${basePath.value}/start`, { + method: 'POST', + body: { workers: workerCount.value }, + }) + toast.success('Extraction started', { description: res.message }) + startAutoRefresh() + await refreshAll() + } catch (e: unknown) { + optimisticLiveUntilMs.value = null + toast.error('Failed to start extraction', { + description: e instanceof Error ? e.message : 'Request failed', + }) + } finally { + startingExtraction.value = false + } +} + +async function pauseExtraction() { + pausingExtraction.value = true + try { + const res = await apiFetch<{ message?: string }>(`${basePath.value}/pause`, { method: 'POST' }) + toast.success('Pause requested', { description: res.message }) + await refreshAll() + } catch (e: unknown) { + toast.error('Failed to pause extraction', { + description: e instanceof Error ? e.message : 'Request failed', + }) + } finally { + pausingExtraction.value = false + } +} + +async function killExtraction() { + killingExtraction.value = true + try { + const res = await apiFetch<{ message?: string }>(`${basePath.value}/halt`, { method: 'POST' }) + toast.success('Extraction killed', { description: res.message }) + optimisticLiveUntilMs.value = null + stopAutoRefresh() + await refreshAll() + } catch (e: unknown) { + toast.error('Failed to kill extraction', { + description: e instanceof Error ? 
e.message : 'Request failed', + }) + } finally { + killingExtraction.value = false + } +} + +async function regenerateJobs() { + regeneratingJobs.value = true + try { + const res = await apiFetch<{ generated_jobs?: number; message?: string }>( + `${basePath.value}/regenerate`, + { method: 'POST' }, + ) + toast.success('Jobs regenerated', { description: res.message }) + await refreshAll() + } catch (e: unknown) { + toast.error('Regenerate failed', { + description: e instanceof Error ? e.message : 'Request failed', + }) + } finally { + regeneratingJobs.value = false + } +} + +async function resetByKind(kind: 'stale' | 'completed' | 'failed' | 'all') { + const map = { + stale: { ref: resettingRunning, path: 'reset-stale' }, + completed: { ref: resettingCompleted, path: 'reset-completed' }, + failed: { ref: resettingFailed, path: 'reset-failed' }, + all: { ref: resettingAll, path: 'reset' }, + } as const + map[kind].ref.value = true + try { + await apiFetch(`${basePath.value}/${map[kind].path}`, { method: 'POST' }) + toast.success('Jobs reset') + await refreshAll() + } catch (e: unknown) { + toast.error('Reset failed', { description: e instanceof Error ? 
e.message : 'Request failed' }) + } finally { + map[kind].ref.value = false + } +} + +function startAutoRefresh() { + if (autoRefreshInterval) return + autoRefreshInterval = setInterval(() => { void refreshAll() }, 1500) +} + +function stopAutoRefresh() { + if (!autoRefreshInterval) return + clearInterval(autoRefreshInterval) + autoRefreshInterval = null +} + +function onJobSetsSaved() { + jobSetsReloadNonce.value += 1 + void refreshAll() +} + +watch( + () => extractionRunLive.value || hasRunningJobs.value, + (active) => { + if (active) startAutoRefresh() + else if (!optimisticLiveUntilMs.value) stopAutoRefresh() + }, +) + +watch( + () => props.reloadNonce, + () => { void refreshAll() }, +) + +onMounted(() => { + void refreshAll() + clockInterval = setInterval(() => { nowMs.value = Date.now() }, 1000) +}) + +onUnmounted(() => { + stopAutoRefresh() + if (clockInterval) clearInterval(clockInterval) +}) +</script> + +<template> + <div class="space-y-6"> + <div class="grid gap-6 lg:grid-cols-2 lg:items-start"> + <Card> + <CardContent class="p-4"> + <GraphExtractionJobSetsPanel + :kg-id="kgId" + :reload-nonce="jobSetsReloadNonce + (reloadNonce ?? 0)" + embedded + @saved="onJobSetsSaved" + /> + </CardContent> + </Card> + + <Card> + <CardHeader class="pb-2"> + <CardTitle class="text-base">Ontology Schema</CardTitle> + <CardDescription>Live entity and relationship types with expandable instances.</CardDescription> + <div class="flex gap-2 pt-2"> + <Button + size="sm" + :variant="selectedOntologyTab === 'entities' ? 'default' : 'outline'" + @click="selectedOntologyTab = 'entities'" + > + Entities + </Button> + <Button + size="sm" + :variant="selectedOntologyTab === 'relationships' ? 
'default' : 'outline'" + @click="selectedOntologyTab = 'relationships'" + > + Relationships + </Button> + </div> + </CardHeader> + <CardContent class="max-h-[min(70dvh,720px)] overflow-y-auto"> + <GraphDesignEntitiesPanel + v-if="selectedOntologyTab === 'entities'" + :kg-id="kgId" + :reload-nonce="reloadNonce ?? 0" + embedded + /> + <GraphDesignRelationshipsPanel + v-else + :kg-id="kgId" + :reload-nonce="reloadNonce ?? 0" + embedded + /> + </CardContent> + </Card> + </div> + + <Card> + <CardHeader> + <CardTitle class="flex items-center gap-2 text-base"> + <Play class="size-4" /> + Run extraction + </CardTitle> + <CardDescription> + Launch parallel extraction workers. Each worker processes one pending job at a time using the job set description. + </CardDescription> + </CardHeader> + <CardContent class="space-y-4"> + <div class="flex flex-wrap items-end gap-4"> + <div class="space-y-1.5"> + <label class="text-xs font-medium text-muted-foreground">Worker concurrency</label> + <input + v-model.number="workers" + type="number" + min="1" + max="32" + class="h-10 w-24 rounded-lg border bg-background px-3 text-sm" + /> + </div> + <div class="flex flex-wrap gap-2"> + <Button size="sm" :disabled="startingExtraction" @click="startExtraction"> + <Loader2 v-if="startingExtraction" class="mr-1.5 size-3.5 animate-spin" /> + Start + </Button> + <Button size="sm" variant="outline" :disabled="pausingExtraction" @click="pauseExtraction"> + Pause + </Button> + <Button size="sm" variant="destructive" :disabled="killingExtraction" @click="killExtraction"> + Kill + </Button> + <Button size="sm" variant="ghost" @click="refreshAll"> + <RefreshCw class="mr-1.5 size-3.5" /> + Refresh + </Button> + </div> + </div> + + <div class="grid gap-3 sm:grid-cols-2 lg:grid-cols-4 text-sm"> + <div class="rounded-lg border bg-muted/30 p-3"> + <p class="text-xs text-muted-foreground">Remaining jobs</p> + <p class="text-lg font-semibold">{{ remainingJobsCount }}</p> + </div> + <div class="rounded-lg 
border bg-muted/30 p-3"> + <p class="text-xs text-muted-foreground">Materialized jobs</p> + <p class="text-lg font-semibold">{{ materializedJobsTotal }}</p> + </div> + <div class="rounded-lg border bg-muted/30 p-3"> + <p class="text-xs text-muted-foreground">Planned (from job sets)</p> + <p class="text-lg font-semibold">{{ plannedKnownTotalJobs || '—' }}</p> + </div> + <div class="rounded-lg border bg-muted/30 p-3"> + <p class="text-xs text-muted-foreground">Progress</p> + <p class="text-lg font-semibold">{{ extractionProgressPercent }}%</p> + </div> + </div> + + <div v-if="extractionRunLive" class="rounded-lg border border-primary/30 bg-primary/5 p-3 text-xs"> + Extraction run is live — status refreshes every 1.5s. + </div> + + <div v-if="plannedVsMaterializedMismatch" class="flex items-start gap-2 rounded-lg border border-amber-500/40 bg-amber-500/5 p-3 text-xs"> + <AlertCircle class="mt-0.5 size-4 shrink-0 text-amber-600" /> + <div> + Planned job count ({{ plannedKnownTotalJobs }}) differs from materialized total ({{ materializedJobsTotal }}). 
+ <Button size="sm" variant="link" class="h-auto p-0 text-xs" :disabled="regeneratingJobs" @click="regenerateJobs"> + Regenerate jobs + </Button> + </div> + </div> + + <div v-if="(dbStatus?.activeWorkers?.length || 0) > 0" class="space-y-2"> + <p class="text-xs font-medium text-muted-foreground">Active workers</p> + <div class="flex flex-wrap gap-2"> + <Badge v-for="worker in dbStatus?.activeWorkers" :key="worker.workerId" variant="outline" class="font-mono text-[10px]"> + {{ worker.workerId }} → {{ worker.jobId }} + </Badge> + </div> + </div> + </CardContent> + </Card> + + <Card> + <CardHeader> + <CardTitle class="flex items-center gap-2 text-base"> + <ClipboardList class="size-4" /> + Job Status + </CardTitle> + <CardDescription>Aggregate job metrics and maintenance actions.</CardDescription> + </CardHeader> + <CardContent class="space-y-4"> + <div v-if="dbLoading" class="flex items-center gap-2 text-sm text-muted-foreground"> + <Loader2 class="size-4 animate-spin" /> + Loading job status... 
+ </div> + <div v-else-if="dbError" class="text-sm text-destructive">{{ dbError }}</div> + <template v-else-if="dbStatus"> + <div class="grid gap-3 sm:grid-cols-2 lg:grid-cols-5"> + <div class="rounded-lg border p-3 text-center"> + <p class="text-xs text-muted-foreground">Ready</p> + <p class="text-xl font-semibold">{{ pendingJobsCount }}</p> + </div> + <div class="rounded-lg border p-3 text-center"> + <p class="text-xs text-muted-foreground">Running</p> + <p class="text-xl font-semibold">{{ inProgressJobsCount }}</p> + </div> + <div class="rounded-lg border p-3 text-center"> + <p class="text-xs text-muted-foreground">Completed</p> + <p class="text-xl font-semibold">{{ completedJobsCount }}</p> + </div> + <div class="rounded-lg border p-3 text-center"> + <p class="text-xs text-muted-foreground">Failed</p> + <p class="text-xl font-semibold">{{ failedJobsCount }}</p> + </div> + <div class="rounded-lg border p-3 text-center"> + <p class="text-xs text-muted-foreground">Stale candidates</p> + <p class="text-xl font-semibold"> + {{ extractionRunLive ? 
0 : inProgressJobsCount }} + </p> + </div> + </div> + + <Separator /> + + <div class="flex flex-wrap gap-2"> + <Button size="sm" variant="outline" :disabled="resettingRunning" @click="resetByKind('stale')"> + Reset Running + </Button> + <Button size="sm" variant="outline" :disabled="resettingCompleted" @click="resetByKind('completed')"> + Reset Completed + </Button> + <Button size="sm" variant="outline" :disabled="resettingFailed" @click="resetByKind('failed')"> + Reset Failed + </Button> + <Button size="sm" variant="outline" :disabled="resettingAll" @click="resetByKind('all')"> + Reset All Jobs + </Button> + <Button size="sm" variant="outline" :disabled="regeneratingJobs" @click="regenerateJobs"> + <Settings class="mr-1.5 size-3.5" /> + Regenerate jobs + </Button> + </div> + + <div v-if="dbStatus.jobsBySet && Object.keys(dbStatus.jobsBySet).length" class="space-y-2"> + <p class="text-xs font-medium text-muted-foreground">Per job set</p> + <div class="grid gap-2 md:grid-cols-2"> + <div + v-for="(stats, setName) in dbStatus.jobsBySet" + :key="setName" + class="rounded-lg border bg-muted/20 p-3 text-xs" + > + <p class="font-medium">{{ setName }}</p> + <p class="text-muted-foreground"> + ready {{ stats.pending }} · running {{ stats.in_progress }} · done {{ stats.completed }} · failed {{ stats.failed }} + </p> + </div> + </div> + </div> + + <div class="rounded-lg border bg-muted/20 p-3 text-xs text-muted-foreground"> + Run totals — + input {{ dbStatus.totalInputTokens.toLocaleString() }} · + output {{ dbStatus.totalOutputTokens.toLocaleString() }} · + cost ${{ dbStatus.totalCostUsd.toFixed(4) }} + </div> + </template> + </CardContent> + </Card> + </div> +</template> diff --git a/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue b/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue index 3929244f9..07c01b480 100644 --- a/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue +++ b/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue @@ -46,6 +46,7 @@ 
import { import SharedConversationPanel from '@/components/extraction/SharedConversationPanel.vue' import GraphDesignEntitiesPanel from '@/components/graph-management/GraphDesignEntitiesPanel.vue' import GraphDesignRelationshipsPanel from '@/components/graph-management/GraphDesignRelationshipsPanel.vue' +import GraphExtractionJobsWorkspace from '@/components/graph-management/GraphExtractionJobsWorkspace.vue' import { GRAPH_MANAGEMENT_INPUT_PLACEHOLDERS, GRAPH_MANAGEMENT_MODE_LABELS, @@ -398,7 +399,7 @@ const graphManagementModeGate = computed((): GraphManagementModeGateInput => ({ const graphManagementChatDescription = computed(() => { if (graphManagementMode.value === 'extraction-jobs') { - return 'Coordinate extraction job setup, sync runs, and maintenance for this knowledge graph. Use the assistant below to drive operational changes.' + return 'Define extraction job sets with per-instance descriptions, review ontology schema, and run parallel extraction workers for this knowledge graph.' } if (graphManagementMode.value === 'one-off-mutations') { return 'Author and apply one-off graph mutations scoped to this knowledge graph. Use the assistant below for mutation guidance and workspace context.' 
@@ -1952,7 +1953,16 @@ watch( @send-message="sendChatMessage" /> - <div class="graph-management-artifacts grid gap-6 lg:grid-cols-[minmax(0,15.5rem)_minmax(0,1fr)] lg:items-start"> + <GraphExtractionJobsWorkspace + v-if="graphManagementMode === 'extraction-jobs'" + :kg-id="kgId" + :reload-nonce="designArtifactsReloadNonce" + /> + + <div + v-else + class="graph-management-artifacts grid gap-6 lg:grid-cols-[minmax(0,15.5rem)_minmax(0,1fr)] lg:items-start" + > <Card id="graph-management-schema-artifacts" class="graph-management-schema-panel lg:sticky lg:top-4 lg:self-start" diff --git a/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts b/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts index 5c01ca2b2..4791f2d80 100644 --- a/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts +++ b/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts @@ -469,7 +469,8 @@ describe('KG-MANAGE-009 - hybrid lower panel mode-specific detail', () => { expect(manageWorkspaceVue).toContain('graph-management-detail') expect(manageWorkspaceVue).toContain('selectedRailItemId') expect(manageWorkspaceVue).toContain("selectedRailItemId === 'schema-readiness'") - expect(manageWorkspaceVue).toContain("selectedRailItemId === 'extraction-jobs-setup'") + expect(manageWorkspaceVue).toContain('GraphExtractionJobsWorkspace') + expect(manageWorkspaceVue).toContain("graphManagementMode === 'extraction-jobs'") expect(manageWorkspaceVue).toContain("selectedRailItemId === 'mutation-authoring'") }) @@ -645,11 +646,8 @@ describe('KG-MANAGE-020 - forbidden and disabled action restrictions', () => { }) describe('KG-MANAGE-021 - unified in-place graph operations', () => { - it('runs extraction jobs and logs directly in graph-management without data-sources redirect', () => { - expect(manageWorkspaceVue).toContain('triggerInlineSync') - expect(manageWorkspaceVue).toContain('loadInlineSyncRuns') - expect(manageWorkspaceVue).toContain('loadInlineRunLogs') - 
expect(manageWorkspaceVue).toContain('Run logs') + it('runs extraction jobs workspace in graph-management without data-sources redirect', () => { + expect(manageWorkspaceVue).toContain('GraphExtractionJobsWorkspace') expect(manageWorkspaceVue).not.toContain('Open Data Source Operations') expect(manageWorkspaceVue).not.toContain('Open Maintain Step') }) diff --git a/src/dev-ui/app/utils/kgGraphManagement.ts b/src/dev-ui/app/utils/kgGraphManagement.ts index dd2f87f44..edd110e29 100644 --- a/src/dev-ui/app/utils/kgGraphManagement.ts +++ b/src/dev-ui/app/utils/kgGraphManagement.ts @@ -30,7 +30,7 @@ export const GRAPH_MANAGEMENT_INPUT_PLACEHOLDERS: Record<GraphManagementMode, st 'initial-schema-design': 'Describe schema goals, entity types, or relationship constraints for this knowledge graph…', 'extraction-jobs': - 'Ask about extraction job setup, sync runs, or maintenance execution for this graph…', + 'Ask about extraction job sets, per-instance descriptions, or running extraction workers…', 'one-off-mutations': 'Author or preview one-off graph mutations scoped to this knowledge graph…', } From 9fe15694a2a021fc27ceca48df9a0945cab61f1f Mon Sep 17 00:00:00 2001 From: aredenba-rh <aredenba@redhat.com> Date: Tue, 9 Jun 2026 11:58:16 -0400 Subject: [PATCH 114/153] agent session service --- src/api/extraction/application/agent_session_service.py | 1 - .../unit/extraction/application/test_agent_session_service.py | 1 - 2 files changed, 2 deletions(-) diff --git a/src/api/extraction/application/agent_session_service.py b/src/api/extraction/application/agent_session_service.py index d22f47a5d..d50652754 100644 --- a/src/api/extraction/application/agent_session_service.py +++ b/src/api/extraction/application/agent_session_service.py @@ -205,4 +205,3 @@ async def set_bootstrap_intake_path_for_active_session( session.updated_at = datetime.now(UTC) await self._repository.save(session) return session - diff --git 
a/src/api/tests/unit/extraction/application/test_agent_session_service.py b/src/api/tests/unit/extraction/application/test_agent_session_service.py index 444ab8c18..06c1d32f6 100644 --- a/src/api/tests/unit/extraction/application/test_agent_session_service.py +++ b/src/api/tests/unit/extraction/application/test_agent_session_service.py @@ -250,4 +250,3 @@ async def test_select_bootstrap_intake_path_persists_choice_for_continuity(self) assert intake["status"] == "path_selected" assert intake["capabilities_goals"] == "I can provide domain terms but need guidance." assert updated.id == session.id - From 9dc8eae212f132d709feed88f9b797d68c6bc229 Mon Sep 17 00:00:00 2001 From: aredenba-rh <aredenba@redhat.com> Date: Thu, 11 Jun 2026 19:00:49 -0400 Subject: [PATCH 115/153] feat(extraction): run extraction jobs via agentic-ci sandboxes Replace the stub executor and broken busybox worker loop with opendatahub-io/agentic-ci containers, add by_files materialization with target_files persistence, and wire dev compose for migrations, gcloud ADC, and the ai-helpers image. 
Co-authored-by: Cursor <cursoragent@cursor.com> --- Makefile | 3 +- compose.dev.yaml | 9 +- compose.yaml | 2 +- src/api/extraction/domain/extraction_job.py | 28 ++- .../agentic_ci_extraction_job_runner.py | 236 ++++++++++++++++++ .../infrastructure/extraction_job_executor.py | 44 ++-- .../infrastructure/extraction_job_metrics.py | 72 ++++++ .../infrastructure/extraction_job_prompt.py | 51 ++++ .../extraction_job_runner_factory.py | 43 ++++ .../extraction_job_workdir_materializer.py | 124 +++++++++ .../extraction_run_orchestrator.py | 92 ++----- .../infrastructure/models/extraction_job.py | 1 + .../repositories/extraction_job_repository.py | 7 +- .../stub_extraction_job_runner.py | 28 +++ .../workload_runtime_settings.py | 12 + .../extraction/ports/extraction_job_runner.py | 15 ++ .../management/extraction_job_materializer.py | 138 +++++++++- .../management/extraction_jobs_service.py | 37 ++- ...4l5m6n7_add_extraction_job_target_files.py | 28 +++ src/api/pyproject.toml | 1 + .../test_extraction_job_metrics.py | 68 +++++ .../test_extraction_job_prompt.py | 39 +++ .../test_stub_extraction_job_runner.py | 34 +++ ...st_extraction_job_materializer_by_files.py | 117 +++++++++ src/api/uv.lock | 15 ++ 25 files changed, 1128 insertions(+), 116 deletions(-) create mode 100644 src/api/extraction/infrastructure/agentic_ci_extraction_job_runner.py create mode 100644 src/api/extraction/infrastructure/extraction_job_metrics.py create mode 100644 src/api/extraction/infrastructure/extraction_job_prompt.py create mode 100644 src/api/extraction/infrastructure/extraction_job_runner_factory.py create mode 100644 src/api/extraction/infrastructure/extraction_job_workdir_materializer.py create mode 100644 src/api/extraction/infrastructure/stub_extraction_job_runner.py create mode 100644 src/api/extraction/ports/extraction_job_runner.py create mode 100644 src/api/infrastructure/migrations/versions/i2j3k4l5m6n7_add_extraction_job_target_files.py create mode 100644 
src/api/tests/unit/extraction/infrastructure/test_extraction_job_metrics.py create mode 100644 src/api/tests/unit/extraction/infrastructure/test_extraction_job_prompt.py create mode 100644 src/api/tests/unit/extraction/infrastructure/test_stub_extraction_job_runner.py create mode 100644 src/api/tests/unit/infrastructure/management/test_extraction_job_materializer_by_files.py diff --git a/Makefile b/Makefile index 62679cad5..02fe7f065 100755 --- a/Makefile +++ b/Makefile @@ -36,9 +36,10 @@ dev: certs .PHONY: down down: docker compose -f compose.yaml -f compose.dev.yaml down - @echo "Stopping Graph Management sticky and worker containers..." + @echo "Stopping Graph Management sticky, worker, and extraction job containers..." -@docker ps -aq --filter name=kartograph-sticky- | xargs -r docker rm -f -@docker ps -aq --filter name=kartograph-worker- | xargs -r docker rm -f + -@docker ps -aq --filter name=kartograph-extract- | xargs -r docker rm -f .PHONY: run diff --git a/compose.dev.yaml b/compose.dev.yaml index 372227556..d083decbf 100644 --- a/compose.dev.yaml +++ b/compose.dev.yaml @@ -26,6 +26,10 @@ services: KARTOGRAPH_EXTRACTION_RUNTIME_CONTAINER_RUN_GID: ${HOST_GID} KARTOGRAPH_EXTRACTION_RUNTIME_STICKY_TURN_TIMEOUT_SECONDS: "3600" KARTOGRAPH_EXTRACTION_RUNTIME_STICKY_MAX_TURNS: "500" + KARTOGRAPH_EXTRACTION_RUNTIME_JOB_RUNNER: agentic_ci + KARTOGRAPH_EXTRACTION_RUNTIME_AGENTIC_CI_IMAGE: ghcr.io/opendatahub-io/ai-helpers:latest + KARTOGRAPH_EXTRACTION_RUNTIME_AGENTIC_CI_HARNESS: claude-code + KARTOGRAPH_EXTRACTION_RUNTIME_EXTRACTION_JOB_WORK_DIR: /tmp/kartograph/extraction_jobs # Vertex AI for Claude Agent SDK in sticky assistant containers CLAUDE_CODE_USE_VERTEX: "1" ANTHROPIC_VERTEX_PROJECT_ID: itpc-gcp-hcm-pe-eng-claude @@ -37,6 +41,9 @@ services: - /app/.venv # Shared with sibling sticky containers launched via the host Docker socket - /tmp/kartograph/job_packages:/tmp/kartograph/job_packages + - /tmp/kartograph/extraction_jobs:/tmp/kartograph/extraction_jobs + 
# gcloud ADC for Vertex-backed agentic-ci extraction job containers + - ${HOME}/.config/gcloud:${HOME}/.config/gcloud:ro,z # Allow API process to launch sibling extraction runtime containers locally - /var/run/docker.sock:/var/run/docker.sock # Docker/Podman CLI from host (required for container runtime backend) @@ -45,7 +52,7 @@ services: - /bin/bash - -c - | - uv run fastapi dev main.py --host 0.0.0.0 --port 8000 + uv sync --frozen && uv run alembic upgrade head && uv run fastapi dev main.py --host 0.0.0.0 --port 8000 dev-ui: build: diff --git a/compose.yaml b/compose.yaml index bf632a8a7..98a435584 100644 --- a/compose.yaml +++ b/compose.yaml @@ -148,7 +148,7 @@ services: - ./skills:/app/skills:ro # Mount host CA bundle (supports multiple OS types via env var) # Default fallback order: RHEL/Fedora -> Debian/Ubuntu -> macOS - - ${HOST_CA_BUNDLE:-/etc/pki/tls/certs/ca-bundle.crt}:/etc/ssl/certs/ca-bundle.crt:ro + - ${HOST_CA_BUNDLE:-/etc/pki/ca-trust/extracted/pem/tls-ca-bundle.pem}:/etc/ssl/certs/ca-bundle.crt:ro tty: true extra_hosts: - "localhost:host-gateway" diff --git a/src/api/extraction/domain/extraction_job.py b/src/api/extraction/domain/extraction_job.py index 24f7e5ae2..5babf15d1 100644 --- a/src/api/extraction/domain/extraction_job.py +++ b/src/api/extraction/domain/extraction_job.py @@ -27,6 +27,30 @@ class ExtractionRunStatus(StrEnum): HALTED = "halted" +@dataclass(frozen=True) +class ExtractionTargetFile: + """One repository file assigned to an extraction job.""" + + path: str + repository_folder: str + package_id: str + + def to_dict(self) -> dict[str, Any]: + return { + "path": self.path, + "repository_folder": self.repository_folder, + "package_id": self.package_id, + } + + @classmethod + def from_dict(cls, data: dict[str, Any]) -> ExtractionTargetFile: + return cls( + path=str(data.get("path") or ""), + repository_folder=str(data.get("repository_folder") or ""), + package_id=str(data.get("package_id") or ""), + ) + + @dataclass(frozen=True) 
class ExtractionTargetInstance: """One entity instance assigned to an extraction job.""" @@ -64,6 +88,7 @@ class ExtractionJobRecord: order_index: int description: str target_instances: tuple[ExtractionTargetInstance, ...] = field(default_factory=tuple) + target_files: tuple[ExtractionTargetFile, ...] = field(default_factory=tuple) worker_id: str | None = None started_at: datetime | None = None completed_at: datetime | None = None @@ -90,6 +115,7 @@ def to_dict(self) -> dict[str, Any]: "order_index": self.order_index, "description": self.description, "target_instances": [instance.to_dict() for instance in self.target_instances], + "target_files": [target_file.to_dict() for target_file in self.target_files], "worker_id": self.worker_id, "started_at": self.started_at.isoformat() if self.started_at else None, "completed_at": self.completed_at.isoformat() if self.completed_at else None, @@ -104,7 +130,7 @@ def to_dict(self) -> dict[str, Any]: "entities_modified": self.entities_modified, "relationships_created": self.relationships_created, "instance_count": len(self.target_instances), - "file_count": 0, + "file_count": len(self.target_files), } diff --git a/src/api/extraction/infrastructure/agentic_ci_extraction_job_runner.py b/src/api/extraction/infrastructure/agentic_ci_extraction_job_runner.py new file mode 100644 index 000000000..01aac9106 --- /dev/null +++ b/src/api/extraction/infrastructure/agentic_ci_extraction_job_runner.py @@ -0,0 +1,236 @@ +"""Run extraction jobs inside agentic-ci sandbox containers.""" + +from __future__ import annotations + +import os +import re +import subprocess +import tempfile +from pathlib import Path +from typing import Any + +from agentic_ci.harness import create_harness +from agentic_ci import otel + +from extraction.domain.extraction_job import ExtractionJobRecord +from extraction.infrastructure.extraction_job_metrics import metrics_from_otel_log +from extraction.infrastructure.extraction_job_prompt import 
build_extraction_job_prompt +from extraction.infrastructure.extraction_job_workdir_materializer import ( + ExtractionJobWorkdirMaterializer, +) +from extraction.infrastructure.vertex_runtime_env import build_vertex_container_env +from extraction.infrastructure.workload_runtime_factory import get_workload_credential_issuer +from extraction.infrastructure.workload_runtime_settings import ( + ExtractionWorkloadRuntimeSettings, + get_extraction_workload_runtime_settings, +) +from extraction.ports.extraction_job_runner import IExtractionJobRunner +from shared_kernel.container_runtime.factory import create_container_runtime +from shared_kernel.container_runtime.ports import ContainerRuntimeError + +_CONTAINER_NAME_SAFE = re.compile(r"[^a-zA-Z0-9_.-]+") +_GCLOUD_ADC_FILENAME = "application_default_credentials.json" + + +def _sanitize_container_name(job_id: str) -> str: + cleaned = _CONTAINER_NAME_SAFE.sub("-", job_id).strip("-") + return f"kartograph-extract-{cleaned}"[:63].rstrip("-_.") + + +class AgenticCiExtractionJobRunner(IExtractionJobRunner): + """Execute one extraction job using opendatahub-io/agentic-ci harness and containers.""" + + def __init__( + self, + *, + settings: ExtractionWorkloadRuntimeSettings | None = None, + workdir_materializer: ExtractionJobWorkdirMaterializer | None = None, + ) -> None: + self._settings = settings or get_extraction_workload_runtime_settings() + self._workdir_materializer = workdir_materializer + self._harness = create_harness(self._settings.agentic_ci_harness) + + async def run(self, job: ExtractionJobRecord, *, tenant_id: str) -> dict[str, Any]: + if self._workdir_materializer is None: + raise RuntimeError("AgenticCiExtractionJobRunner requires a workdir materializer") + credentials = get_workload_credential_issuer().issue( + tenant_id=tenant_id, + knowledge_graph_id=job.knowledge_graph_id, + extra_scopes=("workload:chat",), + ) + workdir = await self._workdir_materializer.prepare( + job=job, + tenant_id=tenant_id, + 
credentials=credentials, + ) + prompt = build_extraction_job_prompt(job=job) + return await self._run_in_container(job=job, workdir=workdir, prompt=prompt) + + async def _run_in_container( + self, + *, + job: ExtractionJobRecord, + workdir: Path, + prompt: str, + ) -> dict[str, Any]: + import asyncio + + return await asyncio.to_thread(self._run_in_container_sync, job, workdir, prompt) + + def _run_in_container_sync( + self, + job: ExtractionJobRecord, + workdir: Path, + prompt: str, + ) -> dict[str, Any]: + runtime = create_container_runtime(self._settings.container_engine) + binary = getattr(runtime, "_binary", "podman") + model = self._resolve_model() + run_dir = tempfile.mkdtemp(prefix="kartograph-agentic-ci-") + otel_proc = None + otel_log: Path | None = None + container_name = _sanitize_container_name(job.job_id) + + try: + otel_proc, otel_port, otel_log_path, _otel_rate = otel.start_collector(run_dir) + otel_log = Path(otel_log_path) + env = self._build_container_env(otel_port=otel_port) + binds = self._build_binds(workdir=workdir) + command = self._harness.build_args(prompt, model) + rc = self._run_foreground( + binary=binary, + image=self._settings.agentic_ci_image, + name=container_name, + env=env, + binds=binds, + command=command, + timeout_seconds=self._settings.agentic_ci_timeout_seconds, + ) + if otel_proc is not None: + otel.stop_collector(otel_proc) + otel_proc = None + metrics = metrics_from_otel_log(otel_log) if otel_log is not None else {} + if rc != 0: + raise RuntimeError( + f"agentic-ci container exited with code {rc} for job {job.job_id}" + ) + return metrics + finally: + if otel_proc is not None: + otel.stop_collector(otel_proc) + subprocess.run( + [binary, "rm", "-f", container_name], + capture_output=True, + check=False, + ) + + def _resolve_model(self) -> str: + configured = self._settings.agentic_ci_model.strip() + if configured: + return configured + model_env = self._harness.model_env_var() + from_env = os.environ.get(model_env, 
"").strip() + if from_env: + return from_env + return self._harness.default_model() + + def _build_container_env(self, *, otel_port: int) -> dict[str, str]: + env: dict[str, str] = { + "DISABLE_AUTOUPDATER": "1", + "AGENT_MODEL": self._resolve_model(), + } + if self._harness.auth_mode == "api-key": + api_key = os.environ.get("ANTHROPIC_API_KEY", "").strip() + if api_key: + env["ANTHROPIC_API_KEY"] = api_key + else: + env.update( + build_vertex_container_env( + project_id=self._settings.vertex_project_id, + region=self._settings.vertex_region, + ) + ) + if self._harness.supports_otel and otel_port: + env.update( + { + "CLAUDE_CODE_ENABLE_TELEMETRY": "1", + "OTEL_METRICS_EXPORTER": "otlp", + "OTEL_LOGS_EXPORTER": "otlp", + "OTEL_EXPORTER_OTLP_PROTOCOL": "http/json", + "OTEL_EXPORTER_OTLP_ENDPOINT": f"http://127.0.0.1:{otel_port}", + "OTEL_METRIC_EXPORT_INTERVAL": "10000", + } + ) + return env + + def _build_binds(self, *, workdir: Path) -> list[str]: + binds = [f"{workdir}:/workspace:z"] + if self._settings.gcloud_config_mount and self._settings.vertex_enabled(): + mount_target = self._harness.credential_mount_target() + gcloud_root = self._settings.gcloud_config_mount.rstrip("/") + adc = f"{gcloud_root}/{_GCLOUD_ADC_FILENAME}" + config = f"{gcloud_root}/configurations/config_default" + if Path(adc).is_file(): + binds.append( + f"{adc}:{mount_target}/.config/gcloud/application_default_credentials.json:ro,z" + ) + if Path(config).is_file(): + binds.append( + f"{config}:{mount_target}/.config/gcloud/configurations/config_default:ro,z" + ) + return binds + + def _run_foreground( + self, + *, + binary: str, + image: str, + name: str, + env: dict[str, str], + binds: list[str], + command: list[str], + timeout_seconds: int, + ) -> int: + cmd = [ + binary, + "run", + "--rm", + "--name", + name, + "--network", + "host", + "--workdir", + "/workspace", + ] + if self._settings.container_run_uid is not None and self._settings.container_run_gid is not None: + cmd.extend( + [ + 
"--user", + f"{self._settings.container_run_uid}:{self._settings.container_run_gid}", + ] + ) + for key, value in sorted(env.items()): + cmd.extend(["--env", f"{key}={value}"]) + for bind in binds: + cmd.extend(["--volume", bind]) + cmd.append(image) + cmd.extend(command) + try: + result = subprocess.run( + cmd, + capture_output=True, + text=True, + check=False, + timeout=timeout_seconds, + ) + except subprocess.TimeoutExpired as exc: + subprocess.run([binary, "rm", "-f", name], capture_output=True, check=False) + raise RuntimeError( + f"agentic-ci container timed out after {timeout_seconds}s" + ) from exc + if result.returncode != 0: + detail = result.stderr.strip() or result.stdout.strip() + raise ContainerRuntimeError( + f"{binary} run failed for {name}: {detail or 'unknown error'}" + ) + return int(result.returncode) diff --git a/src/api/extraction/infrastructure/extraction_job_executor.py b/src/api/extraction/infrastructure/extraction_job_executor.py index 95eb8bd48..58bc46842 100644 --- a/src/api/extraction/infrastructure/extraction_job_executor.py +++ b/src/api/extraction/infrastructure/extraction_job_executor.py @@ -2,32 +2,34 @@ from __future__ import annotations -import asyncio from typing import Any from extraction.domain.extraction_job import ExtractionJobRecord +from extraction.infrastructure.extraction_job_runner_factory import create_extraction_job_runner +from extraction.infrastructure.stub_extraction_job_runner import StubExtractionJobRunner +from extraction.infrastructure.workload_runtime_settings import get_extraction_workload_runtime_settings +from extraction.ports.extraction_job_runner import IExtractionJobRunner +from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker class ExtractionJobExecutor: - """Runs one extraction job using per-instance description guidance.""" + """Runs one extraction job using the configured runner backend.""" - async def execute(self, job: ExtractionJobRecord) -> dict[str, Any]: - """Process target 
instances for one job. + def __init__( + self, + *, + session_factory: async_sessionmaker[AsyncSession] | None = None, + runner: IExtractionJobRunner | None = None, + ) -> None: + self._session_factory = session_factory + self._runner = runner - The sticky extraction agent container path will replace this stub with - a full Claude Agent SDK turn scoped to ``job.description`` and the - assigned instance slugs. For now we simulate successful completion so - orchestration, status APIs, and UI can be exercised end-to-end. - """ - await asyncio.sleep(0.05) - instance_count = len(job.target_instances) - return { - "input_tokens": 100 * instance_count, - "output_tokens": 50 * instance_count, - "cache_read_tokens": 0, - "cache_creation_tokens": 0, - "cost_usd": 0.001 * instance_count, - "entities_created": 0, - "entities_modified": instance_count, - "relationships_created": 0, - } + async def execute(self, job: ExtractionJobRecord, *, tenant_id: str) -> dict[str, Any]: + if self._runner is not None: + return await self._runner.run(job, tenant_id=tenant_id) + settings = get_extraction_workload_runtime_settings() + if settings.job_runner == "stub" or self._session_factory is None: + return await StubExtractionJobRunner().run(job, tenant_id=tenant_id) + async with self._session_factory() as session: + runner = create_extraction_job_runner(session=session, settings=settings) + return await runner.run(job, tenant_id=tenant_id) diff --git a/src/api/extraction/infrastructure/extraction_job_metrics.py b/src/api/extraction/infrastructure/extraction_job_metrics.py new file mode 100644 index 000000000..db31b7a39 --- /dev/null +++ b/src/api/extraction/infrastructure/extraction_job_metrics.py @@ -0,0 +1,72 @@ +"""Parse agentic-ci OTEL logs into extraction job metrics.""" + +from __future__ import annotations + +import json +from pathlib import Path +from typing import Any + + +def metrics_from_otel_log(otel_log: Path) -> dict[str, Any]: + """Extract token and cost metrics from an 
agentic-ci OTEL JSONL log.""" + records: list[dict[str, Any]] = [] + if not otel_log.is_file(): + return _empty_metrics() + try: + with otel_log.open(encoding="utf-8") as handle: + for line in handle: + line = line.strip() + if line: + records.append(json.loads(line)) + except (OSError, json.JSONDecodeError): + return _empty_metrics() + if not records: + return _empty_metrics() + + from agentic_ci.otel import parse_metrics + + token_totals, cost_totals, _api_requests, _active_time = parse_metrics(records) + input_tokens = int( + sum(count for (_model, token_type), count in token_totals.items() if token_type == "input") + ) + output_tokens = int( + sum(count for (_model, token_type), count in token_totals.items() if token_type == "output") + ) + cache_read_tokens = int( + sum( + count + for (_model, token_type), count in token_totals.items() + if token_type == "cacheRead" + ) + ) + cache_creation_tokens = int( + sum( + count + for (_model, token_type), count in token_totals.items() + if token_type == "cacheCreation" + ) + ) + cost_usd = float(sum(cost_totals.values())) + return { + "input_tokens": input_tokens, + "output_tokens": output_tokens, + "cache_read_tokens": cache_read_tokens, + "cache_creation_tokens": cache_creation_tokens, + "cost_usd": cost_usd, + "entities_created": 0, + "entities_modified": 0, + "relationships_created": 0, + } + + +def _empty_metrics() -> dict[str, Any]: + return { + "input_tokens": 0, + "output_tokens": 0, + "cache_read_tokens": 0, + "cache_creation_tokens": 0, + "cost_usd": 0.0, + "entities_created": 0, + "entities_modified": 0, + "relationships_created": 0, + } diff --git a/src/api/extraction/infrastructure/extraction_job_prompt.py b/src/api/extraction/infrastructure/extraction_job_prompt.py new file mode 100644 index 000000000..bca8ae2ec --- /dev/null +++ b/src/api/extraction/infrastructure/extraction_job_prompt.py @@ -0,0 +1,51 @@ +"""Build prompts for extraction job agent runs.""" + +from __future__ import annotations + +from 
extraction.domain.extraction_job import ExtractionJobRecord + + +def build_extraction_job_prompt(*, job: ExtractionJobRecord) -> str: + """Return the agent prompt for one materialized extraction job.""" + lines = [ + "You are an extraction agent for Kartograph, a knowledge graph platform.", + "Read job-context.json in the workspace for API credentials and scope.", + "", + "## Job instructions", + job.description.strip() or "Extract graph entities and relationships for the assigned targets.", + "", + ] + if job.target_instances: + lines.extend( + [ + "## Target entity instances", + "Process only the instances listed below. Use the workload API to read existing graph", + "context and emit JSONL mutations for new or updated entities and relationships.", + "", + ] + ) + for instance in job.target_instances: + lines.append(f"- {instance.entity_type}: {instance.slug}") + lines.append("") + if job.target_files: + lines.extend( + [ + "## Target repository files", + "Inspect only the files materialized under repository-files/. 
Use their content to", + "extract entities and relationships, then emit JSONL mutations via the workload API.", + "", + ] + ) + for target_file in job.target_files: + lines.append( + f"- {target_file.repository_folder}/{target_file.path} (package {target_file.package_id})" + ) + lines.append("") + lines.extend( + [ + "## Completion", + "When finished, ensure all required mutations are applied through the workload API.", + "Do not modify files outside repository-files/.", + ] + ) + return "\n".join(lines) diff --git a/src/api/extraction/infrastructure/extraction_job_runner_factory.py b/src/api/extraction/infrastructure/extraction_job_runner_factory.py new file mode 100644 index 000000000..bb5531689 --- /dev/null +++ b/src/api/extraction/infrastructure/extraction_job_runner_factory.py @@ -0,0 +1,43 @@ +"""Factory helpers for extraction job runners.""" + +from __future__ import annotations + +from pathlib import Path + +from extraction.infrastructure.agentic_ci_extraction_job_runner import AgenticCiExtractionJobRunner +from extraction.infrastructure.extraction_job_workdir_materializer import ( + ExtractionJobWorkdirMaterializer, +) +from extraction.infrastructure.prepared_job_package_reader import SqlPreparedJobPackageReader +from extraction.infrastructure.stub_extraction_job_runner import StubExtractionJobRunner +from extraction.infrastructure.workload_runtime_settings import ( + ExtractionWorkloadRuntimeSettings, + get_extraction_workload_runtime_settings, +) +from extraction.ports.extraction_job_runner import IExtractionJobRunner +from sqlalchemy.ext.asyncio import AsyncSession + + +def create_extraction_job_runner( + *, + session: AsyncSession | None = None, + settings: ExtractionWorkloadRuntimeSettings | None = None, +) -> IExtractionJobRunner: + """Build the configured extraction job runner implementation.""" + resolved = settings or get_extraction_workload_runtime_settings() + if resolved.job_runner == "stub": + return StubExtractionJobRunner() + if session is 
None: + raise ValueError("database session is required for agentic-ci extraction jobs") + prepared_reader = SqlPreparedJobPackageReader( + session=session, + job_package_work_dir=Path(resolved.job_package_work_dir), + ) + materializer = ExtractionJobWorkdirMaterializer( + settings=resolved, + prepared_job_package_reader=prepared_reader, + ) + return AgenticCiExtractionJobRunner( + settings=resolved, + workdir_materializer=materializer, + ) diff --git a/src/api/extraction/infrastructure/extraction_job_workdir_materializer.py b/src/api/extraction/infrastructure/extraction_job_workdir_materializer.py new file mode 100644 index 000000000..1213bf95c --- /dev/null +++ b/src/api/extraction/infrastructure/extraction_job_workdir_materializer.py @@ -0,0 +1,124 @@ +"""Materialize per-job workspaces for agentic-ci extraction runs.""" + +from __future__ import annotations + +import json +import shutil +from pathlib import Path + +from extraction.domain.extraction_job import ExtractionJobRecord, ExtractionTargetFile +from extraction.domain.prepared_job_package_source import PreparedJobPackageSource +from extraction.infrastructure.prepared_job_package_reader import SqlPreparedJobPackageReader +from extraction.infrastructure.workload_runtime_settings import ExtractionWorkloadRuntimeSettings +from extraction.ports.runtime import ScopedWorkloadCredentials +from shared_kernel.job_package.path_safety import validate_zip_entry_name +from shared_kernel.job_package.reader import JobPackageReader +from shared_kernel.job_package.value_objects import JobPackageId + + +class ExtractionJobWorkdirMaterializer: + """Prepare a host work directory for one extraction job container run.""" + + def __init__( + self, + *, + settings: ExtractionWorkloadRuntimeSettings, + prepared_job_package_reader: SqlPreparedJobPackageReader, + ) -> None: + self._settings = settings + self._prepared_job_package_reader = prepared_job_package_reader + self._job_package_work_dir = Path(settings.job_package_work_dir) + 
+ async def prepare( + self, + *, + job: ExtractionJobRecord, + tenant_id: str, + credentials: ScopedWorkloadCredentials, + ) -> Path: + job_root = Path(self._settings.extraction_job_work_dir) / job.knowledge_graph_id / job.job_id + if job_root.exists(): + shutil.rmtree(job_root) + repository_files_dir = job_root / "repository-files" + repository_files_dir.mkdir(parents=True, exist_ok=True) + + job_packages = await self._prepared_job_package_reader.list_latest_for_knowledge_graph( + knowledge_graph_id=job.knowledge_graph_id, + ) + packages_by_id = {source.package_id: source for source in job_packages} + if job.target_files: + self._materialize_target_files( + repository_files_dir=repository_files_dir, + target_files=job.target_files, + packages_by_id=packages_by_id, + ) + else: + self._materialize_all_repository_files( + repository_files_dir=repository_files_dir, + job_packages=job_packages, + ) + + context = { + "tenant_id": tenant_id, + "knowledge_graph_id": job.knowledge_graph_id, + "job_id": job.job_id, + "job_set_name": job.job_set_name, + "strategy": job.strategy, + "description": job.description, + "api_base_url": self._settings.api_base_url.rstrip("/"), + "workload_token": credentials.token, + "target_instances": [instance.to_dict() for instance in job.target_instances], + "target_files": [target_file.to_dict() for target_file in job.target_files], + } + (job_root / "job-context.json").write_text( + json.dumps(context, indent=2), + encoding="utf-8", + ) + return job_root + + def _materialize_all_repository_files( + self, + *, + repository_files_dir: Path, + job_packages: tuple[PreparedJobPackageSource, ...], + ) -> None: + for source in job_packages: + archive_path = self._job_package_work_dir / JobPackageId( + value=source.package_id + ).archive_name() + if not archive_path.is_file(): + continue + reader = JobPackageReader(archive_path) + for change in reader.iter_changeset(): + if change.content_ref is None or not change.path: + continue + 
validate_zip_entry_name(change.path) + output_path = repository_files_dir / source.repository_folder / change.path + output_path.parent.mkdir(parents=True, exist_ok=True) + output_path.write_bytes(reader.read_content(change.content_ref)) + + def _materialize_target_files( + self, + *, + repository_files_dir: Path, + target_files: tuple[ExtractionTargetFile, ...], + packages_by_id: dict[str, PreparedJobPackageSource], + ) -> None: + for target_file in target_files: + source = packages_by_id.get(target_file.package_id) + if source is None: + continue + archive_path = self._job_package_work_dir / JobPackageId( + value=source.package_id + ).archive_name() + if not archive_path.is_file(): + continue + reader = JobPackageReader(archive_path) + for change in reader.iter_changeset(): + if change.path != target_file.path or change.content_ref is None: + continue + validate_zip_entry_name(change.path) + output_path = repository_files_dir / source.repository_folder / change.path + output_path.parent.mkdir(parents=True, exist_ok=True) + output_path.write_bytes(reader.read_content(change.content_ref)) + break diff --git a/src/api/extraction/infrastructure/extraction_run_orchestrator.py b/src/api/extraction/infrastructure/extraction_run_orchestrator.py index f15230f5e..a09a2aa8b 100644 --- a/src/api/extraction/infrastructure/extraction_run_orchestrator.py +++ b/src/api/extraction/infrastructure/extraction_run_orchestrator.py @@ -7,21 +7,12 @@ import os from dataclasses import dataclass, field from datetime import UTC, datetime -from typing import Any from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker from extraction.infrastructure.extraction_job_executor import ExtractionJobExecutor -from extraction.domain.extraction_job import ExtractionJobStatus, ExtractionRunStatus +from extraction.domain.extraction_job import ExtractionRunStatus from extraction.infrastructure.repositories.extraction_job_repository import ExtractionJobRepository -from 
extraction.infrastructure.workload_runtime_factory import ( - create_ephemeral_extraction_worker_launcher, - get_workload_credential_issuer, -) -from extraction.infrastructure.workload_runtime_settings import ( - get_extraction_workload_runtime_settings, -) -from extraction.ports.runtime import EphemeralWorkerLaunchRequest logger = logging.getLogger(__name__) @@ -45,7 +36,7 @@ def __init__( job_executor: ExtractionJobExecutor | None = None, ) -> None: self._session_factory = session_factory - self._job_executor = job_executor or ExtractionJobExecutor() + self._job_executor = job_executor or ExtractionJobExecutor(session_factory=session_factory) self._active: dict[str, _OrchestratorState] = {} self._lock = asyncio.Lock() @@ -81,21 +72,10 @@ async def start( ) await session.commit() - runtime_settings = get_extraction_workload_runtime_settings() - if runtime_settings.backend == "container": - for index in range(state.worker_count): - state.tasks.append( - asyncio.create_task( - self._container_worker_loop(state, worker_index=index + 1) - ) - ) - else: - for index in range(state.worker_count): - state.tasks.append( - asyncio.create_task( - self._in_process_worker_loop(state, worker_index=index + 1) - ) - ) + for index in range(state.worker_count): + state.tasks.append( + asyncio.create_task(self._worker_loop(state, worker_index=index + 1)) + ) async def request_pause(self, *, knowledge_graph_id: str) -> None: async with self._session_factory() as session: @@ -132,7 +112,7 @@ async def halt(self, *, knowledge_graph_id: str) -> None: ) await session.commit() - async def _in_process_worker_loop(self, state: _OrchestratorState, *, worker_index: int) -> None: + async def _worker_loop(self, state: _OrchestratorState, *, worker_index: int) -> None: worker_id = f"worker-{worker_index:02d}" try: while not state.stop_event.is_set(): @@ -161,8 +141,16 @@ async def _in_process_worker_loop(self, state: _OrchestratorState, *, worker_ind await session.commit() try: - metrics = await 
self._job_executor.execute(job) + metrics = await self._job_executor.execute( + job, + tenant_id=state.tenant_id, + ) except Exception as exc: + logger.exception( + "Extraction job %s failed on worker %s", + job.job_id, + worker_id, + ) async with self._session_factory() as session: repo = ExtractionJobRepository(session) await repo.mark_job_failed( @@ -184,54 +172,6 @@ async def _in_process_worker_loop(self, state: _OrchestratorState, *, worker_ind except asyncio.CancelledError: return - async def _container_worker_loop(self, state: _OrchestratorState, *, worker_index: int) -> None: - worker_id = f"worker-{worker_index:02d}" - launcher = create_ephemeral_extraction_worker_launcher() - credential_issuer = get_workload_credential_issuer() - runtime_settings = get_extraction_workload_runtime_settings() - - try: - while not state.stop_event.is_set(): - async with self._session_factory() as session: - repo = ExtractionJobRepository(session) - if await repo.is_pause_requested(knowledge_graph_id=state.knowledge_graph_id): - await session.commit() - state.stop_event.set() - break - job = await repo.claim_next_pending_job( - knowledge_graph_id=state.knowledge_graph_id, - worker_id=worker_id, - ) - if job is None: - await session.commit() - await self._maybe_finish_run(state) - break - await session.commit() - - credentials = credential_issuer.issue( - tenant_id=state.tenant_id, - knowledge_graph_id=state.knowledge_graph_id, - ) - launch_result = launcher.launch( - request=EphemeralWorkerLaunchRequest( - tenant_id=state.tenant_id, - knowledge_graph_id=state.knowledge_graph_id, - session_id=f"extraction-job:{job.job_id}", - sync_run_id=job.job_id, - job_package_id=job.id, - ), - credentials=credentials, - ) - logger.info( - "Launched extraction worker %s for job %s (container backend)", - launch_result.worker_id, - job.job_id, - ) - # Container worker is responsible for marking completion via workload API. 
- await asyncio.sleep(runtime_settings.worker_poll_seconds) - except asyncio.CancelledError: - return - async def _maybe_finish_run(self, state: _OrchestratorState) -> None: async with self._session_factory() as session: repo = ExtractionJobRepository(session) diff --git a/src/api/extraction/infrastructure/models/extraction_job.py b/src/api/extraction/infrastructure/models/extraction_job.py index 099c458f0..7bf38a5fe 100644 --- a/src/api/extraction/infrastructure/models/extraction_job.py +++ b/src/api/extraction/infrastructure/models/extraction_job.py @@ -25,6 +25,7 @@ class ExtractionJobModel(Base, TimestampMixin): order_index: Mapped[int] = mapped_column(sa.Integer(), nullable=False, default=0) description: Mapped[str] = mapped_column(sa.Text(), nullable=False, default="") target_instances: Mapped[list[dict]] = mapped_column(JSONB, nullable=False, default=list) + target_files: Mapped[list[dict]] = mapped_column(JSONB, nullable=False, default=list) worker_id: Mapped[str | None] = mapped_column(sa.String(64), nullable=True) started_at: Mapped[datetime | None] = mapped_column(sa.DateTime(timezone=True), nullable=True) completed_at: Mapped[datetime | None] = mapped_column(sa.DateTime(timezone=True), nullable=True) diff --git a/src/api/extraction/infrastructure/repositories/extraction_job_repository.py b/src/api/extraction/infrastructure/repositories/extraction_job_repository.py index 32d1403c8..c821236a9 100644 --- a/src/api/extraction/infrastructure/repositories/extraction_job_repository.py +++ b/src/api/extraction/infrastructure/repositories/extraction_job_repository.py @@ -14,6 +14,7 @@ ExtractionJobStatus, ExtractionRunRecord, ExtractionRunStatus, + ExtractionTargetFile, ExtractionTargetInstance, ) from extraction.infrastructure.models.extraction_job import ExtractionJobModel, ExtractionRunModel @@ -32,6 +33,9 @@ def _job_model_to_record(model: ExtractionJobModel) -> ExtractionJobRecord: target_instances=tuple( ExtractionTargetInstance.from_dict(row) for row in 
(model.target_instances or []) ), + target_files=tuple( + ExtractionTargetFile.from_dict(row) for row in (model.target_files or []) + ), worker_id=model.worker_id, started_at=model.started_at, completed_at=model.completed_at, @@ -91,6 +95,7 @@ async def replace_pending_jobs( order_index=job.order_index, description=job.description, target_instances=[instance.to_dict() for instance in job.target_instances], + target_files=[target_file.to_dict() for target_file in job.target_files], ) ) await self._session.flush() @@ -175,7 +180,7 @@ async def list_active_workers(self, *, knowledge_graph_id: str) -> list[dict[str "jobId": model.job_id, "jobSet": model.job_set_name, "strategy": model.strategy, - "fileCount": 0, + "fileCount": len(model.target_files or []), "instanceCount": len(model.target_instances or []), "startedAt": model.started_at.isoformat() if model.started_at else None, } diff --git a/src/api/extraction/infrastructure/stub_extraction_job_runner.py b/src/api/extraction/infrastructure/stub_extraction_job_runner.py new file mode 100644 index 000000000..8aff1afbb --- /dev/null +++ b/src/api/extraction/infrastructure/stub_extraction_job_runner.py @@ -0,0 +1,28 @@ +"""In-memory stub runner for extraction jobs (tests and memory backend).""" + +from __future__ import annotations + +import asyncio +from typing import Any + +from extraction.domain.extraction_job import ExtractionJobRecord +from extraction.ports.extraction_job_runner import IExtractionJobRunner + + +class StubExtractionJobRunner(IExtractionJobRunner): + """Simulates successful job completion without launching containers.""" + + async def run(self, job: ExtractionJobRecord, *, tenant_id: str) -> dict[str, Any]: + _ = tenant_id + await asyncio.sleep(0.05) + target_count = len(job.target_instances) or len(job.target_files) or 1 + return { + "input_tokens": 100 * target_count, + "output_tokens": 50 * target_count, + "cache_read_tokens": 0, + "cache_creation_tokens": 0, + "cost_usd": 0.001 * target_count, + 
"entities_created": 0, + "entities_modified": target_count, + "relationships_created": 0, + } diff --git a/src/api/extraction/infrastructure/workload_runtime_settings.py b/src/api/extraction/infrastructure/workload_runtime_settings.py index 42d9c95a5..fdf9844d0 100644 --- a/src/api/extraction/infrastructure/workload_runtime_settings.py +++ b/src/api/extraction/infrastructure/workload_runtime_settings.py @@ -23,10 +23,16 @@ class ExtractionWorkloadRuntimeSettings(BaseSettings): ) backend: Literal["memory", "container"] = Field(default="memory") + job_runner: Literal["stub", "agentic_ci"] | None = Field(default=None) container_engine: Literal["auto", "docker", "podman"] = Field(default="auto") container_network: str | None = Field(default=None) sticky_image: str = Field(default="kartograph-agent-runtime:dev") worker_image: str = Field(default="docker.io/library/busybox:1.36") + agentic_ci_image: str = Field(default="ghcr.io/opendatahub-io/ai-helpers:latest") + agentic_ci_harness: str = Field(default="claude-code") + agentic_ci_model: str = Field(default="") + agentic_ci_timeout_seconds: int = Field(default=1200, ge=60, le=7200) + extraction_job_work_dir: str = Field(default="/tmp/kartograph/extraction_jobs") sticky_command: tuple[str, ...] 
= Field( default=(), description=( @@ -65,6 +71,12 @@ def vertex_enabled(self) -> bool: @model_validator(mode="after") def _apply_vertex_env_aliases(self) -> "ExtractionWorkloadRuntimeSettings": + if self.job_runner is None: + object.__setattr__( + self, + "job_runner", + "agentic_ci" if self.backend == "container" else "stub", + ) if not self.vertex_project_id: object.__setattr__( self, diff --git a/src/api/extraction/ports/extraction_job_runner.py b/src/api/extraction/ports/extraction_job_runner.py new file mode 100644 index 000000000..f7cf6d8ce --- /dev/null +++ b/src/api/extraction/ports/extraction_job_runner.py @@ -0,0 +1,15 @@ +"""Port for executing one materialized extraction job.""" + +from __future__ import annotations + +from typing import Any, Protocol + +from extraction.domain.extraction_job import ExtractionJobRecord + + +class IExtractionJobRunner(Protocol): + """Runs one extraction job and returns completion metrics.""" + + async def run(self, job: ExtractionJobRecord, *, tenant_id: str) -> dict[str, Any]: + """Execute the job and return token/cost/write metrics.""" + ... 
diff --git a/src/api/infrastructure/management/extraction_job_materializer.py b/src/api/infrastructure/management/extraction_job_materializer.py index b11431d5b..fd29026b7 100644 --- a/src/api/infrastructure/management/extraction_job_materializer.py +++ b/src/api/infrastructure/management/extraction_job_materializer.py @@ -2,18 +2,29 @@ from __future__ import annotations +import fnmatch import hashlib import math +from pathlib import Path from typing import Any from ulid import ULID -from extraction.domain.extraction_job import ExtractionJobRecord, ExtractionJobStatus, ExtractionTargetInstance +from extraction.domain.extraction_job import ( + ExtractionJobRecord, + ExtractionJobStatus, + ExtractionTargetFile, + ExtractionTargetInstance, +) +from extraction.domain.prepared_job_package_source import PreparedJobPackageSource from management.domain.extraction_job_config import ( ExtractionJobConfigDocument, ExtractionJobSetDefinition, ExtractionJobSetStrategy, ) +from shared_kernel.job_package.path_safety import validate_zip_entry_name +from shared_kernel.job_package.reader import JobPackageReader +from shared_kernel.job_package.value_objects import JobPackageId def _batch_items(items: list[Any], batch_size: int) -> list[list[Any]]: @@ -76,31 +87,123 @@ def entity_instances_by_type_from_graph( return grouped +def build_repository_file_catalog( + *, + job_package_work_dir: Path, + job_packages: tuple[PreparedJobPackageSource, ...], +) -> list[ExtractionTargetFile]: + """Collect repository file paths from the latest prepared JobPackages.""" + catalog: list[ExtractionTargetFile] = [] + for source in job_packages: + archive_path = job_package_work_dir / JobPackageId(value=source.package_id).archive_name() + if not archive_path.is_file(): + continue + try: + reader = JobPackageReader(archive_path) + except (OSError, ValueError): + continue + for change in reader.iter_changeset(): + if change.content_ref is None or not change.path: + continue + 
validate_zip_entry_name(change.path) + catalog.append( + ExtractionTargetFile( + path=str(change.path), + repository_folder=source.repository_folder, + package_id=source.package_id, + ) + ) + return sorted(catalog, key=lambda item: (item.repository_folder, item.path)) + + +def match_file_patterns( + catalog: list[ExtractionTargetFile], + patterns: tuple[str, ...], +) -> list[ExtractionTargetFile]: + """Return catalog entries matching any glob pattern.""" + if not patterns: + return [] + matched: list[ExtractionTargetFile] = [] + seen: set[tuple[str, str, str]] = set() + for target_file in catalog: + candidates = ( + target_file.path, + f"{target_file.repository_folder}/{target_file.path}", + ) + for pattern in patterns: + if any(fnmatch.fnmatch(candidate, pattern) for candidate in candidates): + key = ( + target_file.path, + target_file.repository_folder, + target_file.package_id, + ) + if key not in seen: + seen.add(key) + matched.append(target_file) + break + return matched + + def materialize_jobs_from_config( *, knowledge_graph_id: str, config: ExtractionJobConfigDocument, graph_data: dict[str, Any], + job_packages: tuple[PreparedJobPackageSource, ...] 
= (), + job_package_work_dir: Path | None = None, ) -> list[ExtractionJobRecord]: """Build pending extraction jobs from job set definitions and live graph instances.""" instances_by_type = entity_instances_by_type_from_graph( knowledge_graph_id=knowledge_graph_id, graph_data=graph_data, ) + file_catalog: list[ExtractionTargetFile] = [] + if job_package_work_dir is not None and job_packages: + file_catalog = build_repository_file_catalog( + job_package_work_dir=job_package_work_dir, + job_packages=job_packages, + ) jobs: list[ExtractionJobRecord] = [] order_index = 0 for job_set in config.job_sets: - if job_set.strategy != ExtractionJobSetStrategy.BY_INSTANCES: + if job_set.strategy == ExtractionJobSetStrategy.BY_INSTANCES: + entity_type = job_set.entity_type or "" + instances = instances_by_type.get(entity_type, []) + per_job = int(job_set.instances_per_job or 1) + if per_job < 1 or not instances: + continue + description = (job_set.description or "").strip() + for batch_idx, batch in enumerate(_batch_items(instances, per_job), start=1): + content_hash = "|".join(instance.slug for instance in batch) + job_id = _generate_job_id(job_set.name, batch_idx, content_hash) + jobs.append( + ExtractionJobRecord( + id=str(ULID()), + knowledge_graph_id=knowledge_graph_id, + job_id=job_id, + job_set_name=job_set.name, + strategy=job_set.strategy.value, + status=ExtractionJobStatus.PENDING, + order_index=order_index, + description=description, + target_instances=tuple(batch), + ) + ) + order_index += 1 continue - entity_type = job_set.entity_type or "" - instances = instances_by_type.get(entity_type, []) - per_job = int(job_set.instances_per_job or 1) - if per_job < 1 or not instances: + + if job_set.strategy != ExtractionJobSetStrategy.BY_FILES: + continue + matched_files = match_file_patterns(file_catalog, job_set.file_patterns) + per_job = int(job_set.files_per_job or 1) + if per_job < 1 or not matched_files: continue - description = (job_set.description or "").strip() - for 
batch_idx, batch in enumerate(_batch_items(instances, per_job), start=1): - content_hash = "|".join(instance.slug for instance in batch) + description = (job_set.description or "").strip() or f"Extract entities from files in {job_set.name}." + for batch_idx, batch in enumerate(_batch_items(matched_files, per_job), start=1): + content_hash = "|".join( + f"{target_file.repository_folder}:{target_file.path}" for target_file in batch + ) job_id = _generate_job_id(job_set.name, batch_idx, content_hash) jobs.append( ExtractionJobRecord( @@ -112,7 +215,7 @@ def materialize_jobs_from_config( status=ExtractionJobStatus.PENDING, order_index=order_index, description=description, - target_instances=tuple(batch), + target_files=tuple(batch), ) ) order_index += 1 @@ -124,11 +227,20 @@ def projected_job_count( job_set: ExtractionJobSetDefinition, *, entity_instance_counts: dict[str, int], + matched_file_count: int | None = None, ) -> int | None: - if job_set.strategy != ExtractionJobSetStrategy.BY_INSTANCES: + if job_set.strategy == ExtractionJobSetStrategy.BY_INSTANCES: + total = entity_instance_counts.get(job_set.entity_type or "", 0) + per_job = job_set.instances_per_job + if total <= 0 or per_job is None or per_job < 1: + return 0 if total == 0 else None + return math.ceil(total / per_job) + if job_set.strategy != ExtractionJobSetStrategy.BY_FILES: + return None + total = matched_file_count + if total is None: return None - total = entity_instance_counts.get(job_set.entity_type or "", 0) - per_job = job_set.instances_per_job + per_job = job_set.files_per_job if total <= 0 or per_job is None or per_job < 1: return 0 if total == 0 else None return math.ceil(total / per_job) diff --git a/src/api/infrastructure/management/extraction_jobs_service.py b/src/api/infrastructure/management/extraction_jobs_service.py index a7a30a2db..d77d3903e 100644 --- a/src/api/infrastructure/management/extraction_jobs_service.py +++ b/src/api/infrastructure/management/extraction_jobs_service.py @@ 
-2,25 +2,31 @@ from __future__ import annotations +from pathlib import Path from typing import Any from starlette.concurrency import run_in_threadpool from sqlalchemy.ext.asyncio import AsyncSession from infrastructure.management.extraction_job_materializer import ( + build_repository_file_catalog, entity_instance_counts_from_graph, + match_file_patterns, materialize_jobs_from_config, projected_job_count, ) from extraction.infrastructure.extraction_run_orchestrator import get_extraction_run_orchestrator from extraction.domain.extraction_job import ExtractionJobStatus, ExtractionRunStatus +from extraction.infrastructure.prepared_job_package_reader import SqlPreparedJobPackageReader from extraction.infrastructure.repositories.extraction_job_repository import ExtractionJobRepository +from extraction.infrastructure.workload_runtime_settings import get_extraction_workload_runtime_settings from graph.infrastructure.bulk_data_reader import fetch_bulk_graph_data from infrastructure.database.connection_pool import ConnectionPool from management.application.services.knowledge_graph_service import KnowledgeGraphService from management.domain.extraction_job_config import ( ExtractionJobConfigDocument, ExtractionJobSetDefinition, + ExtractionJobSetStrategy, ) from management.infrastructure.repositories.knowledge_graph_repository import ( KnowledgeGraphRepository, @@ -130,10 +136,20 @@ async def regenerate_jobs( config = await self._knowledge_graph_repository.get_extraction_job_config(kg_id) document = config or ExtractionJobConfigDocument.empty() graph_data = await self._load_graph_data() + runtime_settings = get_extraction_workload_runtime_settings() + prepared_reader = SqlPreparedJobPackageReader( + session=self._session, + job_package_work_dir=Path(runtime_settings.job_package_work_dir), + ) + job_packages = await prepared_reader.list_latest_for_knowledge_graph( + knowledge_graph_id=kg_id, + ) jobs = materialize_jobs_from_config( knowledge_graph_id=kg_id, config=document, 
graph_data=graph_data, + job_packages=job_packages, + job_package_work_dir=Path(runtime_settings.job_package_work_dir), ) generated = await self._extraction_job_repository.replace_pending_jobs( knowledge_graph_id=kg_id, @@ -247,13 +263,32 @@ async def get_extraction_plan_summary( counts = { row["name"]: row["instance_count"] for row in payload.get("entity_types", []) } + runtime_settings = get_extraction_workload_runtime_settings() + prepared_reader = SqlPreparedJobPackageReader( + session=self._session, + job_package_work_dir=Path(runtime_settings.job_package_work_dir), + ) + job_packages = await prepared_reader.list_latest_for_knowledge_graph( + knowledge_graph_id=kg_id, + ) + file_catalog = build_repository_file_catalog( + job_package_work_dir=Path(runtime_settings.job_package_work_dir), + job_packages=job_packages, + ) job_sets = [] for raw in payload.get("job_sets", []): job_set = ExtractionJobSetDefinition.from_dict(raw) + matched_file_count = None + if job_set.strategy == ExtractionJobSetStrategy.BY_FILES: + matched_file_count = len(match_file_patterns(file_catalog, job_set.file_patterns)) job_sets.append( { **raw, - "projected_jobs": projected_job_count(job_set, entity_instance_counts=counts), + "projected_jobs": projected_job_count( + job_set, + entity_instance_counts=counts, + matched_file_count=matched_file_count, + ), } ) return {"job_sets": job_sets, "entity_types": payload.get("entity_types", [])} diff --git a/src/api/infrastructure/migrations/versions/i2j3k4l5m6n7_add_extraction_job_target_files.py b/src/api/infrastructure/migrations/versions/i2j3k4l5m6n7_add_extraction_job_target_files.py new file mode 100644 index 000000000..e789880c9 --- /dev/null +++ b/src/api/infrastructure/migrations/versions/i2j3k4l5m6n7_add_extraction_job_target_files.py @@ -0,0 +1,28 @@ +"""Add target_files column to extraction_jobs. 
+ +Revision ID: i2j3k4l5m6n7 +Revises: h1i2j3k4l5m6 +Create Date: 2026-06-05 +""" + +from typing import Sequence, Union + +import sqlalchemy as sa +from alembic import op +from sqlalchemy.dialects.postgresql import JSONB + +revision: str = "i2j3k4l5m6n7" +down_revision: Union[str, Sequence[str], None] = "h1i2j3k4l5m6" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + op.add_column( + "extraction_jobs", + sa.Column("target_files", JSONB(), nullable=False, server_default="[]"), + ) + + +def downgrade() -> None: + op.drop_column("extraction_jobs", "target_files") diff --git a/src/api/pyproject.toml b/src/api/pyproject.toml index 9194d5c7a..1b2ffd6f7 100644 --- a/src/api/pyproject.toml +++ b/src/api/pyproject.toml @@ -5,6 +5,7 @@ description = "Enterprise-Ready Bi-Temporal Knowledge Graphs as a Service" readme = "README.md" requires-python = ">=3.12" dependencies = [ + "agentic-ci>=0.2.22", "alembic>=1.17.2", "authlib>=1.6.9", # CVE fix: transitive dep pinned for security "apache-age-python>=0.0.7", diff --git a/src/api/tests/unit/extraction/infrastructure/test_extraction_job_metrics.py b/src/api/tests/unit/extraction/infrastructure/test_extraction_job_metrics.py new file mode 100644 index 000000000..1abb7ac6d --- /dev/null +++ b/src/api/tests/unit/extraction/infrastructure/test_extraction_job_metrics.py @@ -0,0 +1,68 @@ +"""Unit tests for OTEL metric parsing for extraction jobs.""" + +from __future__ import annotations + +import json +from pathlib import Path + +from extraction.infrastructure.extraction_job_metrics import metrics_from_otel_log + + +def test_metrics_from_otel_log_sums_token_and_cost(tmp_path: Path) -> None: + otel_log = tmp_path / "claude-otel.jsonl" + payload = { + "resourceMetrics": [ + { + "scopeMetrics": [ + { + "metrics": [ + { + "name": "claude_code.token.usage", + "sum": { + "dataPoints": [ + { + "asDouble": 100, + "attributes": [ + {"key": "model", "value": 
{"stringValue": "claude"}}, + {"key": "type", "value": {"stringValue": "input"}}, + ], + }, + { + "asDouble": 40, + "attributes": [ + {"key": "model", "value": {"stringValue": "claude"}}, + {"key": "type", "value": {"stringValue": "output"}}, + ], + }, + ] + }, + }, + { + "name": "claude_code.cost.usage", + "sum": { + "dataPoints": [ + { + "asDouble": 0.0123, + "attributes": [ + {"key": "model", "value": {"stringValue": "claude"}}, + ], + } + ] + }, + }, + ] + } + ] + } + ] + } + otel_log.write_text( + json.dumps({"path": "/v1/metrics", "payload": payload}) + "\n", + encoding="utf-8", + ) + + metrics = metrics_from_otel_log(otel_log) + + assert metrics["input_tokens"] == 100 + assert metrics["output_tokens"] == 40 + assert metrics["cost_usd"] == 0.0123 diff --git a/src/api/tests/unit/extraction/infrastructure/test_extraction_job_prompt.py b/src/api/tests/unit/extraction/infrastructure/test_extraction_job_prompt.py new file mode 100644 index 000000000..3eb669d0e --- /dev/null +++ b/src/api/tests/unit/extraction/infrastructure/test_extraction_job_prompt.py @@ -0,0 +1,39 @@ +"""Unit tests for extraction job prompt building.""" + +from extraction.domain.extraction_job import ( + ExtractionJobRecord, + ExtractionJobStatus, + ExtractionTargetFile, + ExtractionTargetInstance, +) +from extraction.infrastructure.extraction_job_prompt import build_extraction_job_prompt + + +def test_build_extraction_job_prompt_includes_instances_and_files() -> None: + job = ExtractionJobRecord( + id="job-row", + knowledge_graph_id="kg-1", + job_id="features_batch_0001_abcd1234", + job_set_name="features", + strategy="by_instances", + status=ExtractionJobStatus.PENDING, + order_index=0, + description="Extract acceptance criteria.", + target_instances=( + ExtractionTargetInstance(slug="feature-a", entity_type="Feature"), + ), + target_files=( + ExtractionTargetFile( + path="features/a.feature", + repository_folder="repo-a", + package_id="pkg-1", + ), + ), + ) + + prompt = 
build_extraction_job_prompt(job=job) + + assert "Extract acceptance criteria." in prompt + assert "Feature: feature-a" in prompt + assert "repo-a/features/a.feature" in prompt + assert "job-context.json" in prompt diff --git a/src/api/tests/unit/extraction/infrastructure/test_stub_extraction_job_runner.py b/src/api/tests/unit/extraction/infrastructure/test_stub_extraction_job_runner.py new file mode 100644 index 000000000..42c8fd5e4 --- /dev/null +++ b/src/api/tests/unit/extraction/infrastructure/test_stub_extraction_job_runner.py @@ -0,0 +1,34 @@ +"""Unit tests for stub extraction job runner.""" + +import pytest + +from extraction.domain.extraction_job import ( + ExtractionJobRecord, + ExtractionJobStatus, + ExtractionTargetFile, +) +from extraction.infrastructure.stub_extraction_job_runner import StubExtractionJobRunner + + +@pytest.mark.asyncio +async def test_stub_runner_returns_metrics_for_file_targets() -> None: + runner = StubExtractionJobRunner() + job = ExtractionJobRecord( + id="job-row", + knowledge_graph_id="kg-1", + job_id="docs_batch_0001_abcd1234", + job_set_name="docs", + strategy="by_files", + status=ExtractionJobStatus.PENDING, + order_index=0, + description="Extract docs.", + target_files=( + ExtractionTargetFile(path="a.md", repository_folder="repo", package_id="pkg"), + ExtractionTargetFile(path="b.md", repository_folder="repo", package_id="pkg"), + ), + ) + + metrics = await runner.run(job, tenant_id="tenant-1") + + assert metrics["input_tokens"] == 200 + assert metrics["entities_modified"] == 2 diff --git a/src/api/tests/unit/infrastructure/management/test_extraction_job_materializer_by_files.py b/src/api/tests/unit/infrastructure/management/test_extraction_job_materializer_by_files.py new file mode 100644 index 000000000..c7666d230 --- /dev/null +++ b/src/api/tests/unit/infrastructure/management/test_extraction_job_materializer_by_files.py @@ -0,0 +1,117 @@ +"""Unit tests for by_files extraction job materialization.""" + +from __future__ 
import annotations + +from pathlib import Path + +from extraction.domain.prepared_job_package_source import PreparedJobPackageSource +from infrastructure.management.extraction_job_materializer import ( + build_repository_file_catalog, + materialize_jobs_from_config, + match_file_patterns, + projected_job_count, +) +from management.domain.extraction_job_config import ( + ExtractionJobConfigDocument, + ExtractionJobSetDefinition, + ExtractionJobSetStrategy, +) +from shared_kernel.job_package.builder import JobPackageBuilder +from shared_kernel.job_package.value_objects import ( + AdapterCheckpoint, + ChangeOperation, + ChangesetEntry, + JobPackageId, + SyncMode, +) + + +def _build_package(work_dir: Path, package_id: str, files: dict[str, bytes]) -> None: + builder = JobPackageBuilder( + data_source_id="ds-1", + knowledge_graph_id="kg-1", + sync_mode=SyncMode.FULL_REFRESH, + package_id=JobPackageId(value=package_id), + ) + for index, (path, content) in enumerate(files.items()): + ref = builder.add_content(content) + builder.add_changeset_entry( + ChangesetEntry( + operation=ChangeOperation.ADD, + id=f"file-{index}", + type="io.kartograph.change.file", + path=path, + content_ref=ref, + content_type="text/plain", + metadata={}, + ) + ) + builder.set_checkpoint(AdapterCheckpoint(schema_version="1.0.0", data={"commit_sha": "abc"})) + builder.build(work_dir) + + +def test_match_file_patterns_supports_globs() -> None: + from extraction.domain.extraction_job import ExtractionTargetFile + + catalog = [ + ExtractionTargetFile(path="src/a.py", repository_folder="repo-a", package_id="pkg-1"), + ExtractionTargetFile(path="docs/readme.md", repository_folder="repo-a", package_id="pkg-1"), + ] + matched = match_file_patterns(catalog, ("**/*.py",)) + assert [item.path for item in matched] == ["src/a.py"] + + +def test_materialize_by_files_batches_repository_paths(tmp_path: Path) -> None: + package_id = "01J0000000000000000000001" + _build_package( + tmp_path, + package_id, + { + 
"features/one.feature": b"Feature: one", + "features/two.feature": b"Feature: two", + "features/three.feature": b"Feature: three", + }, + ) + source = PreparedJobPackageSource( + package_id=package_id, + data_source_id="ds-1", + data_source_name="Repo", + repository_folder="repo-a", + ) + catalog = build_repository_file_catalog( + job_package_work_dir=tmp_path, + job_packages=(source,), + ) + assert len(catalog) == 3 + + config = ExtractionJobConfigDocument( + version="1.0", + job_sets=( + ExtractionJobSetDefinition( + name="features", + strategy=ExtractionJobSetStrategy.BY_FILES, + file_patterns=("features/*.feature",), + files_per_job=2, + description="Extract Feature entities from Gherkin files.", + ), + ), + ) + jobs = materialize_jobs_from_config( + knowledge_graph_id="kg-1", + config=config, + graph_data={"nodes": [], "edges": []}, + job_packages=(source,), + job_package_work_dir=tmp_path, + ) + + assert len(jobs) == 2 + assert [target.path for target in jobs[0].target_files] == [ + "features/one.feature", + "features/three.feature", + ] + assert [target.path for target in jobs[1].target_files] == ["features/two.feature"] + assert projected_job_count( + config.job_sets[0], + entity_instance_counts={}, + matched_file_count=len(match_file_patterns(catalog, ("features/*.feature",))), + ) == 2 diff --git a/src/api/uv.lock b/src/api/uv.lock index 9e964cae4..1ac0bf3bb 100644 --- a/src/api/uv.lock +++ b/src/api/uv.lock @@ -8,6 +8,19 @@ resolution-markers = [ "(python_full_version >= '3.14' and platform_python_implementation == 'PyPy') or (python_full_version >= '3.14' and sys_platform == 'emscripten')", ] +[[package]] +name = "agentic-ci" +version = "0.2.22" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "requests" }, + { name = "tenacity" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/4b/ca/2e123744a6111508bd5445a45b05304a97f7b5c69dec851667efbd1fcf35/agentic_ci-0.2.22.tar.gz", hash = 
"sha256:da0661c3d4574121af9ef282214d257ff2ac27e8bce665516a738a9a1d5a8c2c", size = 65361, upload-time = "2026-06-09T14:51:09.698Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b0/c6/88460334c8b725fa4187ff4f8004f1f89f5d6caf003751a154945cdc2421/agentic_ci-0.2.22-py3-none-any.whl", hash = "sha256:650b551c803850334532c081450e1ba4624373079dc4f6f9ced5d258117e3f4e", size = 80512, upload-time = "2026-06-09T14:51:08.163Z" }, +] + [[package]] name = "alembic" version = "1.17.2" @@ -1292,6 +1305,7 @@ name = "kartograph-api" version = "3.37.1" source = { virtual = "." } dependencies = [ + { name = "agentic-ci" }, { name = "alembic" }, { name = "apache-age-python" }, { name = "asyncpg" }, @@ -1333,6 +1347,7 @@ dev = [ [package.metadata] requires-dist = [ + { name = "agentic-ci", specifier = ">=0.2.22" }, { name = "alembic", specifier = ">=1.17.2" }, { name = "apache-age-python", specifier = ">=0.0.7" }, { name = "asyncpg", specifier = ">=0.31.0" }, From 85bb99388d76113ea782898024db75f879555b6d Mon Sep 17 00:00:00 2001 From: aredenba-rh <aredenba@redhat.com> Date: Thu, 11 Jun 2026 19:06:26 -0400 Subject: [PATCH 116/153] fix(extraction): remove stale sticky containers before session restart Stopped kartograph-sticky-* containers kept their names after API reloads, causing docker run name conflicts when reopening Graph Management modes like Extraction Jobs. 
Co-authored-by: Cursor <cursoragent@cursor.com> --- .../container_workload_runtime.py | 8 ++++- .../container_runtime/cli_runtime.py | 25 ++++++++++++--- .../shared_kernel/container_runtime/ports.py | 4 +++ .../test_container_workload_runtime.py | 31 +++++++++++++++++++ .../container_runtime/test_cli_runtime.py | 27 ++++++++++++++++ 5 files changed, 89 insertions(+), 6 deletions(-) diff --git a/src/api/extraction/infrastructure/container_workload_runtime.py b/src/api/extraction/infrastructure/container_workload_runtime.py index a39b853ba..bf405fd5a 100644 --- a/src/api/extraction/infrastructure/container_workload_runtime.py +++ b/src/api/extraction/infrastructure/container_workload_runtime.py @@ -255,7 +255,7 @@ def _adopt_running_container_if_present( container_id = container_id_hint or self._container_runtime.container_id_for_name( container_name ) - if container_id is None: + if container_id is None or not self._container_runtime.is_running(container_id): return None runtime_base_url = f"http://{container_name}:{self._sticky_service_port}" return StickySessionRuntimeLease( @@ -333,6 +333,8 @@ def _start_runtime( if self._container_run_uid is not None and self._container_run_gid is not None: container_user = f"{self._container_run_uid}:{self._container_run_gid}" + self._remove_stale_container_name(container_name) + launched = self._container_runtime.run( ContainerRunSpec( image=self._sticky_image, @@ -364,6 +366,10 @@ def _start_runtime( runtime_base_url=runtime_base_url, ) + def _remove_stale_container_name(self, container_name: str) -> None: + """Remove stopped containers that still hold a sticky session name.""" + self._container_runtime.remove_by_name(container_name, force=True) + def _terminate_container(self, container_id: str) -> None: if self._container_runtime.is_running(container_id): self._container_runtime.stop(container_id) diff --git a/src/api/shared_kernel/container_runtime/cli_runtime.py b/src/api/shared_kernel/container_runtime/cli_runtime.py 
index 865ae7d15..400de71dc 100644 --- a/src/api/shared_kernel/container_runtime/cli_runtime.py +++ b/src/api/shared_kernel/container_runtime/cli_runtime.py @@ -80,6 +80,25 @@ def is_running(self, container_id: str) -> bool: def container_id_for_name(self, name: str) -> str | None: """Return the running container ID for a fixed container name, if any.""" + container_id = self._inspect_container_id(name) + if container_id is None: + return None + if not self.is_running(container_id): + return None + return container_id + + def remove_by_name(self, name: str, *, force: bool = True) -> bool: + """Remove a container by name. Returns True when a container was removed.""" + if self._inspect_container_id(name) is None: + return False + command = [self._binary, "rm"] + if force: + command.append("-f") + command.append(name) + self._execute(command) + return True + + def _inspect_container_id(self, name: str) -> str | None: result = subprocess.run( [self._binary, "inspect", "-f", "{{.Id}}", name], capture_output=True, @@ -89,11 +108,7 @@ def container_id_for_name(self, name: str) -> str | None: if result.returncode != 0: return None container_id = result.stdout.strip() - if not container_id: - return None - if not self.is_running(container_id): - return None - return container_id + return container_id or None def _execute(self, command: list[str]) -> str: result = subprocess.run( diff --git a/src/api/shared_kernel/container_runtime/ports.py b/src/api/shared_kernel/container_runtime/ports.py index 97a464806..3e7e83239 100644 --- a/src/api/shared_kernel/container_runtime/ports.py +++ b/src/api/shared_kernel/container_runtime/ports.py @@ -56,3 +56,7 @@ def is_running(self, container_id: str) -> bool: def container_id_for_name(self, name: str) -> str | None: """Return the running container ID for a fixed container name, if any.""" ... + + def remove_by_name(self, name: str, *, force: bool = True) -> bool: + """Remove a container by name. 
Returns True when a container was removed.""" + ... diff --git a/src/api/tests/unit/extraction/infrastructure/test_container_workload_runtime.py b/src/api/tests/unit/extraction/infrastructure/test_container_workload_runtime.py index 16c761822..f9ff5d9d9 100644 --- a/src/api/tests/unit/extraction/infrastructure/test_container_workload_runtime.py +++ b/src/api/tests/unit/extraction/infrastructure/test_container_workload_runtime.py @@ -70,6 +70,37 @@ def test_adopts_running_container_after_process_restart(self) -> None: assert lease is not None assert lease.container_id == "container-existing" runtime.run.assert_not_called() + runtime.remove_by_name.assert_not_called() + + def test_removes_stopped_container_name_before_start(self) -> None: + runtime = MagicMock() + runtime.is_running.return_value = False + runtime.container_id_for_name.return_value = None + runtime.remove_by_name.return_value = True + runtime.run.return_value = ContainerRunResult( + container_id="container-1", + name="kartograph-sticky-session-1", + ) + manager = ContainerStickySessionRuntimeManager( + container_runtime=runtime, + sticky_image="busybox:1.36", + sticky_command=("sleep", "3600"), + session_ttl=timedelta(minutes=30), + ) + + lease = manager.get_or_start_runtime( + session_id="session-1", + user_id="user-1", + knowledge_graph_id="kg-1", + mode="extraction_operations", + ) + + assert lease.container_id == "container-1" + runtime.remove_by_name.assert_called_once_with( + "kartograph-sticky-session-1", + force=True, + ) + runtime.run.assert_called_once() def test_reset_stops_existing_container_and_starts_new_one(self) -> None: runtime = MagicMock() diff --git a/src/api/tests/unit/shared_kernel/container_runtime/test_cli_runtime.py b/src/api/tests/unit/shared_kernel/container_runtime/test_cli_runtime.py index c161ad7d1..4ecefa917 100644 --- a/src/api/tests/unit/shared_kernel/container_runtime/test_cli_runtime.py +++ b/src/api/tests/unit/shared_kernel/container_runtime/test_cli_runtime.py @@ 
-106,3 +106,30 @@ def test_is_running_returns_false_for_missing_container(self) -> None: ) assert runtime.is_running("abc123") is False + + def test_remove_by_name_force_removes_existing_container(self) -> None: + runtime = CliContainerRuntime(binary="docker") + + with patch("shared_kernel.container_runtime.cli_runtime.subprocess.run") as run: + run.side_effect = [ + MagicMock(returncode=0, stdout="abc123\n", stderr=""), + MagicMock(returncode=0, stdout="", stderr=""), + ] + + removed = runtime.remove_by_name("kartograph-sticky-session-1", force=True) + + assert removed is True + assert run.call_args_list[1].args[0] == [ + "docker", + "rm", + "-f", + "kartograph-sticky-session-1", + ] + + def test_remove_by_name_returns_false_when_container_missing(self) -> None: + runtime = CliContainerRuntime(binary="docker") + + with patch("shared_kernel.container_runtime.cli_runtime.subprocess.run") as run: + run.return_value = MagicMock(returncode=1, stdout="", stderr="no such object") + + assert runtime.remove_by_name("missing") is False From 39c1a79dc7db3ece0cb01ac26a142274b43ec924 Mon Sep 17 00:00:00 2001 From: aredenba-rh <aredenba@redhat.com> Date: Thu, 11 Jun 2026 19:12:10 -0400 Subject: [PATCH 117/153] refactor(extraction): drop unused repo skills mount from sticky runtime Graph Management Assistant prompts come from API-resolved agent_configuration, not filesystem SKILL.md mounts. Remove the sticky /app/skills bind, related env vars, and dead skills_dir plumbing from compose and deploy. 
Co-authored-by: Cursor <cursoragent@cursor.com> --- compose.dev.yaml | 1 - compose.yaml | 2 -- deploy/apps/kartograph/base/api-deployment.yaml | 6 ------ src/agent-runtime/kartograph_agent_runtime/settings.py | 1 - .../infrastructure/container_workload_runtime.py | 4 ---- .../extraction/infrastructure/runtime_context_builder.py | 7 ++----- .../infrastructure/sticky_session_bootstrap_builder.py | 1 - .../extraction/infrastructure/workload_runtime_factory.py | 1 - .../extraction/infrastructure/workload_runtime_settings.py | 2 -- src/api/extraction/ports/runtime.py | 1 - src/api/extraction/ports/services.py | 5 ++--- src/api/main.py | 4 ---- .../extraction/test_workload_credential_injection.py | 1 - .../infrastructure/test_extraction_event_handler.py | 2 -- .../infrastructure/test_runtime_context_builder.py | 7 +------ .../test_sticky_session_container_bootstrap.py | 2 -- 16 files changed, 5 insertions(+), 42 deletions(-) diff --git a/compose.dev.yaml b/compose.dev.yaml index d083decbf..0642f74bb 100644 --- a/compose.dev.yaml +++ b/compose.dev.yaml @@ -21,7 +21,6 @@ services: KARTOGRAPH_EXTRACTION_RUNTIME_API_BASE_URL: http://api:8000 KARTOGRAPH_EXTRACTION_RUNTIME_WORKLOAD_TOKEN_SIGNING_KEY: kartograph-dev-workload-token-signing-key KARTOGRAPH_EXTRACTION_RUNTIME_JOB_PACKAGE_WORK_DIR: /tmp/kartograph/job_packages - KARTOGRAPH_EXTRACTION_RUNTIME_SKILLS_DIR: ${PWD}/skills KARTOGRAPH_EXTRACTION_RUNTIME_CONTAINER_RUN_UID: ${HOST_UID} KARTOGRAPH_EXTRACTION_RUNTIME_CONTAINER_RUN_GID: ${HOST_GID} KARTOGRAPH_EXTRACTION_RUNTIME_STICKY_TURN_TIMEOUT_SECONDS: "3600" diff --git a/compose.yaml b/compose.yaml index 98a435584..6ed15f7c2 100644 --- a/compose.yaml +++ b/compose.yaml @@ -145,7 +145,6 @@ services: - kartograph volumes: - ./certs:/certs:ro - - ./skills:/app/skills:ro # Mount host CA bundle (supports multiple OS types via env var) # Default fallback order: RHEL/Fedora -> Debian/Ubuntu -> macOS - 
${HOST_CA_BUNDLE:-/etc/pki/ca-trust/extracted/pem/tls-ca-bundle.pem}:/etc/ssl/certs/ca-bundle.crt:ro @@ -157,7 +156,6 @@ services: - GRPC_DEFAULT_SSL_ROOTS_FILE_PATH=/certs/spicedb-cert.pem # SSL cert file uses mounted path (same for all systems) - SSL_CERT_FILE=/etc/ssl/certs/ca-bundle.crt - - KARTOGRAPH_EXTRACTION_SKILLS_DIR=/app/skills depends_on: postgres: condition: service_healthy diff --git a/deploy/apps/kartograph/base/api-deployment.yaml b/deploy/apps/kartograph/base/api-deployment.yaml index 3c9f2f193..1de0bc5ee 100644 --- a/deploy/apps/kartograph/base/api-deployment.yaml +++ b/deploy/apps/kartograph/base/api-deployment.yaml @@ -155,15 +155,11 @@ spec: secretKeyRef: name: kartograph-sso-client-swagger-docs key: client_id - - name: KARTOGRAPH_EXTRACTION_SKILLS_DIR - value: /app/skills volumeMounts: - name: spicedb-ca mountPath: /etc/spicedb-ca readOnly: true - - name: extraction-skills - mountPath: /app/skills livenessProbe: httpGet: path: /health @@ -194,5 +190,3 @@ spec: items: - key: service-ca.crt path: service-ca.crt - - name: extraction-skills - emptyDir: {} diff --git a/src/agent-runtime/kartograph_agent_runtime/settings.py b/src/agent-runtime/kartograph_agent_runtime/settings.py index 1bcd11c79..880409afa 100644 --- a/src/agent-runtime/kartograph_agent_runtime/settings.py +++ b/src/agent-runtime/kartograph_agent_runtime/settings.py @@ -20,7 +20,6 @@ class AgentRuntimeSettings(BaseSettings): tenant_id: str = Field(default="", alias="KARTOGRAPH_TENANT_ID") knowledge_graph_id: str = Field(default="", alias="KARTOGRAPH_KNOWLEDGE_GRAPH_ID") session_id: str = Field(default="", alias="KARTOGRAPH_SESSION_ID") - skills_dir: str = Field(default="/app/skills", alias="KARTOGRAPH_SKILLS_DIR") workspace_dir: str = Field(default="/workspace", alias="KARTOGRAPH_WORKSPACE_DIR") anthropic_api_key: str = Field(default="", alias="ANTHROPIC_API_KEY") vertex_project_id: str = Field(default="", alias="ANTHROPIC_VERTEX_PROJECT_ID") diff --git 
a/src/api/extraction/infrastructure/container_workload_runtime.py b/src/api/extraction/infrastructure/container_workload_runtime.py index bf405fd5a..526f3ec14 100644 --- a/src/api/extraction/infrastructure/container_workload_runtime.py +++ b/src/api/extraction/infrastructure/container_workload_runtime.py @@ -56,7 +56,6 @@ def __init__( session_ttl: timedelta = timedelta(minutes=30), container_network: str | None = None, sticky_service_port: int = 8787, - container_skills_mount: str = "/app/skills", container_work_mount: str = "/workspace", vertex_project_id: str = "", vertex_region: str = "us-east5", @@ -74,7 +73,6 @@ def __init__( self._session_ttl = session_ttl self._container_network = container_network self._sticky_service_port = sticky_service_port - self._container_skills_mount = container_skills_mount self._container_work_mount = container_work_mount self._vertex_project_id = vertex_project_id self._vertex_region = vertex_region @@ -286,7 +284,6 @@ def _start_runtime( "KARTOGRAPH_KNOWLEDGE_GRAPH_ID": knowledge_graph_id, "KARTOGRAPH_USER_ID": user_id, "KARTOGRAPH_SESSION_MODE": mode, - "KARTOGRAPH_SKILLS_DIR": self._container_skills_mount, "KARTOGRAPH_WORKSPACE_DIR": self._container_work_mount, "KARTOGRAPH_AGENT_TURN_TIMEOUT_SECONDS": str(int(self._agent_turn_timeout_seconds)), "KARTOGRAPH_AGENT_MAX_TURNS": str(int(self._agent_max_turns)), @@ -309,7 +306,6 @@ def _start_runtime( "KARTOGRAPH_API_BASE_URL": bootstrap.api_base_url, } ) - binds.append(f"{bootstrap.host_skills_dir}:{self._container_skills_mount}:ro") binds.extend( build_sticky_session_workspace_binds( host_session_work_dir=bootstrap.host_session_work_dir, diff --git a/src/api/extraction/infrastructure/runtime_context_builder.py b/src/api/extraction/infrastructure/runtime_context_builder.py index 9c349f12b..651b91690 100644 --- a/src/api/extraction/infrastructure/runtime_context_builder.py +++ b/src/api/extraction/infrastructure/runtime_context_builder.py @@ -12,11 +12,10 @@ class 
FilesystemExtractionRuntimeContextBuilder: - """Prepare runtime directories from JobPackage + skills mount path.""" + """Prepare runtime directories from JobPackage archives.""" - def __init__(self, *, work_dir: Path, skills_dir: Path) -> None: + def __init__(self, *, work_dir: Path) -> None: self._work_dir = work_dir - self._skills_dir = skills_dir def build(self, *, sync_run_id: str, job_package_id: str) -> ExtractionRuntimeContext: package_id = JobPackageId(value=job_package_id) @@ -28,7 +27,6 @@ def build(self, *, sync_run_id: str, job_package_id: str) -> ExtractionRuntimeCo repository_files_dir = run_root / "repository-files" ingestion_context_dir.mkdir(parents=True, exist_ok=True) repository_files_dir.mkdir(parents=True, exist_ok=True) - self._skills_dir.mkdir(parents=True, exist_ok=True) with zipfile.ZipFile(archive_path) as archive: for entry_name in archive.namelist(): @@ -47,6 +45,5 @@ def build(self, *, sync_run_id: str, job_package_id: str) -> ExtractionRuntimeCo return ExtractionRuntimeContext( ingestion_context_dir=str(ingestion_context_dir), repository_files_dir=str(repository_files_dir), - skills_dir=str(self._skills_dir), job_package_archive=str(archive_path), ) diff --git a/src/api/extraction/infrastructure/sticky_session_bootstrap_builder.py b/src/api/extraction/infrastructure/sticky_session_bootstrap_builder.py index 4193d1abf..50b8896a6 100644 --- a/src/api/extraction/infrastructure/sticky_session_bootstrap_builder.py +++ b/src/api/extraction/infrastructure/sticky_session_bootstrap_builder.py @@ -73,6 +73,5 @@ async def build( tenant_id=tenant_id, credentials=credentials, host_session_work_dir=str(host_session_work_dir), - host_skills_dir=self._runtime_settings.skills_dir, api_base_url=self._runtime_settings.api_base_url, ) \ No newline at end of file diff --git a/src/api/extraction/infrastructure/workload_runtime_factory.py b/src/api/extraction/infrastructure/workload_runtime_factory.py index e74d8eae5..b1eb7d1bb 100644 --- 
a/src/api/extraction/infrastructure/workload_runtime_factory.py +++ b/src/api/extraction/infrastructure/workload_runtime_factory.py @@ -82,7 +82,6 @@ def create_sticky_session_runtime_manager( session_ttl=timedelta(minutes=resolved.session_ttl_minutes), container_network=resolved.container_network, sticky_service_port=resolved.sticky_service_port, - container_skills_mount=resolved.container_skills_mount, container_work_mount=resolved.container_work_mount, vertex_project_id=resolved.vertex_project_id, vertex_region=resolved.vertex_region, diff --git a/src/api/extraction/infrastructure/workload_runtime_settings.py b/src/api/extraction/infrastructure/workload_runtime_settings.py index fdf9844d0..af8d33bea 100644 --- a/src/api/extraction/infrastructure/workload_runtime_settings.py +++ b/src/api/extraction/infrastructure/workload_runtime_settings.py @@ -42,11 +42,9 @@ class ExtractionWorkloadRuntimeSettings(BaseSettings): ) worker_command: tuple[str, ...] = Field(default=("sleep", "3600")) sticky_service_port: int = Field(default=8787, ge=1024, le=65535) - container_skills_mount: str = Field(default="/app/skills") container_work_mount: str = Field(default="/workspace") session_ttl_minutes: int = Field(default=30, ge=1, le=24 * 60) job_package_work_dir: str = Field(default="/tmp/kartograph/job_packages") - skills_dir: str = Field(default="/app/skills") api_base_url: str = Field(default="http://api:8000") workload_token_signing_key: str = Field( default="", diff --git a/src/api/extraction/ports/runtime.py b/src/api/extraction/ports/runtime.py index 5a46b12e8..14ee058a0 100644 --- a/src/api/extraction/ports/runtime.py +++ b/src/api/extraction/ports/runtime.py @@ -29,7 +29,6 @@ class StickySessionRuntimeBootstrap: tenant_id: str credentials: ScopedWorkloadCredentials host_session_work_dir: str - host_skills_dir: str api_base_url: str diff --git a/src/api/extraction/ports/services.py b/src/api/extraction/ports/services.py index 851dfd3bc..fb5e0d1fb 100644 --- 
a/src/api/extraction/ports/services.py +++ b/src/api/extraction/ports/services.py @@ -15,7 +15,6 @@ class ExtractionRuntimeContext: ingestion_context_dir: str repository_files_dir: str - skills_dir: str job_package_archive: str @@ -46,8 +45,8 @@ async def run( data_source_id: Identifier for the data source being extracted knowledge_graph_id: Identifier for the target knowledge graph job_package_id: Identifier for the JobPackage to process - runtime_context: Resolved runtime context paths for ingestion resources, - reconstructed repository files, and skills availability. + runtime_context: Resolved runtime context paths for ingestion resources and + reconstructed repository files. workload_credentials: Short-lived runtime credentials injected into the worker Returns: diff --git a/src/api/main.py b/src/api/main.py index f34d056a9..cf91c92be 100644 --- a/src/api/main.py +++ b/src/api/main.py @@ -57,9 +57,6 @@ # Default work directory for JobPackage ZIP archives _JOB_PACKAGE_WORK_DIR = Path("/tmp/kartograph/job_packages") # noqa: S108 -_EXTRACTION_SKILLS_DIR = Path( - os.getenv("KARTOGRAPH_EXTRACTION_SKILLS_DIR", "/app/skills") -) # Scheduler polling interval (seconds) _SCHEDULER_POLL_INTERVAL_SECONDS = 60 @@ -387,7 +384,6 @@ async def handle(self, event_type: str, payload: dict[str, Any]) -> None: kg_repo = KnowledgeGraphRepository(session=session, outbox=outbox) runtime_context_builder = FilesystemExtractionRuntimeContextBuilder( work_dir=_JOB_PACKAGE_WORK_DIR, - skills_dir=_EXTRACTION_SKILLS_DIR, ) extraction_handler = ExtractionEventHandler( extraction_service=self._extraction_service, diff --git a/src/api/tests/integration/extraction/test_workload_credential_injection.py b/src/api/tests/integration/extraction/test_workload_credential_injection.py index da85476eb..ad869f34c 100644 --- a/src/api/tests/integration/extraction/test_workload_credential_injection.py +++ b/src/api/tests/integration/extraction/test_workload_credential_injection.py @@ -75,7 +75,6 @@ def 
build(self, *, sync_run_id: str, job_package_id: str) -> ExtractionRuntimeCo return ExtractionRuntimeContext( ingestion_context_dir="/tmp/ingestion-context", repository_files_dir="/tmp/repository-files", - skills_dir="/app/skills", job_package_archive="/tmp/job-package.zip", ) diff --git a/src/api/tests/unit/extraction/infrastructure/test_extraction_event_handler.py b/src/api/tests/unit/extraction/infrastructure/test_extraction_event_handler.py index 3b779f5e3..fed138208 100644 --- a/src/api/tests/unit/extraction/infrastructure/test_extraction_event_handler.py +++ b/src/api/tests/unit/extraction/infrastructure/test_extraction_event_handler.py @@ -100,7 +100,6 @@ def build(self, *, sync_run_id: str, job_package_id: str) -> ExtractionRuntimeCo return ExtractionRuntimeContext( ingestion_context_dir="/tmp/ingestion-context", repository_files_dir="/tmp/repository-files", - skills_dir="/app/skills", job_package_archive="/tmp/job-package.zip", ) @@ -179,7 +178,6 @@ async def test_runs_extraction_on_job_package_produced( assert call["job_package_id"] == "pkg-001" assert call["data_source_id"] == "ds-001" assert call["knowledge_graph_id"] == "kg-001" - assert call["runtime_context"].skills_dir == "/app/skills" async def test_emits_mutation_log_produced_on_success( self, diff --git a/src/api/tests/unit/extraction/infrastructure/test_runtime_context_builder.py b/src/api/tests/unit/extraction/infrastructure/test_runtime_context_builder.py index 9e5bf93a5..d33684e4d 100644 --- a/src/api/tests/unit/extraction/infrastructure/test_runtime_context_builder.py +++ b/src/api/tests/unit/extraction/infrastructure/test_runtime_context_builder.py @@ -58,12 +58,8 @@ async def test_build_materializes_ingestion_context_and_repository_files(tmp_pat work_dir = tmp_path / "work" work_dir.mkdir(parents=True, exist_ok=True) package_id = _build_job_package(work_dir) - skills_dir = tmp_path / "skills" - builder = FilesystemExtractionRuntimeContextBuilder( - work_dir=work_dir, - 
skills_dir=skills_dir, - ) + builder = FilesystemExtractionRuntimeContextBuilder(work_dir=work_dir) runtime = builder.build(sync_run_id="run-123", job_package_id=package_id) assert Path(runtime.ingestion_context_dir).exists() @@ -71,4 +67,3 @@ async def test_build_materializes_ingestion_context_and_repository_files(tmp_pat assert Path(runtime.repository_files_dir, "src/main.py").read_text() == ( "print('hello runtime context')\n" ) - assert Path(runtime.skills_dir).exists() diff --git a/src/api/tests/unit/extraction/infrastructure/test_sticky_session_container_bootstrap.py b/src/api/tests/unit/extraction/infrastructure/test_sticky_session_container_bootstrap.py index e9c9c6148..0471fe57b 100644 --- a/src/api/tests/unit/extraction/infrastructure/test_sticky_session_container_bootstrap.py +++ b/src/api/tests/unit/extraction/infrastructure/test_sticky_session_container_bootstrap.py @@ -36,7 +36,6 @@ def test_start_runtime_mounts_skills_workspace_and_injects_token() -> None: tenant_id="tenant-1", credentials=credentials, host_session_work_dir="/tmp/session-work", - host_skills_dir="/tmp/skills", api_base_url="http://api:8000", ) @@ -52,7 +51,6 @@ def test_start_runtime_mounts_skills_workspace_and_injects_token() -> None: assert spec.command == () assert spec.network == "kartograph_kartograph" assert spec.env["KARTOGRAPH_WORKLOAD_TOKEN"] == credentials.token - assert "/tmp/skills:/app/skills:ro" in spec.binds assert "/tmp/session-work:/workspace" in spec.binds assert "/tmp/session-work/repository-files:/workspace/repository-files:ro" in spec.binds assert "/host/.config/gcloud:/gcloud/config:ro" in spec.binds From 39d402872734c28b8fe81c1b5522965326cac210 Mon Sep 17 00:00:00 2001 From: aredenba-rh <aredenba@redhat.com> Date: Thu, 11 Jun 2026 19:57:03 -0400 Subject: [PATCH 118/153] feat(extraction): let GMA save extraction job sets via workload tools Expose extraction-jobs read/write on the workload API and wire MCP tools so the assistant persists approved job set configs 
instead of directing operators to fill the UI manually. Co-authored-by: Cursor <cursoragent@cursor.com> --- .../kartograph_agent_runtime/agent_prompt.py | 34 ++- .../extraction_jobs_tools.py | 129 +++++++++ .../kartograph_agent_runtime/schema_tools.py | 39 ++- .../kartograph_agent_runtime/tools.py | 28 ++ src/agent-runtime/tests/test_schema_tools.py | 8 + .../application/skill_resolution_service.py | 22 +- .../ports/workload_extraction_jobs.py | 46 +++ .../presentation/workload_routes.py | 108 +++++++ .../extraction_workload/dependencies.py | 14 + .../extraction_jobs_service.py | 273 ++++++++++++++++++ .../presentation/test_workload_routes.py | 121 ++++++++ 11 files changed, 797 insertions(+), 25 deletions(-) create mode 100644 src/agent-runtime/kartograph_agent_runtime/extraction_jobs_tools.py create mode 100644 src/api/extraction/ports/workload_extraction_jobs.py create mode 100644 src/api/infrastructure/extraction_workload/extraction_jobs_service.py diff --git a/src/agent-runtime/kartograph_agent_runtime/agent_prompt.py b/src/agent-runtime/kartograph_agent_runtime/agent_prompt.py index b25b71699..900a8b44a 100644 --- a/src/agent-runtime/kartograph_agent_runtime/agent_prompt.py +++ b/src/agent-runtime/kartograph_agent_runtime/agent_prompt.py @@ -6,6 +6,7 @@ PromptDetail = Literal["full", "compact"] +from kartograph_agent_runtime.extraction_jobs_tools import KARTOGRAPH_EXTRACTION_JOBS_TOOL_NAMES from kartograph_agent_runtime.schema_tools import ( KARTOGRAPH_SCHEMA_TOOL_NAMES, WORKSPACE_FILE_TOOL_NAMES, @@ -57,6 +58,20 @@ Writes persist to the platform database for the active knowledge graph. 
""".strip() +_EXTRACTION_JOBS_TOOLS_REFERENCE = """ +## Extraction job tools (extraction-jobs UI mode) + +| Tool | Purpose | +|------|---------| +| `kartograph_get_extraction_jobs_config` | Read saved job sets and live entity instance counts | +| `kartograph_save_extraction_jobs_config` | Save job sets and regenerate pending jobs (operator-approved configs) | +| `kartograph_get_extraction_jobs_plan_summary` | Projected job counts per job set before/after save | +| `kartograph_get_extraction_jobs_status` | Queue metrics: pending/in-progress/completed/failed jobs | + +When the operator approves a job set proposal, call `kartograph_save_extraction_jobs_config` — +do not ask them to manually fill the extraction-jobs form. +""".strip() + _TOOLS_COMPACT_REFERENCE = ( "Tools: kartograph_* schema MCP tools, plus Read/Write/Edit/Grep/Glob/Bash. " "Prepopulation: {label}.py → out/{label}_instances.json → entities_to_jsonl.py or " @@ -186,10 +201,25 @@ def build_agent_system_prompt( if prompt_detail == "compact": tools_block = f"## Tools\n\n{_TOOLS_COMPACT_REFERENCE}" else: - kartograph_tools = ", ".join(f"`{name}`" for name in KARTOGRAPH_SCHEMA_TOOL_NAMES) + kartograph_tools = ", ".join( + f"`{name}`" + for name in ( + *KARTOGRAPH_SCHEMA_TOOL_NAMES, + *( + KARTOGRAPH_EXTRACTION_JOBS_TOOL_NAMES + if ui_mode == "extraction-jobs" + else () + ), + ) + ) file_tools = ", ".join(f"`{name}`" for name in WORKSPACE_FILE_TOOL_NAMES) + extraction_jobs_block = ( + f"\n\n{_EXTRACTION_JOBS_TOOLS_REFERENCE}" + if ui_mode == "extraction-jobs" + else "" + ) tools_block = ( - f"{_TOOLS_QUICK_REFERENCE}\n\n" + f"{_TOOLS_QUICK_REFERENCE}{extraction_jobs_block}\n\n" f"Registered Kartograph tools: {kartograph_tools}.\n" f"Registered workspace tools: {file_tools}." 
) diff --git a/src/agent-runtime/kartograph_agent_runtime/extraction_jobs_tools.py b/src/agent-runtime/kartograph_agent_runtime/extraction_jobs_tools.py new file mode 100644 index 000000000..47525473c --- /dev/null +++ b/src/agent-runtime/kartograph_agent_runtime/extraction_jobs_tools.py @@ -0,0 +1,129 @@ +"""MCP tool handlers for extraction job set configuration.""" + +from __future__ import annotations + +from typing import Any + +from claude_agent_sdk import tool + +from kartograph_agent_runtime.tools import RuntimeTooling + +KARTOGRAPH_EXTRACTION_JOBS_TOOL_NAMES = ( + "kartograph_get_extraction_jobs_config", + "kartograph_save_extraction_jobs_config", + "kartograph_get_extraction_jobs_plan_summary", + "kartograph_get_extraction_jobs_status", +) + + +def append_extraction_jobs_tools(*, tooling: RuntimeTooling, tools: list[Any]) -> None: + """Register extraction job configuration tools on the Kartograph MCP server.""" + + @tool( + "kartograph_get_extraction_jobs_config", + ( + "Read saved extraction job sets for this knowledge graph, including live " + "entity type instance counts. Call before proposing or saving changes." + ), + {}, + ) + async def get_extraction_jobs_config(_args: dict[str, Any]) -> dict[str, Any]: + try: + return RuntimeTooling.format_tool_result( + await tooling.get_extraction_jobs_config(), + ) + except Exception as exc: # noqa: BLE001 + return { + "content": [ + {"type": "text", "text": f"Failed to read extraction jobs config: {exc}"} + ], + "is_error": True, + } + + @tool( + "kartograph_save_extraction_jobs_config", + ( + "Save extraction job sets for this knowledge graph and regenerate pending jobs. " + "Pass the full job_sets array (read existing config first and merge edits). " + "Each job set needs: name, strategy (by_instances or by_files), description, " + "entity_type + instances_per_job for by_instances, or file_patterns + files_per_job " + "for by_files." 
+ ), + { + "version": str, + "job_sets": list, + }, + ) + async def save_extraction_jobs_config(args: dict[str, Any]) -> dict[str, Any]: + job_sets = args.get("job_sets") + if not isinstance(job_sets, list): + return { + "content": [{"type": "text", "text": "job_sets must be a list of job set objects."}], + "is_error": True, + } + payload: dict[str, Any] = { + "version": str(args.get("version") or "1.0"), + "job_sets": job_sets, + } + try: + return RuntimeTooling.format_tool_result( + await tooling.save_extraction_jobs_config(payload=payload), + ) + except Exception as exc: # noqa: BLE001 + return { + "content": [ + {"type": "text", "text": f"Failed to save extraction jobs config: {exc}"} + ], + "is_error": True, + } + + @tool( + "kartograph_get_extraction_jobs_plan_summary", + ( + "Return projected pending job counts per configured job set based on live " + "graph instances and file catalog matches." + ), + {}, + ) + async def get_extraction_jobs_plan_summary(_args: dict[str, Any]) -> dict[str, Any]: + try: + return RuntimeTooling.format_tool_result( + await tooling.get_extraction_jobs_plan_summary(), + ) + except Exception as exc: # noqa: BLE001 + return { + "content": [ + {"type": "text", "text": f"Failed to load extraction jobs plan summary: {exc}"} + ], + "is_error": True, + } + + @tool( + "kartograph_get_extraction_jobs_status", + ( + "Return materialized extraction job queue metrics: counts by status, recent jobs, " + "and active workers." 
+ ), + {}, + ) + async def get_extraction_jobs_status(_args: dict[str, Any]) -> dict[str, Any]: + try: + return RuntimeTooling.format_tool_result( + await tooling.get_extraction_jobs_status(), + ) + except Exception as exc: # noqa: BLE001 + return { + "content": [ + {"type": "text", "text": f"Failed to load extraction jobs status: {exc}"} + ], + "is_error": True, + } + + tools.extend( + [ + get_extraction_jobs_config, + save_extraction_jobs_config, + get_extraction_jobs_plan_summary, + get_extraction_jobs_status, + ] + ) diff --git a/src/agent-runtime/kartograph_agent_runtime/schema_tools.py b/src/agent-runtime/kartograph_agent_runtime/schema_tools.py index c00fbc846..713c20f39 100644 --- a/src/agent-runtime/kartograph_agent_runtime/schema_tools.py +++ b/src/agent-runtime/kartograph_agent_runtime/schema_tools.py @@ -6,6 +6,10 @@ from claude_agent_sdk import create_sdk_mcp_server, tool +from kartograph_agent_runtime.extraction_jobs_tools import ( + KARTOGRAPH_EXTRACTION_JOBS_TOOL_NAMES, + append_extraction_jobs_tools, +) from kartograph_agent_runtime.tools import RuntimeTooling WORKSPACE_FILE_TOOL_NAMES = ("Read", "Write", "Edit", "Grep", "Glob", "Bash") @@ -25,7 +29,9 @@ "kartograph_check_graph_slugs", ) -GMA_ALLOWED_TOOL_NAMES = KARTOGRAPH_SCHEMA_TOOL_NAMES + WORKSPACE_FILE_TOOL_NAMES +GMA_ALLOWED_TOOL_NAMES = ( + KARTOGRAPH_SCHEMA_TOOL_NAMES + KARTOGRAPH_EXTRACTION_JOBS_TOOL_NAMES + WORKSPACE_FILE_TOOL_NAMES +) def build_kartograph_schema_mcp_server(tooling: RuntimeTooling): @@ -327,21 +333,24 @@ async def search_graph_by_slug(args: dict[str, Any]) -> dict[str, Any]: "is_error": True, } + mcp_tools: list[Any] = [ + get_schema_authoring_guide, + get_workspace_readiness, + get_schema_ontology, + save_schema_ontology, + validate_graph_mutations, + apply_graph_mutations, + validate_graph_mutations_from_file, + apply_graph_mutations_from_file, + list_instances_by_type, + list_relationship_instances, + search_graph_by_slug, + check_graph_slugs, + ] + 
append_extraction_jobs_tools(tooling=tooling, tools=mcp_tools) + return create_sdk_mcp_server( name="kartograph", version="1.0.0", - tools=[ - get_schema_authoring_guide, - get_workspace_readiness, - get_schema_ontology, - save_schema_ontology, - validate_graph_mutations, - apply_graph_mutations, - validate_graph_mutations_from_file, - apply_graph_mutations_from_file, - list_instances_by_type, - list_relationship_instances, - search_graph_by_slug, - check_graph_slugs, - ], + tools=mcp_tools, ) diff --git a/src/agent-runtime/kartograph_agent_runtime/tools.py b/src/agent-runtime/kartograph_agent_runtime/tools.py index 478743dee..7e10bd386 100644 --- a/src/agent-runtime/kartograph_agent_runtime/tools.py +++ b/src/agent-runtime/kartograph_agent_runtime/tools.py @@ -157,6 +157,34 @@ async def search_graph_by_slug( response.raise_for_status() return response.json() + async def get_extraction_jobs_config(self) -> dict[str, Any]: + url = f"{self._base_url()}/extraction/workloads/extraction-jobs" + async with httpx.AsyncClient(timeout=30.0) as client: + response = await client.get(url, headers=self._headers()) + response.raise_for_status() + return response.json() + + async def save_extraction_jobs_config(self, *, payload: dict[str, Any]) -> dict[str, Any]: + url = f"{self._base_url()}/extraction/workloads/extraction-jobs" + async with httpx.AsyncClient(timeout=120.0) as client: + response = await client.put(url, headers=self._headers(), json=payload) + response.raise_for_status() + return response.json() + + async def get_extraction_jobs_plan_summary(self) -> dict[str, Any]: + url = f"{self._base_url()}/extraction/workloads/extraction-jobs/plan-summary" + async with httpx.AsyncClient(timeout=30.0) as client: + response = await client.get(url, headers=self._headers()) + response.raise_for_status() + return response.json() + + async def get_extraction_jobs_status(self) -> dict[str, Any]: + url = f"{self._base_url()}/extraction/workloads/extraction-jobs/status" + async with 
httpx.AsyncClient(timeout=30.0) as client: + response = await client.get(url, headers=self._headers()) + response.raise_for_status() + return response.json() + async def propose_mutation( self, *, operation: str, summary: str, payload: dict[str, Any] | None = None ) -> dict[str, Any]: diff --git a/src/agent-runtime/tests/test_schema_tools.py b/src/agent-runtime/tests/test_schema_tools.py index 594539006..65279c504 100644 --- a/src/agent-runtime/tests/test_schema_tools.py +++ b/src/agent-runtime/tests/test_schema_tools.py @@ -23,6 +23,14 @@ def test_schema_tool_names_cover_authoring_surface() -> None: assert "kartograph_list_relationship_instances" in KARTOGRAPH_SCHEMA_TOOL_NAMES +def test_gma_allowed_tools_include_extraction_jobs_tools() -> None: + from kartograph_agent_runtime.extraction_jobs_tools import KARTOGRAPH_EXTRACTION_JOBS_TOOL_NAMES + from kartograph_agent_runtime.schema_tools import GMA_ALLOWED_TOOL_NAMES + + for tool_name in KARTOGRAPH_EXTRACTION_JOBS_TOOL_NAMES: + assert tool_name in GMA_ALLOWED_TOOL_NAMES + + def test_gma_allowed_tools_include_workspace_file_tools() -> None: from kartograph_agent_runtime.schema_tools import GMA_ALLOWED_TOOL_NAMES, WORKSPACE_FILE_TOOL_NAMES diff --git a/src/api/extraction/application/skill_resolution_service.py b/src/api/extraction/application/skill_resolution_service.py index 70cdeb6f3..4ad03a695 100644 --- a/src/api/extraction/application/skill_resolution_service.py +++ b/src/api/extraction/application/skill_resolution_service.py @@ -145,13 +145,18 @@ class ResolvedExtractionSkillPack: "job_setup": ( "Prioritize extraction job set authoring: by_instances batches with required " "per-instance extraction descriptions (no separate extraction_plan.md). " - "Each description tells the extraction worker what to enrich for assigned entity slugs." + "Each description tells the extraction worker what to enrich for assigned entity slugs. 
" + "When the operator approves a proposed configuration, persist it yourself with " + "kartograph_save_extraction_jobs_config — never instruct them to manually enter " + "values in the extraction-jobs UI." ), "job_set_contract": ( - "Job sets are saved via extraction-jobs API on the knowledge graph. Each set needs: " - "name, strategy (by_instances primary), entity_type, instances_per_job, and description. " - "Saving regenerates pending jobs from live graph instances. Job sets run sequentially; " - "jobs within a set run concurrently up to worker count." + "Use kartograph_get_extraction_jobs_config before editing and " + "kartograph_save_extraction_jobs_config to persist job sets for this knowledge graph. " + "Each set needs: name, strategy (by_instances primary), entity_type, instances_per_job, " + "and description. Saving regenerates pending jobs from live graph instances. " + "Use kartograph_get_extraction_jobs_plan_summary to cite projected job counts. " + "Job sets run sequentially; jobs within a set run concurrently up to worker count." ), "minor_edits": ( "Allow focused direct graph edits while preserving mutation-log " @@ -176,9 +181,10 @@ class ResolvedExtractionSkillPack: GraphManagementUiMode.EXTRACTION_JOBS: { "ui_mode_framing": ( "Focus on extraction job set setup: define by_instances batches with per-instance " - "extraction descriptions, save to regenerate pending jobs, then guide the operator " - "to Run extraction. Use ontology schema panels for context. JobPackage readiness " - "still applies when file-backed context is required." + "extraction descriptions, persist via kartograph_save_extraction_jobs_config when " + "the operator approves, then guide them to Run extraction. Use " + "kartograph_list_instances_by_type to size batches. JobPackage readiness still " + "applies when file-backed context is required." 
), }, GraphManagementUiMode.ONE_OFF_MUTATIONS: { diff --git a/src/api/extraction/ports/workload_extraction_jobs.py b/src/api/extraction/ports/workload_extraction_jobs.py new file mode 100644 index 000000000..bdecdba35 --- /dev/null +++ b/src/api/extraction/ports/workload_extraction_jobs.py @@ -0,0 +1,46 @@ +"""Port for extraction job configuration via workload runtime tokens.""" + +from __future__ import annotations + +from typing import Any, Protocol + + +class IWorkloadExtractionJobsService(Protocol): + """Read and write extraction job sets scoped to one knowledge graph.""" + + async def get_document( + self, + *, + tenant_id: str, + knowledge_graph_id: str, + ) -> dict[str, Any]: + """Return saved job sets plus live entity type instance counts.""" + ... + + async def save_document( + self, + *, + tenant_id: str, + knowledge_graph_id: str, + payload: dict[str, Any], + ) -> dict[str, Any]: + """Validate, persist job sets, and regenerate pending materialized jobs.""" + ... + + async def get_plan_summary( + self, + *, + tenant_id: str, + knowledge_graph_id: str, + ) -> dict[str, Any]: + """Return projected job counts per configured job set.""" + ... + + async def get_database_status( + self, + *, + tenant_id: str, + knowledge_graph_id: str, + ) -> dict[str, Any]: + """Return materialized job queue and run metrics.""" + ... 
diff --git a/src/api/extraction/presentation/workload_routes.py b/src/api/extraction/presentation/workload_routes.py index 6d957aa11..a769bc8aa 100644 --- a/src/api/extraction/presentation/workload_routes.py +++ b/src/api/extraction/presentation/workload_routes.py @@ -7,6 +7,7 @@ from fastapi import APIRouter, Depends, HTTPException, Query, status from pydantic import BaseModel, Field +from extraction.ports.workload_extraction_jobs import IWorkloadExtractionJobsService from extraction.ports.workload_graph import IWorkloadGraphReader from extraction.ports.workload_schema import IWorkloadSchemaService from extraction.presentation.workload_auth import ( @@ -14,6 +15,7 @@ get_workload_auth_context, ) from infrastructure.extraction_workload.dependencies import ( + get_workload_extraction_jobs_service, get_workload_graph_reader, get_workload_schema_service, ) @@ -434,3 +436,109 @@ async def workload_get_workspace_readiness( graph_reader=reader, ) return WorkloadReadinessResponse(**snapshot) + + +class WorkloadExtractionJobsDocumentRequest(BaseModel): + """Extraction job set configuration matching the management extraction-jobs API.""" + + version: str = "1.0" + job_sets: list[dict[str, Any]] = Field(default_factory=list) + + +class WorkloadExtractionJobsDocumentResponse(BaseModel): + """Saved extraction job sets plus entity type counts.""" + + version: str + job_sets: list[dict[str, Any]] + entity_types: list[dict[str, Any]] = Field(default_factory=list) + generated_jobs: int | None = None + + +@router.get( + "/extraction-jobs", + response_model=WorkloadExtractionJobsDocumentResponse, +) +async def workload_get_extraction_jobs( + auth: Annotated[WorkloadAuthContext, Depends(get_workload_auth_context)] = ..., + service: Annotated[ + IWorkloadExtractionJobsService, Depends(get_workload_extraction_jobs_service) + ] = ..., +) -> WorkloadExtractionJobsDocumentResponse: + _require_chat_scope(auth) + try: + payload = await service.get_document( + tenant_id=auth.tenant_id, + 
knowledge_graph_id=auth.knowledge_graph_id, + ) + except ValueError as exc: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail=str(exc), + ) from exc + return WorkloadExtractionJobsDocumentResponse.model_validate(payload) + + +@router.put( + "/extraction-jobs", + response_model=WorkloadExtractionJobsDocumentResponse, +) +async def workload_save_extraction_jobs( + request: WorkloadExtractionJobsDocumentRequest, + auth: Annotated[WorkloadAuthContext, Depends(get_workload_auth_context)] = ..., + service: Annotated[ + IWorkloadExtractionJobsService, Depends(get_workload_extraction_jobs_service) + ] = ..., +) -> WorkloadExtractionJobsDocumentResponse: + _require_chat_scope(auth) + try: + payload = await service.save_document( + tenant_id=auth.tenant_id, + knowledge_graph_id=auth.knowledge_graph_id, + payload=request.model_dump(), + ) + except ValueError as exc: + raise HTTPException( + status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, + detail=str(exc), + ) from exc + return WorkloadExtractionJobsDocumentResponse.model_validate(payload) + + +@router.get("/extraction-jobs/plan-summary") +async def workload_get_extraction_jobs_plan_summary( + auth: Annotated[WorkloadAuthContext, Depends(get_workload_auth_context)] = ..., + service: Annotated[ + IWorkloadExtractionJobsService, Depends(get_workload_extraction_jobs_service) + ] = ..., +) -> dict[str, Any]: + _require_chat_scope(auth) + try: + return await service.get_plan_summary( + tenant_id=auth.tenant_id, + knowledge_graph_id=auth.knowledge_graph_id, + ) + except ValueError as exc: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail=str(exc), + ) from exc + + +@router.get("/extraction-jobs/status") +async def workload_get_extraction_jobs_status( + auth: Annotated[WorkloadAuthContext, Depends(get_workload_auth_context)] = ..., + service: Annotated[ + IWorkloadExtractionJobsService, Depends(get_workload_extraction_jobs_service) + ] = ..., +) -> dict[str, Any]: + 
_require_chat_scope(auth) + try: + return await service.get_database_status( + tenant_id=auth.tenant_id, + knowledge_graph_id=auth.knowledge_graph_id, + ) + except ValueError as exc: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail=str(exc), + ) from exc diff --git a/src/api/infrastructure/extraction_workload/dependencies.py b/src/api/infrastructure/extraction_workload/dependencies.py index a3638c9ee..be5c9cbd1 100644 --- a/src/api/infrastructure/extraction_workload/dependencies.py +++ b/src/api/infrastructure/extraction_workload/dependencies.py @@ -9,10 +9,14 @@ from extraction.infrastructure.workload_runtime import ScopedWorkloadCredentialIssuer from extraction.infrastructure.workload_runtime_factory import get_workload_credential_issuer +from extraction.ports.workload_extraction_jobs import IWorkloadExtractionJobsService from extraction.ports.workload_graph import IWorkloadGraphReader from extraction.ports.workload_schema import IWorkloadSchemaService from infrastructure.database.connection_pool import ConnectionPool from infrastructure.dependencies import get_age_connection_pool +from infrastructure.extraction_workload.extraction_jobs_service import ( + GraphWorkloadExtractionJobsService, +) from infrastructure.extraction_workload.graph_mutation_writer import ( GraphWorkloadGraphMutationWriter, ) @@ -52,3 +56,13 @@ def get_workload_schema_service( ), graph_reader=GraphWorkloadGraphReader(pool=pool, settings=get_database_settings()), ) + + +def get_workload_extraction_jobs_service( + session: Annotated[AsyncSession, Depends(get_write_session)], + pool: Annotated[ConnectionPool, Depends(get_age_connection_pool)], +) -> IWorkloadExtractionJobsService: + return GraphWorkloadExtractionJobsService( + session=session, + connection_pool=pool, + ) diff --git a/src/api/infrastructure/extraction_workload/extraction_jobs_service.py b/src/api/infrastructure/extraction_workload/extraction_jobs_service.py new file mode 100644 index 000000000..ecee7bda1 
--- /dev/null +++ b/src/api/infrastructure/extraction_workload/extraction_jobs_service.py @@ -0,0 +1,273 @@ +"""Workload-scoped extraction job configuration for Graph Management Assistant tools.""" + +from __future__ import annotations + +from pathlib import Path +from typing import Any + +from starlette.concurrency import run_in_threadpool +from sqlalchemy.ext.asyncio import AsyncSession + +from infrastructure.management.extraction_job_materializer import ( + build_repository_file_catalog, + entity_instance_counts_from_graph, + match_file_patterns, + materialize_jobs_from_config, + projected_job_count, +) +from extraction.domain.extraction_job import ExtractionJobStatus +from extraction.infrastructure.prepared_job_package_reader import SqlPreparedJobPackageReader +from extraction.infrastructure.repositories.extraction_job_repository import ExtractionJobRepository +from extraction.infrastructure.workload_runtime_settings import get_extraction_workload_runtime_settings +from graph.infrastructure.bulk_data_reader import fetch_bulk_graph_data +from infrastructure.database.connection_pool import ConnectionPool +from infrastructure.outbox.repository import OutboxRepository +from management.domain.extraction_job_config import ( + ExtractionJobConfigDocument, + ExtractionJobSetDefinition, + ExtractionJobSetStrategy, +) +from management.domain.value_objects import KnowledgeGraphId +from management.infrastructure.repositories.knowledge_graph_repository import ( + KnowledgeGraphRepository, +) + + +class GraphWorkloadExtractionJobsService: + """Persist extraction job sets using workload JWT tenant/KG scope (no end-user session).""" + + def __init__( + self, + *, + session: AsyncSession, + connection_pool: ConnectionPool, + ) -> None: + self._session = session + self._connection_pool = connection_pool + outbox = OutboxRepository(session=session) + self._knowledge_graph_repository = KnowledgeGraphRepository(session=session, outbox=outbox) + self._extraction_job_repository = 
ExtractionJobRepository(session=session) + + async def _assert_knowledge_graph_in_tenant( + self, + *, + tenant_id: str, + knowledge_graph_id: str, + ) -> None: + kg = await self._knowledge_graph_repository.get_by_id( + KnowledgeGraphId(value=knowledge_graph_id) + ) + if kg is None or kg.tenant_id != tenant_id: + raise ValueError(f"Knowledge graph '{knowledge_graph_id}' not found") + + async def _load_graph_data(self, *, tenant_id: str) -> dict[str, Any]: + graph_name = f"tenant_{tenant_id}" + return await run_in_threadpool( + fetch_bulk_graph_data, + self._connection_pool, + graph_name, + ) + + async def get_document( + self, + *, + tenant_id: str, + knowledge_graph_id: str, + ) -> dict[str, Any]: + await self._assert_knowledge_graph_in_tenant( + tenant_id=tenant_id, + knowledge_graph_id=knowledge_graph_id, + ) + config = await self._knowledge_graph_repository.get_extraction_job_config( + knowledge_graph_id + ) + document = config or ExtractionJobConfigDocument.empty() + graph_data = await self._load_graph_data(tenant_id=tenant_id) + counts = entity_instance_counts_from_graph( + knowledge_graph_id=knowledge_graph_id, + graph_data=graph_data, + ) + entity_types = [ + {"name": name, "instance_count": count} + for name, count in sorted(counts.items(), key=lambda item: item[0]) + ] + return { + **document.to_dict(), + "entity_types": entity_types, + } + + async def save_document( + self, + *, + tenant_id: str, + knowledge_graph_id: str, + payload: dict[str, Any], + ) -> dict[str, Any]: + await self._assert_knowledge_graph_in_tenant( + tenant_id=tenant_id, + knowledge_graph_id=knowledge_graph_id, + ) + if await self._extraction_job_repository.has_in_progress_jobs( + knowledge_graph_id=knowledge_graph_id + ): + raise ValueError("Cannot save job sets while extraction jobs are in progress.") + + document = ExtractionJobConfigDocument( + version=str(payload.get("version") or "1.0"), + job_sets=tuple( + ExtractionJobSetDefinition.from_dict(row) + for row in 
(payload.get("job_sets") or []) + ), + ) + graph_data = await self._load_graph_data(tenant_id=tenant_id) + counts = entity_instance_counts_from_graph( + knowledge_graph_id=knowledge_graph_id, + graph_data=graph_data, + ) + errors = document.validation_errors(entity_instance_counts=counts) + if errors: + raise ValueError("; ".join(errors)) + + await self._knowledge_graph_repository.save_extraction_job_config( + knowledge_graph_id, + document, + ) + + runtime_settings = get_extraction_workload_runtime_settings() + prepared_reader = SqlPreparedJobPackageReader( + session=self._session, + job_package_work_dir=Path(runtime_settings.job_package_work_dir), + ) + job_packages = await prepared_reader.list_latest_for_knowledge_graph( + knowledge_graph_id=knowledge_graph_id, + ) + jobs = materialize_jobs_from_config( + knowledge_graph_id=knowledge_graph_id, + config=document, + graph_data=graph_data, + job_packages=job_packages, + job_package_work_dir=Path(runtime_settings.job_package_work_dir), + ) + generated = await self._extraction_job_repository.replace_pending_jobs( + knowledge_graph_id=knowledge_graph_id, + jobs=jobs, + ) + await self._session.commit() + + saved = await self.get_document( + tenant_id=tenant_id, + knowledge_graph_id=knowledge_graph_id, + ) + saved["generated_jobs"] = generated + return saved + + async def get_plan_summary( + self, + *, + tenant_id: str, + knowledge_graph_id: str, + ) -> dict[str, Any]: + document_payload = await self.get_document( + tenant_id=tenant_id, + knowledge_graph_id=knowledge_graph_id, + ) + counts = { + row["name"]: row["instance_count"] for row in document_payload.get("entity_types", []) + } + runtime_settings = get_extraction_workload_runtime_settings() + prepared_reader = SqlPreparedJobPackageReader( + session=self._session, + job_package_work_dir=Path(runtime_settings.job_package_work_dir), + ) + job_packages = await prepared_reader.list_latest_for_knowledge_graph( + knowledge_graph_id=knowledge_graph_id, + ) + file_catalog 
= build_repository_file_catalog( + job_package_work_dir=Path(runtime_settings.job_package_work_dir), + job_packages=job_packages, + ) + job_sets = [] + for raw in document_payload.get("job_sets", []): + job_set = ExtractionJobSetDefinition.from_dict(raw) + matched_file_count = None + if job_set.strategy == ExtractionJobSetStrategy.BY_FILES: + matched_file_count = len(match_file_patterns(file_catalog, job_set.file_patterns)) + job_sets.append( + { + **raw, + "projected_jobs": projected_job_count( + job_set, + entity_instance_counts=counts, + matched_file_count=matched_file_count, + ), + } + ) + return {"job_sets": job_sets, "entity_types": document_payload.get("entity_types", [])} + + async def get_database_status( + self, + *, + tenant_id: str, + knowledge_graph_id: str, + ) -> dict[str, Any]: + await self._assert_knowledge_graph_in_tenant( + tenant_id=tenant_id, + knowledge_graph_id=knowledge_graph_id, + ) + counts = await self._extraction_job_repository.count_by_status( + knowledge_graph_id=knowledge_graph_id + ) + jobs_by_set = await self._extraction_job_repository.count_by_job_set( + knowledge_graph_id=knowledge_graph_id + ) + recent_jobs = await self._extraction_job_repository.list_recent_jobs( + knowledge_graph_id=knowledge_graph_id, + limit=20, + ) + active_workers = await self._extraction_job_repository.list_active_workers( + knowledge_graph_id=knowledge_graph_id + ) + token_metrics = await self._extraction_job_repository.aggregate_token_metrics( + knowledge_graph_id=knowledge_graph_id + ) + avg_completed = await self._extraction_job_repository.avg_completed_job_seconds( + knowledge_graph_id=knowledge_graph_id + ) + graph_data = await self._load_graph_data(tenant_id=tenant_id) + entity_counts = entity_instance_counts_from_graph( + knowledge_graph_id=knowledge_graph_id, + graph_data=graph_data, + ) + return { + "exists": True, + "jobsByStatus": { + "pending": counts.get("pending", 0), + "in_progress": counts.get("in_progress", 0), + "completed": 
counts.get("completed", 0), + "failed": counts.get("failed", 0), + }, + "jobsBySet": jobs_by_set, + "recentJobs": [ + { + "jobId": job.job_id, + "jobSet": job.job_set_name, + "status": job.status.value, + "workerId": job.worker_id, + "startedAt": job.started_at.isoformat() if job.started_at else None, + "completedAt": job.completed_at.isoformat() if job.completed_at else None, + "inputTokens": job.input_tokens, + "outputTokens": job.output_tokens, + "writeOps": job.entities_created + + job.entities_modified + + job.relationships_created, + "assistantPreview": job.description[:120] if job.description else None, + } + for job in recent_jobs + ], + "activeWorkers": active_workers, + "avgCompletedJobSeconds": avg_completed, + "entitiesByType": entity_counts, + "entitiesTotal": sum(entity_counts.values()), + **token_metrics, + "hasInProgressJobs": counts.get(ExtractionJobStatus.IN_PROGRESS.value, 0) > 0, + } diff --git a/src/api/tests/unit/extraction/presentation/test_workload_routes.py b/src/api/tests/unit/extraction/presentation/test_workload_routes.py index 7b2c5aa69..6cd4d2276 100644 --- a/src/api/tests/unit/extraction/presentation/test_workload_routes.py +++ b/src/api/tests/unit/extraction/presentation/test_workload_routes.py @@ -11,6 +11,7 @@ from extraction.presentation.workload_auth import WorkloadAuthContext, get_workload_auth_context from extraction.ports.workload_graph import WorkloadGraphNode, WorkloadGraphRelationship from infrastructure.extraction_workload.dependencies import ( + get_workload_extraction_jobs_service, get_workload_graph_reader, get_workload_schema_service, ) @@ -117,9 +118,54 @@ async def partition_slugs_by_existence(self, **kwargs): return existing, missing +class _FakeExtractionJobsService: + def __init__(self) -> None: + self.saved_payload: dict[str, object] | None = None + + async def get_document(self, *, tenant_id: str, knowledge_graph_id: str) -> dict[str, object]: + if self.saved_payload is None: + return {"version": "1.0", 
"job_sets": [], "entity_types": [{"name": "Adapter", "instance_count": 19}]} + return dict(self.saved_payload) + + async def save_document( + self, + *, + tenant_id: str, + knowledge_graph_id: str, + payload: dict[str, object], + ) -> dict[str, object]: + self.saved_payload = { + **payload, + "entity_types": [{"name": "Adapter", "instance_count": 19}], + "generated_jobs": 7, + } + return dict(self.saved_payload) + + async def get_plan_summary(self, *, tenant_id: str, knowledge_graph_id: str) -> dict[str, object]: + job_sets = list((self.saved_payload or {}).get("job_sets") or []) + return { + "job_sets": [{**row, "projected_jobs": 7} for row in job_sets], + "entity_types": [{"name": "Adapter", "instance_count": 19}], + } + + async def get_database_status(self, *, tenant_id: str, knowledge_graph_id: str) -> dict[str, object]: + return { + "exists": True, + "jobsByStatus": {"pending": 7, "in_progress": 0, "completed": 0, "failed": 0}, + "jobsBySet": {}, + "recentJobs": [], + "activeWorkers": [], + "avgCompletedJobSeconds": None, + "entitiesByType": {"Adapter": 19}, + "entitiesTotal": 19, + "hasInProgressJobs": False, + } + + @pytest.fixture def workload_client() -> tuple[TestClient, _FakeSchemaService, str]: fake = _FakeSchemaService() + extraction_jobs_fake = _FakeExtractionJobsService() fake.saved = OntologyConfig( node_types=( NodeTypeDefinition(label="service", prepopulated=True, prepopulated_instance_count=0), @@ -147,6 +193,7 @@ def workload_client() -> tuple[TestClient, _FakeSchemaService, str]: app = FastAPI() app.include_router(workload_routes.router, prefix="/extraction") app.dependency_overrides[get_workload_schema_service] = lambda: fake + app.dependency_overrides[get_workload_extraction_jobs_service] = lambda: extraction_jobs_fake app.dependency_overrides[get_workload_graph_reader] = lambda: _FakeGraphReader() app.dependency_overrides[get_workload_auth_context] = lambda: WorkloadAuthContext( credentials=credentials, @@ -285,3 +332,77 @@ def 
test_workload_apply_graph_mutations(workload_client: tuple[TestClient, _Fake assert response.status_code == 200 assert response.json()["applied"] is True assert fake.applied_jsonl is not None + + +def test_workload_get_extraction_jobs_config(workload_client: tuple[TestClient, _FakeSchemaService, str]) -> None: + client, _fake, token = workload_client + response = client.get( + "/extraction/workloads/extraction-jobs", + headers={"X-Workload-Token": token}, + ) + assert response.status_code == 200 + payload = response.json() + assert payload["version"] == "1.0" + assert payload["entity_types"][0]["name"] == "Adapter" + assert payload["entity_types"][0]["instance_count"] == 19 + + +def test_workload_save_extraction_jobs_config(workload_client: tuple[TestClient, _FakeSchemaService, str]) -> None: + client, _fake, token = workload_client + job_set = { + "name": "Adapter Deep Extraction", + "strategy": "by_instances", + "entity_type": "Adapter", + "instances_per_job": 3, + "description": "Enrich each Adapter with implementation and config details.", + } + response = client.put( + "/extraction/workloads/extraction-jobs", + headers={"X-Workload-Token": token}, + json={"version": "1.0", "job_sets": [job_set]}, + ) + assert response.status_code == 200 + payload = response.json() + assert payload["generated_jobs"] == 7 + assert payload["job_sets"][0]["name"] == "Adapter Deep Extraction" + + +def test_workload_get_extraction_jobs_plan_summary( + workload_client: tuple[TestClient, _FakeSchemaService, str], +) -> None: + client, _fake, token = workload_client + client.put( + "/extraction/workloads/extraction-jobs", + headers={"X-Workload-Token": token}, + json={ + "version": "1.0", + "job_sets": [ + { + "name": "Adapter Deep Extraction", + "strategy": "by_instances", + "entity_type": "Adapter", + "instances_per_job": 3, + "description": "Enrich adapters.", + } + ], + }, + ) + response = client.get( + "/extraction/workloads/extraction-jobs/plan-summary", + 
headers={"X-Workload-Token": token}, + ) + assert response.status_code == 200 + payload = response.json() + assert payload["job_sets"][0]["projected_jobs"] == 7 + + +def test_workload_get_extraction_jobs_status(workload_client: tuple[TestClient, _FakeSchemaService, str]) -> None: + client, _fake, token = workload_client + response = client.get( + "/extraction/workloads/extraction-jobs/status", + headers={"X-Workload-Token": token}, + ) + assert response.status_code == 200 + payload = response.json() + assert payload["jobsByStatus"]["pending"] == 7 + assert payload["entitiesByType"]["Adapter"] == 19 From 40ea110863a2ba40aba691c837c1fce7d0d52839 Mon Sep 17 00:00:00 2001 From: aredenba-rh <aredenba@redhat.com> Date: Thu, 11 Jun 2026 22:21:29 -0400 Subject: [PATCH 119/153] feat(extraction): require exhaustive per-instance job set descriptions Default by_instances descriptions to full property and relationship coverage with explicit per-field notes, and align GMA skills, save tool guidance, and worker prompts on the same contract. Co-authored-by: Cursor <cursoragent@cursor.com> --- .../kartograph_agent_runtime/agent_prompt.py | 17 ++++++++++ .../extraction_jobs_tools.py | 4 ++- .../application/skill_resolution_service.py | 34 ++++++++++++++++--- .../infrastructure/extraction_job_prompt.py | 5 +++ .../test_skill_resolution_service.py | 2 ++ .../test_extraction_job_prompt.py | 2 ++ 6 files changed, 59 insertions(+), 5 deletions(-) diff --git a/src/agent-runtime/kartograph_agent_runtime/agent_prompt.py b/src/agent-runtime/kartograph_agent_runtime/agent_prompt.py index 900a8b44a..af8c971ea 100644 --- a/src/agent-runtime/kartograph_agent_runtime/agent_prompt.py +++ b/src/agent-runtime/kartograph_agent_runtime/agent_prompt.py @@ -70,6 +70,23 @@ When the operator approves a job set proposal, call `kartograph_save_extraction_jobs_config` — do not ask them to manually fill the extraction-jobs form. 
+ +### Per-instance description (by_instances job sets) + +The job set `description` is the shared brief for every assigned instance in that set. +**Default: capture everything** — every schema property filled or updated, and every +applicable relationship instance created or updated for each assigned slug. + +Before writing a description: +1. `kartograph_get_schema_ontology` — list properties on the target entity type and every + relationship type where that type is source or target. +2. Structure the description explicitly: + - **Properties:** one note per property; add extra notes for special handling (enums, + derived values, secrets, multi-file sources). + - **Relationships:** one note per relationship type; require every relationship instance + involving the assigned entity to be considered, resolved, and created or updated. +3. Avoid vague theme-only headings without mapping to concrete properties and relationship types. + Narrow scope only when the operator explicitly asks. """.strip() _TOOLS_COMPACT_REFERENCE = ( diff --git a/src/agent-runtime/kartograph_agent_runtime/extraction_jobs_tools.py b/src/agent-runtime/kartograph_agent_runtime/extraction_jobs_tools.py index 47525473c..3175df6fd 100644 --- a/src/agent-runtime/kartograph_agent_runtime/extraction_jobs_tools.py +++ b/src/agent-runtime/kartograph_agent_runtime/extraction_jobs_tools.py @@ -47,7 +47,9 @@ async def get_extraction_jobs_config(_args: dict[str, Any]) -> dict[str, Any]: "Pass the full job_sets array (read existing config first and merge edits). " "Each job set needs: name, strategy (by_instances or by_files), description, " "entity_type + instances_per_job for by_instances, or file_patterns + files_per_job " - "for by_files." + "for by_files. For by_instances, the description must follow exhaustive per-instance " + "coverage: one note per property, extra notes for special properties, and one note " + "per relationship type requiring every related instance to be created or updated." 
), { "version": str, diff --git a/src/api/extraction/application/skill_resolution_service.py b/src/api/extraction/application/skill_resolution_service.py index 4ad03a695..9edfd9c79 100644 --- a/src/api/extraction/application/skill_resolution_service.py +++ b/src/api/extraction/application/skill_resolution_service.py @@ -146,10 +146,35 @@ class ResolvedExtractionSkillPack: "Prioritize extraction job set authoring: by_instances batches with required " "per-instance extraction descriptions (no separate extraction_plan.md). " "Each description tells the extraction worker what to enrich for assigned entity slugs. " - "When the operator approves a proposed configuration, persist it yourself with " + "Follow per_instance_description_authoring — default to exhaustive property and " + "relationship coverage unless the operator narrows scope. When the operator approves " + "a proposed configuration, persist it yourself with " "kartograph_save_extraction_jobs_config — never instruct them to manually enter " "values in the extraction-jobs UI." ), + "per_instance_description_authoring": ( + "The job set `description` is the shared per-instance brief for every by_instances job " + "in that set. Default stance: capture everything related to each assigned instance — " + "populate or update every schema property and every applicable relationship instance; " + "partial coverage is incomplete unless the operator explicitly narrows scope. " + "Before drafting: call kartograph_get_schema_ontology (and " + "kartograph_get_schema_authoring_guide if needed) for the target entity_type — list " + "all properties on that type and every edge_types row where it is a source or target " + "label. 
Structure the description explicitly, in order: " + "(1) Properties — one numbered note per property: what to extract, where to find it " + "in repository-files/, and the expected value shape; add a separate note for any " + "property needing special handling (enums, derived values, secrets/redaction, " + "multi-file joins, defaults when absent). " + "(2) Relationships — one numbered note per relationship type: require discovering " + "and creating or updating every relationship instance that includes the assigned " + "entity (as source or target); explain how to resolve counterpart entity slugs and " + "when to create missing endpoints. For bidirectional types, cover both directions. " + "Do not substitute vague theme headings (e.g. 'Implementation Analysis') for this " + "property- and relationship-level checklist — themes may group notes but every " + "property and relationship type from the ontology must appear. When the operator " + "requests focused extraction, state explicitly which properties or relationship types " + "are out of scope." + ), "job_set_contract": ( "Use kartograph_get_extraction_jobs_config before editing and " "kartograph_save_extraction_jobs_config to persist job sets for this knowledge graph. " @@ -180,9 +205,10 @@ class ResolvedExtractionSkillPack: }, GraphManagementUiMode.EXTRACTION_JOBS: { "ui_mode_framing": ( - "Focus on extraction job set setup: define by_instances batches with per-instance " - "extraction descriptions, persist via kartograph_save_extraction_jobs_config when " - "the operator approves, then guide them to Run extraction. Use " + "Focus on extraction job set setup: by_instances batches whose description follows " + "per_instance_description_authoring (exhaustive property + relationship coverage by " + "default). Persist via kartograph_save_extraction_jobs_config when the operator " + "approves, then guide them to Run extraction. 
Use kartograph_get_schema_ontology and " "kartograph_list_instances_by_type to size batches. JobPackage readiness still " "applies when file-backed context is required." ), diff --git a/src/api/extraction/infrastructure/extraction_job_prompt.py b/src/api/extraction/infrastructure/extraction_job_prompt.py index bca8ae2ec..84129c951 100644 --- a/src/api/extraction/infrastructure/extraction_job_prompt.py +++ b/src/api/extraction/infrastructure/extraction_job_prompt.py @@ -18,6 +18,11 @@ def build_extraction_job_prompt(*, job: ExtractionJobRecord) -> str: if job.target_instances: lines.extend( [ + "## Coverage default", + "For each assigned instance: populate or update every schema property and every", + "applicable relationship instance (create missing edges; update existing ones).", + "Treat partial coverage as incomplete unless the job instructions below narrow scope.", + "", "## Target entity instances", "Process only the instances listed below. Use the workload API to read existing graph", "context and emit JSONL mutations for new or updated entities and relationships.", diff --git a/src/api/tests/unit/extraction/application/test_skill_resolution_service.py b/src/api/tests/unit/extraction/application/test_skill_resolution_service.py index 958d771e8..6b1f9700f 100644 --- a/src/api/tests/unit/extraction/application/test_skill_resolution_service.py +++ b/src/api/tests/unit/extraction/application/test_skill_resolution_service.py @@ -63,6 +63,8 @@ async def test_extraction_mode_uses_extraction_defaults(self): ) assert "job_setup" in resolved.skills + assert "per_instance_description_authoring" in resolved.skills + assert "every schema property" in resolved.skills["per_instance_description_authoring"] assert "minor_edits" in resolved.skills assert "schema_edits_secondary" in resolved.skills assert "extraction" in resolved.system_prompt.lower() diff --git a/src/api/tests/unit/extraction/infrastructure/test_extraction_job_prompt.py 
b/src/api/tests/unit/extraction/infrastructure/test_extraction_job_prompt.py index 3eb669d0e..c8db7e461 100644 --- a/src/api/tests/unit/extraction/infrastructure/test_extraction_job_prompt.py +++ b/src/api/tests/unit/extraction/infrastructure/test_extraction_job_prompt.py @@ -34,6 +34,8 @@ def test_build_extraction_job_prompt_includes_instances_and_files() -> None: prompt = build_extraction_job_prompt(job=job) assert "Extract acceptance criteria." in prompt + assert "## Coverage default" in prompt + assert "every schema property" in prompt assert "Feature: feature-a" in prompt assert "repo-a/features/a.feature" in prompt assert "job-context.json" in prompt From bccf1b73424b975bcfa5845add2333cbd5839fe9 Mon Sep 17 00:00:00 2001 From: aredenba-rh <aredenba@redhat.com> Date: Thu, 11 Jun 2026 23:04:30 -0400 Subject: [PATCH 120/153] feat(extraction): template per-instance job descriptions by property and edge Require EntityType -> rel -> CounterpartType lines and forbid theme-only sections so GMA writes ontology-grounded extraction briefs. Co-authored-by: Cursor <cursoragent@cursor.com> --- .../kartograph_agent_runtime/agent_prompt.py | 29 ++++++------- .../extraction_jobs_tools.py | 6 +-- .../application/skill_resolution_service.py | 41 +++++++++---------- .../test_skill_resolution_service.py | 3 +- 4 files changed, 40 insertions(+), 39 deletions(-) diff --git a/src/agent-runtime/kartograph_agent_runtime/agent_prompt.py b/src/agent-runtime/kartograph_agent_runtime/agent_prompt.py index af8c971ea..6b501f1c8 100644 --- a/src/agent-runtime/kartograph_agent_runtime/agent_prompt.py +++ b/src/agent-runtime/kartograph_agent_runtime/agent_prompt.py @@ -73,20 +73,21 @@ ### Per-instance description (by_instances job sets) -The job set `description` is the shared brief for every assigned instance in that set. -**Default: capture everything** — every schema property filled or updated, and every -applicable relationship instance created or updated for each assigned slug. 
- -Before writing a description: -1. `kartograph_get_schema_ontology` — list properties on the target entity type and every - relationship type where that type is source or target. -2. Structure the description explicitly: - - **Properties:** one note per property; add extra notes for special handling (enums, - derived values, secrets, multi-file sources). - - **Relationships:** one note per relationship type; require every relationship instance - involving the assigned entity to be considered, resolved, and created or updated. -3. Avoid vague theme-only headings without mapping to concrete properties and relationship types. - Narrow scope only when the operator explicitly asks. +Use this template (substitute real entity and relationship names from `kartograph_get_schema_ontology`): + +``` +For each of the instances of {EntityType} you've been assigned, capture everything into the knowledge graph: all properties of that instance and every relationship instance an instance of {EntityType} can have. + +Properties: +- {property_name}: {how to extract, where in repository-files/, value shape} +- ... + +{EntityType} -> {relationship_label} -> {CounterpartType}: {when to create/update; how to resolve counterpart slug} +{EntityType} -> {other_rel} -> {OtherType}: ... +``` + +Do **not** use theme-only sections (Implementation Analysis, Configuration Details, etc.). +When the operator approves, save via `kartograph_save_extraction_jobs_config`. 
""".strip() _TOOLS_COMPACT_REFERENCE = ( diff --git a/src/agent-runtime/kartograph_agent_runtime/extraction_jobs_tools.py b/src/agent-runtime/kartograph_agent_runtime/extraction_jobs_tools.py index 3175df6fd..0c8af0c46 100644 --- a/src/agent-runtime/kartograph_agent_runtime/extraction_jobs_tools.py +++ b/src/agent-runtime/kartograph_agent_runtime/extraction_jobs_tools.py @@ -47,9 +47,9 @@ async def get_extraction_jobs_config(_args: dict[str, Any]) -> dict[str, Any]: "Pass the full job_sets array (read existing config first and merge edits). " "Each job set needs: name, strategy (by_instances or by_files), description, " "entity_type + instances_per_job for by_instances, or file_patterns + files_per_job " - "for by_files. For by_instances, the description must follow exhaustive per-instance " - "coverage: one note per property, extra notes for special properties, and one note " - "per relationship type requiring every related instance to be created or updated." + "for by_files. For by_instances, description must match per_instance_description_authoring: " + "opening capture-everything paragraph, Properties section listing each property, then " + "one '{EntityType} -> {rel} -> {CounterpartType}:' line per ontology relationship." ), { "version": str, diff --git a/src/api/extraction/application/skill_resolution_service.py b/src/api/extraction/application/skill_resolution_service.py index 9edfd9c79..c20c8a70e 100644 --- a/src/api/extraction/application/skill_resolution_service.py +++ b/src/api/extraction/application/skill_resolution_service.py @@ -153,27 +153,26 @@ class ResolvedExtractionSkillPack: "values in the extraction-jobs UI." ), "per_instance_description_authoring": ( - "The job set `description` is the shared per-instance brief for every by_instances job " - "in that set. 
Default stance: capture everything related to each assigned instance — " - "populate or update every schema property and every applicable relationship instance; " - "partial coverage is incomplete unless the operator explicitly narrows scope. " - "Before drafting: call kartograph_get_schema_ontology (and " - "kartograph_get_schema_authoring_guide if needed) for the target entity_type — list " - "all properties on that type and every edge_types row where it is a source or target " - "label. Structure the description explicitly, in order: " - "(1) Properties — one numbered note per property: what to extract, where to find it " - "in repository-files/, and the expected value shape; add a separate note for any " - "property needing special handling (enums, derived values, secrets/redaction, " - "multi-file joins, defaults when absent). " - "(2) Relationships — one numbered note per relationship type: require discovering " - "and creating or updating every relationship instance that includes the assigned " - "entity (as source or target); explain how to resolve counterpart entity slugs and " - "when to create missing endpoints. For bidirectional types, cover both directions. " - "Do not substitute vague theme headings (e.g. 'Implementation Analysis') for this " - "property- and relationship-level checklist — themes may group notes but every " - "property and relationship type from the ontology must appear. When the operator " - "requests focused extraction, state explicitly which properties or relationship types " - "are out of scope." + "The job set `description` is the shared per-instance brief for every by_instances job. " + "Before drafting, call kartograph_get_schema_ontology for the target entity_type and " + "enumerate every property on that type plus every edge_types row where it appears as " + "source or target. 
Write the description using this exact shape (replace EntityX/Y and " + "relationship labels with real ontology names — never use placeholder EntityX in saved " + "text): " + "Opening (one paragraph): 'For each of the instances of {EntityType} you've been " + "assigned, capture everything into the knowledge graph: all properties of that " + "instance and every relationship instance an instance of {EntityType} can have.' " + "Then 'Properties:' — list each property on {EntityType} by name; under it give " + "extraction instructions (source files, value shape, defaults). Call out special " + "handling inline per property when needed (enums, secrets, derived values). " + "Then one line per relationship type from the ontology, formatted exactly: " + "'{EntityType} -> {relationship_label} -> {CounterpartType}: ' followed by when to " + "create or update that edge, how to resolve the counterpart slug, and whether to " + "create missing endpoints. Include both directions when the type is bidirectional. " + "FORBIDDEN: thematic sections only (e.g. 'Implementation Analysis', 'Configuration " + "Details', 'Operational Characteristics') that do not name every property and every " + "'Entity -> rel -> Entity' line from the ontology. Narrow scope only when the operator " + "explicitly excludes named properties or relationship types." 
), "job_set_contract": ( "Use kartograph_get_extraction_jobs_config before editing and " diff --git a/src/api/tests/unit/extraction/application/test_skill_resolution_service.py b/src/api/tests/unit/extraction/application/test_skill_resolution_service.py index 6b1f9700f..488d7b9aa 100644 --- a/src/api/tests/unit/extraction/application/test_skill_resolution_service.py +++ b/src/api/tests/unit/extraction/application/test_skill_resolution_service.py @@ -64,7 +64,8 @@ async def test_extraction_mode_uses_extraction_defaults(self): assert "job_setup" in resolved.skills assert "per_instance_description_authoring" in resolved.skills - assert "every schema property" in resolved.skills["per_instance_description_authoring"] + assert "EntityType} ->" in resolved.skills["per_instance_description_authoring"] + assert "Implementation Analysis" in resolved.skills["per_instance_description_authoring"] assert "minor_edits" in resolved.skills assert "schema_edits_secondary" in resolved.skills assert "extraction" in resolved.system_prompt.lower() From 05612f2d3b7eb49a33883a9fe6ba725f108c0bf7 Mon Sep 17 00:00:00 2001 From: aredenba-rh <aredenba@redhat.com> Date: Thu, 11 Jun 2026 23:04:51 -0400 Subject: [PATCH 121/153] fix(ui): keep job status visible during extraction polling refresh Background polls no longer toggle the loading state that hid Job Status every 1.5s; stale data stays on screen with a subtle header spinner. 
Co-authored-by: Cursor <cursoragent@cursor.com> --- .../GraphExtractionJobsWorkspace.vue | 54 ++++++++++++++----- 1 file changed, 41 insertions(+), 13 deletions(-) diff --git a/src/dev-ui/app/components/graph-management/GraphExtractionJobsWorkspace.vue b/src/dev-ui/app/components/graph-management/GraphExtractionJobsWorkspace.vue index 6ce4494c4..72144dbf1 100644 --- a/src/dev-ui/app/components/graph-management/GraphExtractionJobsWorkspace.vue +++ b/src/dev-ui/app/components/graph-management/GraphExtractionJobsWorkspace.vue @@ -78,6 +78,7 @@ const selectedOntologyTab = ref<OntologyTab>('entities') const jobSetsReloadNonce = ref(0) const dbStatus = ref<DbStatus | null>(null) const dbLoading = ref(true) +const dbRefreshing = ref(false) const dbError = ref<string | null>(null) const extractionRunState = ref<ExtractionRunState | null>(null) const planSummary = ref<PlanSummary | null>(null) @@ -98,28 +99,54 @@ let clockInterval: ReturnType<typeof setInterval> | null = null const basePath = computed(() => `/management/knowledge-graphs/${encodeURIComponent(props.kgId)}/extraction-jobs`) -async function loadDatabaseStatus() { - dbLoading.value = true - dbError.value = null +async function loadDatabaseStatus(options?: { background?: boolean }) { + const background = options?.background ?? false + const hasExistingData = dbStatus.value !== null + + if (background && hasExistingData) { + dbRefreshing.value = true + } else { + dbLoading.value = true + } + if (!background) { + dbError.value = null + } try { dbStatus.value = await apiFetch<DbStatus>(`${basePath.value}/database-status`) + dbError.value = null } catch (e: unknown) { - dbError.value = e instanceof Error ? e.message : 'Failed to load status' + if (!background || !hasExistingData) { + dbError.value = e instanceof Error ? 
e.message : 'Failed to load status' + } } finally { dbLoading.value = false + dbRefreshing.value = false } } async function loadExtractionRunState() { - extractionRunState.value = await apiFetch<ExtractionRunState>(`${basePath.value}/run-state`) + try { + extractionRunState.value = await apiFetch<ExtractionRunState>(`${basePath.value}/run-state`) + } catch { + // Keep prior run state during background refresh failures. + } } async function loadPlanSummary() { - planSummary.value = await apiFetch<PlanSummary>(`${basePath.value}/plan-summary`) + try { + planSummary.value = await apiFetch<PlanSummary>(`${basePath.value}/plan-summary`) + } catch { + // Keep prior plan summary during background refresh failures. + } } -async function refreshAll() { - await Promise.all([loadDatabaseStatus(), loadExtractionRunState(), loadPlanSummary()]) +async function refreshAll(options?: { background?: boolean }) { + const background = options?.background ?? dbStatus.value !== null + await Promise.all([ + loadDatabaseStatus({ background }), + loadExtractionRunState(), + loadPlanSummary(), + ]) } const workerCount = computed(() => Math.max(1, Math.floor(Number(workers.value) || 1))) @@ -243,7 +270,7 @@ async function resetByKind(kind: 'stale' | 'completed' | 'failed' | 'all') { function startAutoRefresh() { if (autoRefreshInterval) return - autoRefreshInterval = setInterval(() => { void refreshAll() }, 1500) + autoRefreshInterval = setInterval(() => { void refreshAll({ background: true }) }, 1500) } function stopAutoRefresh() { @@ -254,7 +281,7 @@ function stopAutoRefresh() { function onJobSetsSaved() { jobSetsReloadNonce.value += 1 - void refreshAll() + void refreshAll({ background: dbStatus.value !== null }) } watch( @@ -267,7 +294,7 @@ watch( watch( () => props.reloadNonce, - () => { void refreshAll() }, + () => { void refreshAll({ background: dbStatus.value !== null }) }, ) onMounted(() => { @@ -422,15 +449,16 @@ onUnmounted(() => { <CardTitle class="flex items-center gap-2 
text-base"> <ClipboardList class="size-4" /> Job Status + <Loader2 v-if="dbRefreshing" class="size-3.5 animate-spin text-muted-foreground" /> </CardTitle> <CardDescription>Aggregate job metrics and maintenance actions.</CardDescription> </CardHeader> <CardContent class="space-y-4"> - <div v-if="dbLoading" class="flex items-center gap-2 text-sm text-muted-foreground"> + <div v-if="dbLoading && !dbStatus" class="flex items-center gap-2 text-sm text-muted-foreground"> <Loader2 class="size-4 animate-spin" /> Loading job status... </div> - <div v-else-if="dbError" class="text-sm text-destructive">{{ dbError }}</div> + <div v-else-if="dbError && !dbStatus" class="text-sm text-destructive">{{ dbError }}</div> <template v-else-if="dbStatus"> <div class="grid gap-3 sm:grid-cols-2 lg:grid-cols-5"> <div class="rounded-lg border p-3 text-center"> From 1a991643873e77deac31fefa2cc5b21615a1779d Mon Sep 17 00:00:00 2001 From: aredenba-rh <aredenba@redhat.com> Date: Fri, 12 Jun 2026 00:57:54 -0400 Subject: [PATCH 122/153] feat(extraction): job set controls, live activity, and agentic-ci fixes Enable/disable job sets with partial pending-job sync while runs are active, individual job cancel with container teardown, clearer regenerate UX, and relationship ownership rules for per-instance descriptions. Fix agentic-ci prompt delivery, host-reachable API URLs, GCP mounts, and activity watch parsing. 
Co-authored-by: Cursor <cursoragent@cursor.com> --- .../extraction_jobs_tools.py | 4 +- .../application/skill_resolution_service.py | 34 ++- .../agentic_ci_extraction_job_runner.py | 153 +++++++++- .../container_workload_runtime.py | 19 +- .../infrastructure/extraction_job_activity.py | 267 +++++++++++++++++ .../extraction_job_container.py | 21 ++ .../infrastructure/extraction_job_prompt.py | 35 +++ .../extraction_run_orchestrator.py | 41 +++ .../repositories/extraction_job_repository.py | 156 ++++++++++ .../infrastructure/vertex_runtime_env.py | 23 ++ .../workload_runtime_settings.py | 7 + .../extraction_workload/dependencies.py | 4 +- .../extraction_jobs_service.py | 48 +-- .../management/extraction_job_materializer.py | 4 + .../management/extraction_jobs_service.py | 202 +++++++++++-- .../domain/extraction_job_config.py | 14 + .../extraction_relationship_authoring.py | 75 +++++ .../extraction_jobs_routes.py | 70 ++++- .../test_skill_resolution_service.py | 1 + .../test_agentic_ci_extraction_job_runner.py | 98 +++++++ .../test_extraction_job_activity.py | 78 +++++ .../test_extraction_job_materializer.py | 34 +++ .../domain/test_extraction_job_config.py | 10 + .../test_extraction_relationship_authoring.py | 57 ++++ .../GraphExtractionJobSetsPanel.vue | 36 ++- .../GraphExtractionJobWatchDialog.vue | 255 ++++++++++++++++ .../GraphExtractionJobsWorkspace.vue | 277 +++++++++++++++++- 27 files changed, 1913 insertions(+), 110 deletions(-) create mode 100644 src/api/extraction/infrastructure/extraction_job_activity.py create mode 100644 src/api/extraction/infrastructure/extraction_job_container.py create mode 100644 src/api/management/domain/extraction_relationship_authoring.py create mode 100644 src/api/tests/unit/extraction/infrastructure/test_agentic_ci_extraction_job_runner.py create mode 100644 src/api/tests/unit/extraction/infrastructure/test_extraction_job_activity.py create mode 100644 
src/api/tests/unit/management/domain/test_extraction_relationship_authoring.py create mode 100644 src/dev-ui/app/components/graph-management/GraphExtractionJobWatchDialog.vue diff --git a/src/agent-runtime/kartograph_agent_runtime/extraction_jobs_tools.py b/src/agent-runtime/kartograph_agent_runtime/extraction_jobs_tools.py index 0c8af0c46..77036692b 100644 --- a/src/agent-runtime/kartograph_agent_runtime/extraction_jobs_tools.py +++ b/src/agent-runtime/kartograph_agent_runtime/extraction_jobs_tools.py @@ -49,7 +49,9 @@ async def get_extraction_jobs_config(_args: dict[str, Any]) -> dict[str, Any]: "entity_type + instances_per_job for by_instances, or file_patterns + files_per_job " "for by_files. For by_instances, description must match per_instance_description_authoring: " "opening capture-everything paragraph, Properties section listing each property, then " - "one '{EntityType} -> {rel} -> {CounterpartType}:' line per ontology relationship." + "one '{EntityType} -> {rel} -> {CounterpartType}:' line per relationship this entity " + "type owns (include only when this entity type has more live instances than the " + "counterpart type)." ), { "version": str, diff --git a/src/api/extraction/application/skill_resolution_service.py b/src/api/extraction/application/skill_resolution_service.py index c20c8a70e..a59b4d44e 100644 --- a/src/api/extraction/application/skill_resolution_service.py +++ b/src/api/extraction/application/skill_resolution_service.py @@ -155,30 +155,39 @@ class ResolvedExtractionSkillPack: "per_instance_description_authoring": ( "The job set `description` is the shared per-instance brief for every by_instances job. " "Before drafting, call kartograph_get_schema_ontology for the target entity_type and " - "enumerate every property on that type plus every edge_types row where it appears as " - "source or target. 
Write the description using this exact shape (replace EntityX/Y and " - "relationship labels with real ontology names — never use placeholder EntityX in saved " - "text): " + "kartograph_get_extraction_jobs_config (or plan summary entity_types) for live instance " + "counts. Enumerate every property on the target entity type. For relationships, apply " + "the duplicate-work rule by default: when EntityX relates to EntityY, include a " + "'{EntityX} -> {relationship_label} -> {EntityY}:' line only if EntityX has MORE live " + "instances than EntityY. Omit that relationship line from EntityY job sets when " + "EntityY has fewer or equal instances — the higher-volume side owns that edge work. " + "Write the description using this exact shape (replace with real ontology names — " + "never use placeholder EntityX in saved text): " "Opening (one paragraph): 'For each of the instances of {EntityType} you've been " "assigned, capture everything into the knowledge graph: all properties of that " - "instance and every relationship instance an instance of {EntityType} can have.' " + "instance and every applicable relationship instance this job set owns (see lines " + "below).' " "Then 'Properties:' — list each property on {EntityType} by name; under it give " "extraction instructions (source files, value shape, defaults). Call out special " "handling inline per property when needed (enums, secrets, derived values). " - "Then one line per relationship type from the ontology, formatted exactly: " + "Then one line per relationship type this job set owns (after applying the " + "instance-count rule), formatted exactly: " "'{EntityType} -> {relationship_label} -> {CounterpartType}: ' followed by when to " "create or update that edge, how to resolve the counterpart slug, and whether to " - "create missing endpoints. Include both directions when the type is bidirectional. " + "create missing endpoints. 
Do NOT list relationship types owned by the counterpart " + "entity type's job set under the duplicate-work rule. " "FORBIDDEN: thematic sections only (e.g. 'Implementation Analysis', 'Configuration " "Details', 'Operational Characteristics') that do not name every property and every " - "'Entity -> rel -> Entity' line from the ontology. Narrow scope only when the operator " + "included 'Entity -> rel -> Entity' line. Narrow scope only when the operator " "explicitly excludes named properties or relationship types." ), "job_set_contract": ( "Use kartograph_get_extraction_jobs_config before editing and " "kartograph_save_extraction_jobs_config to persist job sets for this knowledge graph. " - "Each set needs: name, strategy (by_instances primary), entity_type, instances_per_job, " - "and description. Saving regenerates pending jobs from live graph instances. " + "Each set needs: name, enabled (default true), strategy (by_instances primary), " + "entity_type, instances_per_job, and description. Disabled sets are kept in config " + "but excluded from the job queue. Saving syncs pending jobs for enabled sets only and " + "can be done while other jobs are running. " "Use kartograph_get_extraction_jobs_plan_summary to cite projected job counts. " "Job sets run sequentially; jobs within a set run concurrently up to worker count." ), @@ -205,8 +214,9 @@ class ResolvedExtractionSkillPack: GraphManagementUiMode.EXTRACTION_JOBS: { "ui_mode_framing": ( "Focus on extraction job set setup: by_instances batches whose description follows " - "per_instance_description_authoring (exhaustive property + relationship coverage by " - "default). Persist via kartograph_save_extraction_jobs_config when the operator " + "per_instance_description_authoring (all properties; relationship lines only where " + "this entity type has more instances than the counterpart). Persist via " + "kartograph_save_extraction_jobs_config when the operator " "approves, then guide them to Run extraction. 
Use kartograph_get_schema_ontology and " "kartograph_list_instances_by_type to size batches. JobPackage readiness still " "applies when file-backed context is required." diff --git a/src/api/extraction/infrastructure/agentic_ci_extraction_job_runner.py b/src/api/extraction/infrastructure/agentic_ci_extraction_job_runner.py index 01aac9106..e521cdf8b 100644 --- a/src/api/extraction/infrastructure/agentic_ci_extraction_job_runner.py +++ b/src/api/extraction/infrastructure/agentic_ci_extraction_job_runner.py @@ -2,10 +2,13 @@ from __future__ import annotations +import json import os import re import subprocess import tempfile +import time +from datetime import UTC, datetime from pathlib import Path from typing import Any @@ -13,12 +16,27 @@ from agentic_ci import otel from extraction.domain.extraction_job import ExtractionJobRecord +from extraction.infrastructure.extraction_job_activity import ( + activity_log_path, + append_activity_line, + append_activity_message, + format_activity_log_line, + format_claude_code_stream_line, +) from extraction.infrastructure.extraction_job_metrics import metrics_from_otel_log -from extraction.infrastructure.extraction_job_prompt import build_extraction_job_prompt +from extraction.infrastructure.extraction_job_prompt import ( + EXTRACTION_JOB_INVOKE_PROMPT, + build_extraction_job_prompt, + write_extraction_prompt_file, +) from extraction.infrastructure.extraction_job_workdir_materializer import ( ExtractionJobWorkdirMaterializer, ) -from extraction.infrastructure.vertex_runtime_env import build_vertex_container_env +from extraction.infrastructure.vertex_runtime_env import ( + build_gcloud_adc_env, + build_gcloud_config_bind, + build_vertex_container_env, +) from extraction.infrastructure.workload_runtime_factory import get_workload_credential_issuer from extraction.infrastructure.workload_runtime_settings import ( ExtractionWorkloadRuntimeSettings, @@ -29,7 +47,6 @@ from shared_kernel.container_runtime.ports import 
ContainerRuntimeError _CONTAINER_NAME_SAFE = re.compile(r"[^a-zA-Z0-9_.-]+") -_GCLOUD_ADC_FILENAME = "application_default_credentials.json" def _sanitize_container_name(job_id: str) -> str: @@ -37,6 +54,21 @@ def _sanitize_container_name(job_id: str) -> str: return f"kartograph-extract-{cleaned}"[:63].rstrip("-_.") +def _strip_harness_binary(command: list[str]) -> list[str]: + """Drop the CLI binary when the image entrypoint already execs it.""" + if command and command[0] in {"claude", "opencode"}: + return command[1:] + return command + + +def _patch_job_context_api_base(workdir: Path, api_base_url: str) -> None: + """Rewrite api_base_url so host-network job containers can reach the API.""" + context_path = workdir / "job-context.json" + context = json.loads(context_path.read_text(encoding="utf-8")) + context["api_base_url"] = api_base_url.rstrip("/") + context_path.write_text(json.dumps(context, indent=2) + "\n", encoding="utf-8") + + class AgenticCiExtractionJobRunner(IExtractionJobRunner): """Execute one extraction job using opendatahub-io/agentic-ci harness and containers.""" @@ -63,6 +95,7 @@ async def run(self, job: ExtractionJobRecord, *, tenant_id: str) -> dict[str, An tenant_id=tenant_id, credentials=credentials, ) + _patch_job_context_api_base(workdir, self._settings.agentic_ci_api_base_url) prompt = build_extraction_job_prompt(job=job) return await self._run_in_container(job=job, workdir=workdir, prompt=prompt) @@ -96,7 +129,12 @@ def _run_in_container_sync( otel_log = Path(otel_log_path) env = self._build_container_env(otel_port=otel_port) binds = self._build_binds(workdir=workdir) - command = self._harness.build_args(prompt, model) + write_extraction_prompt_file(workdir=workdir, prompt=prompt) + command = _strip_harness_binary( + self._harness.build_args(EXTRACTION_JOB_INVOKE_PROMPT, model) + ) + log_path = activity_log_path(workdir) + append_activity_line(log_path, f"📡 Processing job {job.job_id}...") rc = self._run_foreground( binary=binary, 
image=self._settings.agentic_ci_image, @@ -105,7 +143,9 @@ def _run_in_container_sync( binds=binds, command=command, timeout_seconds=self._settings.agentic_ci_timeout_seconds, + activity_log_path=log_path, ) + append_activity_line(log_path, f"✅ Container finished with exit code {rc}") if otel_proc is not None: otel.stop_collector(otel_proc) otel_proc = None @@ -135,9 +175,11 @@ def _resolve_model(self) -> str: return self._harness.default_model() def _build_container_env(self, *, otel_port: int) -> dict[str, str]: + model = self._resolve_model() env: dict[str, str] = { "DISABLE_AUTOUPDATER": "1", - "AGENT_MODEL": self._resolve_model(), + "AGENT_MODEL": model, + self._harness.model_env_var(): model, } if self._harness.auth_mode == "api-key": api_key = os.environ.get("ANTHROPIC_API_KEY", "").strip() @@ -150,6 +192,9 @@ def _build_container_env(self, *, otel_port: int) -> dict[str, str]: region=self._settings.vertex_region, ) ) + if self._settings.gcloud_config_mount: + container_gcloud = self._settings.gcloud_config_container_path.rstrip("/") + env.update(build_gcloud_adc_env(container_config_path=container_gcloud)) if self._harness.supports_otel and otel_port: env.update( { @@ -166,18 +211,12 @@ def _build_container_env(self, *, otel_port: int) -> dict[str, str]: def _build_binds(self, *, workdir: Path) -> list[str]: binds = [f"{workdir}:/workspace:z"] if self._settings.gcloud_config_mount and self._settings.vertex_enabled(): - mount_target = self._harness.credential_mount_target() - gcloud_root = self._settings.gcloud_config_mount.rstrip("/") - adc = f"{gcloud_root}/{_GCLOUD_ADC_FILENAME}" - config = f"{gcloud_root}/configurations/config_default" - if Path(adc).is_file(): - binds.append( - f"{adc}:{mount_target}/.config/gcloud/application_default_credentials.json:ro,z" - ) - if Path(config).is_file(): - binds.append( - f"{config}:{mount_target}/.config/gcloud/configurations/config_default:ro,z" + binds.append( + build_gcloud_config_bind( + 
host_mount=self._settings.gcloud_config_mount, + container_path=self._settings.gcloud_config_container_path, ) + ) return binds def _run_foreground( @@ -190,6 +229,7 @@ def _run_foreground( binds: list[str], command: list[str], timeout_seconds: int, + activity_log_path: Path | None = None, ) -> int: cmd = [ binary, @@ -215,6 +255,15 @@ def _run_foreground( cmd.extend(["--volume", bind]) cmd.append(image) cmd.extend(command) + if activity_log_path is not None: + return self._run_foreground_streaming( + cmd=cmd, + binary=binary, + name=name, + timeout_seconds=timeout_seconds, + activity_log_path=activity_log_path, + ) + try: result = subprocess.run( cmd, @@ -234,3 +283,75 @@ def _run_foreground( f"{binary} run failed for {name}: {detail or 'unknown error'}" ) return int(result.returncode) + + def _run_foreground_streaming( + self, + *, + cmd: list[str], + binary: str, + name: str, + timeout_seconds: int, + activity_log_path: Path, + ) -> int: + started = time.monotonic() + proc = subprocess.Popen( + cmd, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + text=True, + bufsize=1, + ) + captured_tail: list[str] = [] + try: + assert proc.stdout is not None + with activity_log_path.open("a", encoding="utf-8") as log_handle: + for line in proc.stdout: + if time.monotonic() - started > timeout_seconds: + proc.kill() + append_activity_message( + activity_log_path, + kind="error", + text=f"Container timed out after {timeout_seconds}s", + ) + raise RuntimeError( + f"agentic-ci container timed out after {timeout_seconds}s" + ) + cleaned = line.rstrip("\n") + if not cleaned: + continue + parsed = format_claude_code_stream_line(cleaned) + if parsed: + ts = datetime.now(UTC).isoformat() + for kind, text in parsed: + log_handle.write(f"{ts} {format_activity_log_line(kind=kind, text=text)}\n") + captured_tail.append(text) + else: + ts = datetime.now(UTC).isoformat() + log_handle.write(f"{ts} {format_activity_log_line(kind='info', text=cleaned)}\n") + 
captured_tail.append(cleaned) + log_handle.flush() + if len(captured_tail) > 20: + captured_tail.pop(0) + rc = proc.wait(timeout=30) + except subprocess.TimeoutExpired as exc: + proc.kill() + subprocess.run([binary, "rm", "-f", name], capture_output=True, check=False) + append_activity_line(activity_log_path, "❌ Container wait timed out") + raise RuntimeError( + f"agentic-ci container timed out after {timeout_seconds}s" + ) from exc + + if rc != 0: + detail = next( + (line for line in reversed(captured_tail) if line.strip()), + f"exit code {rc}", + ) + append_activity_message( + activity_log_path, + kind="error", + text=f"Container failed: {detail}", + ) + raise ContainerRuntimeError( + f"{binary} run failed for {name}: {detail}" + ) + return int(rc) diff --git a/src/api/extraction/infrastructure/container_workload_runtime.py b/src/api/extraction/infrastructure/container_workload_runtime.py index 526f3ec14..173e9f210 100644 --- a/src/api/extraction/infrastructure/container_workload_runtime.py +++ b/src/api/extraction/infrastructure/container_workload_runtime.py @@ -11,7 +11,10 @@ from extraction.infrastructure.sticky_session_workspace_binds import ( build_sticky_session_workspace_binds, ) -from extraction.infrastructure.vertex_runtime_env import build_vertex_container_env +from extraction.infrastructure.vertex_runtime_env import ( + build_gcloud_adc_env, + build_vertex_container_env, +) from extraction.ports.runtime import ( EphemeralWorkerLaunchRequest, EphemeralWorkerLaunchResult, @@ -32,18 +35,6 @@ def _sanitize_container_name(prefix: str, identifier: str) -> str: return name[:63].rstrip("-_.") or f"{prefix}runtime" -_GCLOUD_ADC_FILENAME = "application_default_credentials.json" - - -def _gcloud_adc_env(*, container_config_path: str) -> dict[str, str]: - base = container_config_path.rstrip("/") - return { - "CLOUDSDK_CONFIG": base, - "GOOGLE_APPLICATION_CREDENTIALS": f"{base}/{_GCLOUD_ADC_FILENAME}", - "HOME": "/tmp", - } - - class 
ContainerStickySessionRuntimeManager(IStickySessionRuntimeManager): """Sticky runtime manager backed by real container lifecycle operations.""" @@ -323,7 +314,7 @@ def _start_runtime( if self._gcloud_config_mount: container_gcloud = self._gcloud_config_container_path.rstrip("/") binds.append(f"{self._gcloud_config_mount}:{container_gcloud}:ro") - env.update(_gcloud_adc_env(container_config_path=container_gcloud)) + env.update(build_gcloud_adc_env(container_config_path=container_gcloud)) container_user: str | None = None if self._container_run_uid is not None and self._container_run_gid is not None: diff --git a/src/api/extraction/infrastructure/extraction_job_activity.py b/src/api/extraction/infrastructure/extraction_job_activity.py new file mode 100644 index 000000000..f422e39c3 --- /dev/null +++ b/src/api/extraction/infrastructure/extraction_job_activity.py @@ -0,0 +1,267 @@ +"""Read and write per-job agent activity logs for live extraction UI.""" + +from __future__ import annotations + +import json +from datetime import UTC, datetime +from pathlib import Path +from typing import Any + +from extraction.domain.extraction_job import ExtractionJobRecord, ExtractionJobStatus +from extraction.infrastructure.workload_runtime_settings import ( + ExtractionWorkloadRuntimeSettings, + get_extraction_workload_runtime_settings, +) + +AGENT_ACTIVITY_LOG = "agent_activity.log" +_PREVIEW_MAX_LEN = 220 +_ACTIVITY_KIND_EMOJI = { + "info": "📡", + "system": "⚙️", + "thought": "💭", + "tool": "🔧", + "error": "❌", + "success": "✅", +} + + +def job_workdir( + *, + knowledge_graph_id: str, + job_id: str, + settings: ExtractionWorkloadRuntimeSettings | None = None, +) -> Path: + runtime = settings or get_extraction_workload_runtime_settings() + return Path(runtime.extraction_job_work_dir) / knowledge_graph_id / job_id + + +def activity_log_path(workdir: Path) -> Path: + return workdir / AGENT_ACTIVITY_LOG + + +def append_activity_line(log_path: Path, message: str) -> None: + ts = 
datetime.now(UTC).isoformat() + log_path.parent.mkdir(parents=True, exist_ok=True) + with log_path.open("a", encoding="utf-8") as handle: + handle.write(f"{ts} {message}\n") + + +def format_activity_log_line(*, kind: str, text: str) -> str: + emoji = _ACTIVITY_KIND_EMOJI.get(kind, "•") + return f"{emoji} {text.strip()}" + + +def append_activity_message(log_path: Path, *, kind: str, text: str) -> None: + if not text.strip(): + return + append_activity_line(log_path, format_activity_log_line(kind=kind, text=text)) + + +def format_claude_code_stream_line(raw_line: str) -> list[tuple[str, str]]: + """Parse one claude-code JSONL stdout line into human-readable activity messages.""" + stripped = raw_line.strip() + if not stripped: + return [] + try: + event = json.loads(stripped) + except json.JSONDecodeError: + if stripped.startswith("{"): + return [] + return [("info", stripped)] + + event_type = str(event.get("type") or "") + if event_type == "assistant": + message = event.get("message") or {} + blocks = message.get("content") or [] + rendered: list[tuple[str, str]] = [] + for block in blocks: + if not isinstance(block, dict): + continue + block_type = str(block.get("type") or "") + if block_type == "text": + text = str(block.get("text") or "").strip() + if text: + rendered.append(("thought", text)) + elif block_type == "tool_use": + tool_name = str(block.get("name") or "tool") + rendered.append(("tool", f"Using tool: {tool_name}")) + elif block_type == "thinking": + text = str(block.get("thinking") or block.get("text") or "").strip() + if text: + rendered.append(("thought", text)) + return rendered + + if event_type == "system": + subtype = str(event.get("subtype") or "") + if subtype == "init": + model = str(event.get("model") or "unknown") + return [("system", f"Agent initialized (model: {model})")] + if subtype == "status": + status = str(event.get("status") or "working") + return [("system", f"Status: {status}")] + return [] + + if event_type == "result": + if 
event.get("is_error"): + error_text = str(event.get("result") or event.get("error") or "Extraction failed") + return [("error", error_text)] + result_text = str(event.get("result") or "").strip() + if result_text: + return [("success", result_text[:500])] + return [("success", "Job completed")] + + return [] + + +def _split_log_line(line: str) -> tuple[str, str, str]: + """Return (timestamp, kind, body) parsed from one stored log line.""" + timestamp = "" + body = line.strip() + if body and body[0].isdigit(): + parts = body.split(" ", 1) + if len(parts) == 2 and "T" in parts[0]: + timestamp, body = parts[0], parts[1] + + for emoji, kind in ( + ("📡", "info"), + ("⚙️", "system"), + ("💭", "thought"), + ("🔧", "tool"), + ("❌", "error"), + ("✅", "success"), + ): + prefix = f"{emoji} " + if body.startswith(prefix): + return timestamp, kind, body[len(prefix) :].strip() + + return timestamp, "info", body + + +def parse_activity_messages(raw_log: str) -> list[dict[str, str]]: + """Expand stored activity log lines into UI-friendly message rows.""" + messages: list[dict[str, str]] = [] + for line in raw_log.splitlines(): + if not line.strip(): + continue + timestamp, kind, body = _split_log_line(line) + if body.startswith("{") and body.endswith("}"): + for parsed_kind, parsed_text in format_claude_code_stream_line(body): + messages.append( + { + "timestamp": timestamp, + "kind": parsed_kind, + "text": parsed_text, + } + ) + continue + messages.append({"timestamp": timestamp, "kind": kind, "text": body}) + return messages + + +def read_activity_log(workdir: Path) -> str: + path = activity_log_path(workdir) + if not path.is_file(): + return "" + return path.read_text(encoding="utf-8") + + +def read_assistant_preview(workdir: Path, *, job_id: str) -> str | None: + """Return the latest thought/tool/error line for one job from its activity log.""" + content = read_activity_log(workdir) + if not content.strip(): + return None + + lines = [line for line in content.splitlines() if 
line.strip()] + start_idx = -1 + marker = f"Processing job {job_id}" + for index in range(len(lines) - 1, -1, -1): + if marker in lines[index]: + start_idx = index + break + + section_start = start_idx if start_idx >= 0 else 0 + section_end = len(lines) + for index in range(section_start + 1, len(lines)): + if "Processing job " in lines[index] and marker not in lines[index]: + section_end = index + break + + for index in range(section_end - 1, section_start - 1, -1): + messages = parse_activity_messages(lines[index]) + if messages: + for message in reversed(messages): + if message["kind"] in {"thought", "tool", "error", "success", "system"}: + return message["text"][:_PREVIEW_MAX_LEN] + line = lines[index] + for prefix in ("💭 ", "🔧 ", "❌ ", "⚙️ ", "✅ ", "📡 "): + marker_idx = line.find(prefix) + if marker_idx >= 0: + return line[marker_idx + len(prefix) :].strip()[:_PREVIEW_MAX_LEN] + return None + + +def serialize_recent_job( + job: ExtractionJobRecord, + *, + settings: ExtractionWorkloadRuntimeSettings | None = None, +) -> dict[str, Any]: + """Shape one job row for database-status recentJobs and live activity UI.""" + runtime = settings or get_extraction_workload_runtime_settings() + workdir = job_workdir( + knowledge_graph_id=job.knowledge_graph_id, + job_id=job.job_id, + settings=runtime, + ) + preview = read_assistant_preview(workdir, job_id=job.job_id) + if not preview and job.status == ExtractionJobStatus.FAILED and job.error_message: + preview = job.error_message[:_PREVIEW_MAX_LEN] + if not preview and job.description: + preview = job.description[:_PREVIEW_MAX_LEN] + + return { + "jobId": job.job_id, + "jobSet": job.job_set_name, + "status": job.status.value, + "workerId": job.worker_id, + "startedAt": job.started_at.isoformat() if job.started_at else None, + "completedAt": job.completed_at.isoformat() if job.completed_at else None, + "inputTokens": job.input_tokens, + "outputTokens": job.output_tokens, + "cacheReadTokens": job.cache_read_tokens, + 
"cacheCreationTokens": job.cache_creation_tokens, + "costUsd": job.cost_usd, + "entitiesCreated": job.entities_created, + "entitiesModified": job.entities_modified, + "relationshipsCreated": job.relationships_created, + "writeOps": job.entities_created + job.entities_modified + job.relationships_created, + "instanceCount": len(job.target_instances), + "fileCount": len(job.target_files), + "assistantPreview": preview, + "errorMessage": job.error_message, + } + + +def serialize_job_detail( + job: ExtractionJobRecord, + *, + settings: ExtractionWorkloadRuntimeSettings | None = None, +) -> dict[str, Any]: + """Full job detail for watch dialog and drill-down panels.""" + runtime = settings or get_extraction_workload_runtime_settings() + payload = serialize_recent_job(job, settings=runtime) + workdir = job_workdir( + knowledge_graph_id=job.knowledge_graph_id, + job_id=job.job_id, + settings=runtime, + ) + payload.update( + { + "strategy": job.strategy, + "description": job.description, + "attempt": job.attempt, + "targetInstances": [instance.to_dict() for instance in job.target_instances], + "targetFiles": [target_file.to_dict() for target_file in job.target_files], + "hasActivityLog": activity_log_path(workdir).is_file(), + } + ) + return payload diff --git a/src/api/extraction/infrastructure/extraction_job_container.py b/src/api/extraction/infrastructure/extraction_job_container.py new file mode 100644 index 000000000..2e6d30c6c --- /dev/null +++ b/src/api/extraction/infrastructure/extraction_job_container.py @@ -0,0 +1,21 @@ +"""Container lifecycle helpers for agentic-ci extraction jobs.""" + +from __future__ import annotations + +import re + +from shared_kernel.container_runtime.factory import create_container_runtime + +_CONTAINER_NAME_SAFE = re.compile(r"[^a-zA-Z0-9_.-]+") + + +def extraction_job_container_name(job_id: str) -> str: + cleaned = _CONTAINER_NAME_SAFE.sub("-", job_id).strip("-") + return f"kartograph-extract-{cleaned}"[:63].rstrip("-_.") + + +def 
stop_extraction_job_container(*, job_id: str, container_engine: str = "auto") -> bool: + """Stop and remove the extraction container for one job, if it exists.""" + runtime = create_container_runtime(container_engine) + name = extraction_job_container_name(job_id) + return runtime.remove_by_name(name, force=True) diff --git a/src/api/extraction/infrastructure/extraction_job_prompt.py b/src/api/extraction/infrastructure/extraction_job_prompt.py index 84129c951..3ed751920 100644 --- a/src/api/extraction/infrastructure/extraction_job_prompt.py +++ b/src/api/extraction/infrastructure/extraction_job_prompt.py @@ -2,8 +2,26 @@ from __future__ import annotations +from pathlib import Path + from extraction.domain.extraction_job import ExtractionJobRecord +EXTRACTION_PROMPT_FILENAME = "extraction_prompt.md" + +EXTRACTION_JOB_INVOKE_PROMPT = ( + "You are running a Kartograph extraction job in /workspace. " + f"Read {EXTRACTION_PROMPT_FILENAME} and job-context.json, then follow the instructions " + "completely. Use the workload API credentials in job-context.json to apply all required " + "graph mutations before you finish." +) + + +def write_extraction_prompt_file(*, workdir: Path, prompt: str) -> Path: + """Materialize the full job instructions for the agent to read from disk.""" + path = workdir / EXTRACTION_PROMPT_FILENAME + path.write_text(prompt.strip() + "\n", encoding="utf-8") + return path + def build_extraction_job_prompt(*, job: ExtractionJobRecord) -> str: """Return the agent prompt for one materialized extraction job.""" @@ -48,6 +66,23 @@ def build_extraction_job_prompt(*, job: ExtractionJobRecord) -> str: lines.append("") lines.extend( [ + "## Workload API", + "This container has no Kartograph MCP tools. 
Call the workload HTTP API with Bash/curl.", + "Read api_base_url and workload_token from job-context.json.", + "Send header `X-Workload-Token: <workload_token>` on every request.", + "", + "Base path: `{api_base_url}/extraction/workloads`", + "", + "Useful endpoints:", + "- GET `/schema/authoring-guide` — JSONL mutation shapes and rules", + "- GET `/schema/ontology` — current graph schema", + "- GET `/graph/search?q=...` — search existing nodes", + "- GET `/graph/instances?entity_type=...` — list instances by type", + "- POST `/mutations/validate` with body `{\"jsonl\": \"...\"}` — dry-run", + "- POST `/mutations/apply` with body `{\"jsonl\": \"...\"}` — apply mutations", + "", + "Write `.jsonl` files in the workspace when batches are large. Validate before apply.", + "", "## Completion", "When finished, ensure all required mutations are applied through the workload API.", "Do not modify files outside repository-files/.", diff --git a/src/api/extraction/infrastructure/extraction_run_orchestrator.py b/src/api/extraction/infrastructure/extraction_run_orchestrator.py index a09a2aa8b..772f6e002 100644 --- a/src/api/extraction/infrastructure/extraction_run_orchestrator.py +++ b/src/api/extraction/infrastructure/extraction_run_orchestrator.py @@ -135,6 +135,17 @@ async def _worker_loop(self, state: _OrchestratorState, *, worker_index: int) -> worker_id=worker_id, ) if job is None: + counts = await repo.count_by_status( + knowledge_graph_id=state.knowledge_graph_id + ) + if counts.get("in_progress", 0) > 0: + await session.commit() + await asyncio.sleep(2) + continue + if counts.get("pending", 0) > 0: + await session.commit() + await asyncio.sleep(2) + continue await session.commit() await self._maybe_finish_run(state) break @@ -194,6 +205,36 @@ def is_live(self, *, knowledge_graph_id: str) -> bool: state = self._active.get(knowledge_graph_id) return state is not None and not state.stop_event.is_set() + async def ensure_workers_for_pending( + self, + *, + tenant_id: str, 
+ knowledge_graph_id: str, + ) -> None: + """Start worker tasks when pending jobs exist but the pool has stopped.""" + if self.is_live(knowledge_graph_id=knowledge_graph_id): + return + + async with self._session_factory() as session: + repo = ExtractionJobRepository(session) + counts = await repo.count_by_status(knowledge_graph_id=knowledge_graph_id) + run = await repo.get_run(knowledge_graph_id=knowledge_graph_id) + + pending = counts.get("pending", 0) + if pending <= 0: + return + if run is None or run.status not in { + ExtractionRunStatus.RUNNING, + ExtractionRunStatus.PAUSING, + }: + return + + await self.start( + tenant_id=tenant_id, + knowledge_graph_id=knowledge_graph_id, + worker_count=run.worker_count, + ) + _orchestrator_singleton: ExtractionRunOrchestrator | None = None diff --git a/src/api/extraction/infrastructure/repositories/extraction_job_repository.py b/src/api/extraction/infrastructure/repositories/extraction_job_repository.py index c821236a9..c841e2d85 100644 --- a/src/api/extraction/infrastructure/repositories/extraction_job_repository.py +++ b/src/api/extraction/infrastructure/repositories/extraction_job_repository.py @@ -101,6 +101,146 @@ async def replace_pending_jobs( await self._session.flush() return len(jobs) + async def sync_pending_jobs( + self, + *, + knowledge_graph_id: str, + jobs: list[ExtractionJobRecord], + configured_job_set_names: set[str], + enabled_job_set_names: set[str], + blocked_job_set_names: set[str], + ) -> tuple[int, tuple[str, ...]]: + """Replace pending jobs per enabled job set without touching active work.""" + warnings: list[str] = [] + for job_set_name in sorted(configured_job_set_names): + if job_set_name not in enabled_job_set_names: + await self._delete_pending_for_job_set( + knowledge_graph_id=knowledge_graph_id, + job_set_name=job_set_name, + ) + continue + if job_set_name in blocked_job_set_names: + in_progress = await self.count_in_progress_for_job_set( + knowledge_graph_id=knowledge_graph_id, + 
job_set_name=job_set_name, + ) + warnings.append( + f"Skipped refreshing pending jobs for '{job_set_name}' because " + f"{in_progress} job(s) are still running." + ) + continue + await self._delete_pending_for_job_set( + knowledge_graph_id=knowledge_graph_id, + job_set_name=job_set_name, + ) + for job in jobs: + if job.job_set_name != job_set_name: + continue + self._session.add( + ExtractionJobModel( + id=job.id, + knowledge_graph_id=job.knowledge_graph_id, + job_id=job.job_id, + job_set_name=job.job_set_name, + strategy=job.strategy, + status=job.status.value, + order_index=job.order_index, + description=job.description, + target_instances=[ + instance.to_dict() for instance in job.target_instances + ], + target_files=[ + target_file.to_dict() for target_file in job.target_files + ], + ) + ) + + stale_names = await self._list_pending_job_set_names(knowledge_graph_id=knowledge_graph_id) + for job_set_name in stale_names: + if job_set_name not in configured_job_set_names: + await self._delete_pending_for_job_set( + knowledge_graph_id=knowledge_graph_id, + job_set_name=job_set_name, + ) + + await self._session.flush() + generated = len( + [ + job + for job in jobs + if job.job_set_name in enabled_job_set_names + and job.job_set_name not in blocked_job_set_names + ] + ) + return generated, tuple(warnings) + + async def _delete_pending_for_job_set( + self, + *, + knowledge_graph_id: str, + job_set_name: str, + ) -> None: + await self._session.execute( + delete(ExtractionJobModel).where( + ExtractionJobModel.knowledge_graph_id == knowledge_graph_id, + ExtractionJobModel.job_set_name == job_set_name, + ExtractionJobModel.status == ExtractionJobStatus.PENDING.value, + ) + ) + + async def _list_pending_job_set_names(self, *, knowledge_graph_id: str) -> set[str]: + stmt = ( + select(ExtractionJobModel.job_set_name) + .where( + ExtractionJobModel.knowledge_graph_id == knowledge_graph_id, + ExtractionJobModel.status == ExtractionJobStatus.PENDING.value, + ) + .distinct() + ) 
+ result = await self._session.execute(stmt) + return {str(row[0]) for row in result.all()} + + async def count_in_progress_for_job_set( + self, + *, + knowledge_graph_id: str, + job_set_name: str, + ) -> int: + stmt = select(func.count()).where( + ExtractionJobModel.knowledge_graph_id == knowledge_graph_id, + ExtractionJobModel.job_set_name == job_set_name, + ExtractionJobModel.status == ExtractionJobStatus.IN_PROGRESS.value, + ) + result = await self._session.execute(stmt) + return int(result.scalar_one()) + + async def job_set_names_with_in_progress(self, *, knowledge_graph_id: str) -> set[str]: + stmt = ( + select(ExtractionJobModel.job_set_name) + .where( + ExtractionJobModel.knowledge_graph_id == knowledge_graph_id, + ExtractionJobModel.status == ExtractionJobStatus.IN_PROGRESS.value, + ) + .distinct() + ) + result = await self._session.execute(stmt) + return {str(row[0]) for row in result.all()} + + async def delete_pending_job( + self, + *, + knowledge_graph_id: str, + job_id: str, + ) -> bool: + result = await self._session.execute( + delete(ExtractionJobModel).where( + ExtractionJobModel.knowledge_graph_id == knowledge_graph_id, + ExtractionJobModel.job_id == job_id, + ExtractionJobModel.status == ExtractionJobStatus.PENDING.value, + ) + ) + return int(result.rowcount or 0) > 0 + async def count_by_status(self, *, knowledge_graph_id: str) -> dict[str, int]: stmt = ( select(ExtractionJobModel.status, func.count()) @@ -148,6 +288,22 @@ async def has_in_progress_jobs(self, *, knowledge_graph_id: str) -> bool: result = await self._session.execute(stmt) return int(result.scalar_one()) > 0 + async def get_by_job_id( + self, + *, + knowledge_graph_id: str, + job_id: str, + ) -> ExtractionJobRecord | None: + stmt = select(ExtractionJobModel).where( + ExtractionJobModel.knowledge_graph_id == knowledge_graph_id, + ExtractionJobModel.job_id == job_id, + ) + result = await self._session.execute(stmt) + model = result.scalar_one_or_none() + if model is None: + return 
None + return _job_model_to_record(model) + async def list_recent_jobs( self, *, diff --git a/src/api/extraction/infrastructure/vertex_runtime_env.py b/src/api/extraction/infrastructure/vertex_runtime_env.py index a3738335d..82d0431b7 100644 --- a/src/api/extraction/infrastructure/vertex_runtime_env.py +++ b/src/api/extraction/infrastructure/vertex_runtime_env.py @@ -2,7 +2,12 @@ from __future__ import annotations +import json import os +from typing import Any + +GCLOUD_ADC_FILENAME = "application_default_credentials.json" +DEFAULT_GCLOUD_CONTAINER_PATH = "/gcloud/config" def is_truthy_env(value: str | None) -> bool: @@ -31,6 +36,24 @@ def build_vertex_container_env( return env +def build_gcloud_adc_env(*, container_config_path: str) -> dict[str, str]: + """Env vars so Google client libraries find ADC inside extraction containers.""" + base = container_config_path.rstrip("/") + return { + "CLOUDSDK_CONFIG": base, + "GOOGLE_APPLICATION_CREDENTIALS": f"{base}/{GCLOUD_ADC_FILENAME}", + "HOME": "/tmp", + } + + +def build_gcloud_config_bind( + *, + host_mount: str, + container_path: str = DEFAULT_GCLOUD_CONTAINER_PATH, +) -> str: + return f"{host_mount.rstrip('/')}:{container_path.rstrip('/')}:ro,z" + + def claude_model_configured() -> bool: """Return True when Vertex or direct Anthropic API credentials are configured.""" if vertex_enabled_from_env(): diff --git a/src/api/extraction/infrastructure/workload_runtime_settings.py b/src/api/extraction/infrastructure/workload_runtime_settings.py index af8d33bea..08a562f9a 100644 --- a/src/api/extraction/infrastructure/workload_runtime_settings.py +++ b/src/api/extraction/infrastructure/workload_runtime_settings.py @@ -31,6 +31,13 @@ class ExtractionWorkloadRuntimeSettings(BaseSettings): agentic_ci_image: str = Field(default="ghcr.io/opendatahub-io/ai-helpers:latest") agentic_ci_harness: str = Field(default="claude-code") agentic_ci_model: str = Field(default="") + agentic_ci_api_base_url: str = Field( + 
default="http://127.0.0.1:8000", + description=( + "API base URL reachable from agentic-ci job containers. " + "Jobs use --network host, so docker service names like api:8000 will not resolve." + ), + ) agentic_ci_timeout_seconds: int = Field(default=1200, ge=60, le=7200) extraction_job_work_dir: str = Field(default="/tmp/kartograph/extraction_jobs") sticky_command: tuple[str, ...] = Field( diff --git a/src/api/infrastructure/extraction_workload/dependencies.py b/src/api/infrastructure/extraction_workload/dependencies.py index be5c9cbd1..134343ba2 100644 --- a/src/api/infrastructure/extraction_workload/dependencies.py +++ b/src/api/infrastructure/extraction_workload/dependencies.py @@ -5,7 +5,7 @@ from functools import lru_cache from typing import Annotated -from fastapi import Depends +from fastapi import Depends, Request from extraction.infrastructure.workload_runtime import ScopedWorkloadCredentialIssuer from extraction.infrastructure.workload_runtime_factory import get_workload_credential_issuer @@ -59,10 +59,12 @@ def get_workload_schema_service( def get_workload_extraction_jobs_service( + request: Request, session: Annotated[AsyncSession, Depends(get_write_session)], pool: Annotated[ConnectionPool, Depends(get_age_connection_pool)], ) -> IWorkloadExtractionJobsService: return GraphWorkloadExtractionJobsService( session=session, connection_pool=pool, + session_factory=request.app.state.write_sessionmaker, ) diff --git a/src/api/infrastructure/extraction_workload/extraction_jobs_service.py b/src/api/infrastructure/extraction_workload/extraction_jobs_service.py index ecee7bda1..d5b00c85a 100644 --- a/src/api/infrastructure/extraction_workload/extraction_jobs_service.py +++ b/src/api/infrastructure/extraction_workload/extraction_jobs_service.py @@ -15,7 +15,10 @@ materialize_jobs_from_config, projected_job_count, ) +from infrastructure.management.extraction_jobs_service import _format_pending_sync_message from extraction.domain.extraction_job import 
ExtractionJobStatus +from extraction.infrastructure.extraction_job_activity import serialize_recent_job +from extraction.infrastructure.extraction_run_orchestrator import get_extraction_run_orchestrator from extraction.infrastructure.prepared_job_package_reader import SqlPreparedJobPackageReader from extraction.infrastructure.repositories.extraction_job_repository import ExtractionJobRepository from extraction.infrastructure.workload_runtime_settings import get_extraction_workload_runtime_settings @@ -41,9 +44,11 @@ def __init__( *, session: AsyncSession, connection_pool: ConnectionPool, + session_factory: Any | None = None, ) -> None: self._session = session self._connection_pool = connection_pool + self._session_factory = session_factory outbox = OutboxRepository(session=session) self._knowledge_graph_repository = KnowledgeGraphRepository(session=session, outbox=outbox) self._extraction_job_repository = ExtractionJobRepository(session=session) @@ -107,10 +112,6 @@ async def save_document( tenant_id=tenant_id, knowledge_graph_id=knowledge_graph_id, ) - if await self._extraction_job_repository.has_in_progress_jobs( - knowledge_graph_id=knowledge_graph_id - ): - raise ValueError("Cannot save job sets while extraction jobs are in progress.") document = ExtractionJobConfigDocument( version=str(payload.get("version") or "1.0"), @@ -148,10 +149,24 @@ async def save_document( job_packages=job_packages, job_package_work_dir=Path(runtime_settings.job_package_work_dir), ) - generated = await self._extraction_job_repository.replace_pending_jobs( + configured_names = {job_set.name for job_set in document.job_sets} + enabled_names = {job_set.name for job_set in document.enabled_job_sets()} + blocked_names = await self._extraction_job_repository.job_set_names_with_in_progress( + knowledge_graph_id=knowledge_graph_id, + ) + generated, warnings = await self._extraction_job_repository.sync_pending_jobs( knowledge_graph_id=knowledge_graph_id, jobs=jobs, + 
configured_job_set_names=configured_names, + enabled_job_set_names=enabled_names, + blocked_job_set_names=blocked_names, ) + if self._session_factory is not None: + orchestrator = get_extraction_run_orchestrator(session_factory=self._session_factory) + await orchestrator.ensure_workers_for_pending( + tenant_id=tenant_id, + knowledge_graph_id=knowledge_graph_id, + ) await self._session.commit() saved = await self.get_document( @@ -159,6 +174,13 @@ async def save_document( knowledge_graph_id=knowledge_graph_id, ) saved["generated_jobs"] = generated + saved["warnings"] = list(warnings) + saved["message"] = _format_pending_sync_message( + generated_jobs=generated, + enabled_job_set_count=len(enabled_names), + disabled_job_set_count=len(configured_names - enabled_names), + warnings=warnings, + ) return saved async def get_plan_summary( @@ -238,6 +260,7 @@ async def get_database_status( knowledge_graph_id=knowledge_graph_id, graph_data=graph_data, ) + runtime_settings = get_extraction_workload_runtime_settings() return { "exists": True, "jobsByStatus": { @@ -248,20 +271,7 @@ async def get_database_status( }, "jobsBySet": jobs_by_set, "recentJobs": [ - { - "jobId": job.job_id, - "jobSet": job.job_set_name, - "status": job.status.value, - "workerId": job.worker_id, - "startedAt": job.started_at.isoformat() if job.started_at else None, - "completedAt": job.completed_at.isoformat() if job.completed_at else None, - "inputTokens": job.input_tokens, - "outputTokens": job.output_tokens, - "writeOps": job.entities_created - + job.entities_modified - + job.relationships_created, - "assistantPreview": job.description[:120] if job.description else None, - } + serialize_recent_job(job, settings=runtime_settings) for job in recent_jobs ], "activeWorkers": active_workers, diff --git a/src/api/infrastructure/management/extraction_job_materializer.py b/src/api/infrastructure/management/extraction_job_materializer.py index fd29026b7..596991e7e 100644 --- 
a/src/api/infrastructure/management/extraction_job_materializer.py +++ b/src/api/infrastructure/management/extraction_job_materializer.py @@ -167,6 +167,8 @@ def materialize_jobs_from_config( order_index = 0 for job_set in config.job_sets: + if not job_set.enabled: + continue if job_set.strategy == ExtractionJobSetStrategy.BY_INSTANCES: entity_type = job_set.entity_type or "" instances = instances_by_type.get(entity_type, []) @@ -229,6 +231,8 @@ def projected_job_count( entity_instance_counts: dict[str, int], matched_file_count: int | None = None, ) -> int | None: + if not job_set.enabled: + return 0 if job_set.strategy == ExtractionJobSetStrategy.BY_INSTANCES: total = entity_instance_counts.get(job_set.entity_type or "", 0) per_job = job_set.instances_per_job diff --git a/src/api/infrastructure/management/extraction_jobs_service.py b/src/api/infrastructure/management/extraction_jobs_service.py index d77d3903e..69342190f 100644 --- a/src/api/infrastructure/management/extraction_jobs_service.py +++ b/src/api/infrastructure/management/extraction_jobs_service.py @@ -15,8 +15,16 @@ materialize_jobs_from_config, projected_job_count, ) +from extraction.infrastructure.extraction_job_container import stop_extraction_job_container from extraction.infrastructure.extraction_run_orchestrator import get_extraction_run_orchestrator from extraction.domain.extraction_job import ExtractionJobStatus, ExtractionRunStatus +from extraction.infrastructure.extraction_job_activity import ( + job_workdir, + parse_activity_messages, + read_activity_log, + serialize_job_detail, + serialize_recent_job, +) from extraction.infrastructure.prepared_job_package_reader import SqlPreparedJobPackageReader from extraction.infrastructure.repositories.extraction_job_repository import ExtractionJobRepository from extraction.infrastructure.workload_runtime_settings import get_extraction_workload_runtime_settings @@ -33,6 +41,23 @@ ) +def _format_pending_sync_message( + *, + generated_jobs: int, + 
enabled_job_set_count: int, + disabled_job_set_count: int, + warnings: tuple[str, ...], +) -> str: + parts = [ + f"Synced {generated_jobs} pending job(s) from {enabled_job_set_count} enabled job set(s)." + ] + if disabled_job_set_count: + parts.append(f"{disabled_job_set_count} disabled job set(s) were excluded.") + if warnings: + parts.append(" ".join(warnings)) + return " ".join(parts) + + class ExtractionJobsService: """Coordinate extraction job sets, materialization, and run orchestration.""" @@ -120,21 +145,12 @@ async def save_extraction_jobs_document( await self._session.commit() return document.to_dict() - async def regenerate_jobs( + async def _materialize_and_sync_pending_jobs( self, *, - user_id: str, kg_id: str, + document: ExtractionJobConfigDocument, ) -> dict[str, Any]: - kg = await self._knowledge_graph_service.get(user_id=user_id, kg_id=kg_id) - if kg is None: - raise ValueError(f"Knowledge graph '{kg_id}' not found") - - if await self._extraction_job_repository.has_in_progress_jobs(knowledge_graph_id=kg_id): - raise ValueError("Cannot regenerate jobs while extraction jobs are in progress.") - - config = await self._knowledge_graph_repository.get_extraction_job_config(kg_id) - document = config or ExtractionJobConfigDocument.empty() graph_data = await self._load_graph_data() runtime_settings = get_extraction_workload_runtime_settings() prepared_reader = SqlPreparedJobPackageReader( @@ -151,12 +167,108 @@ async def regenerate_jobs( job_packages=job_packages, job_package_work_dir=Path(runtime_settings.job_package_work_dir), ) - generated = await self._extraction_job_repository.replace_pending_jobs( + configured_names = {job_set.name for job_set in document.job_sets} + enabled_names = {job_set.name for job_set in document.enabled_job_sets()} + blocked_names = await self._extraction_job_repository.job_set_names_with_in_progress( + knowledge_graph_id=kg_id, + ) + generated, warnings = await self._extraction_job_repository.sync_pending_jobs( 
knowledge_graph_id=kg_id, jobs=jobs, + configured_job_set_names=configured_names, + enabled_job_set_names=enabled_names, + blocked_job_set_names=blocked_names, + ) + orchestrator = get_extraction_run_orchestrator(session_factory=self._session_factory) + await orchestrator.ensure_workers_for_pending( + tenant_id=self._tenant_id, + knowledge_graph_id=kg_id, + ) + disabled_count = len(configured_names - enabled_names) + message = _format_pending_sync_message( + generated_jobs=generated, + enabled_job_set_count=len(enabled_names), + disabled_job_set_count=disabled_count, + warnings=warnings, + ) + return { + "success": True, + "generated_jobs": generated, + "warnings": list(warnings), + "message": message, + } + + async def regenerate_jobs( + self, + *, + user_id: str, + kg_id: str, + ) -> dict[str, Any]: + kg = await self._knowledge_graph_service.get(user_id=user_id, kg_id=kg_id) + if kg is None: + raise ValueError(f"Knowledge graph '{kg_id}' not found") + + config = await self._knowledge_graph_repository.get_extraction_job_config(kg_id) + document = config or ExtractionJobConfigDocument.empty() + result = await self._materialize_and_sync_pending_jobs( + kg_id=kg_id, + document=document, + ) + await self._session.commit() + return result + + async def cancel_job( + self, + *, + user_id: str, + kg_id: str, + job_id: str, + ) -> dict[str, Any]: + kg = await self._knowledge_graph_service.get(user_id=user_id, kg_id=kg_id) + if kg is None: + raise ValueError(f"Knowledge graph '{kg_id}' not found") + + job = await self._extraction_job_repository.get_by_job_id( + knowledge_graph_id=kg_id, + job_id=job_id, + ) + if job is None: + raise ValueError(f"Extraction job '{job_id}' not found") + + runtime_settings = get_extraction_workload_runtime_settings() + if job.status == ExtractionJobStatus.PENDING: + removed = await self._extraction_job_repository.delete_pending_job( + knowledge_graph_id=kg_id, + job_id=job_id, + ) + if not removed: + raise ValueError(f"Job '{job_id}' is no 
longer pending.") + await self._session.commit() + return { + "success": True, + "message": f"Removed pending job {job_id} from the queue.", + } + + if job.status != ExtractionJobStatus.IN_PROGRESS: + raise ValueError( + f"Job '{job_id}' is {job.status.value} and cannot be cancelled. " + "Use Reset Failed or Reset All Jobs to re-queue finished jobs." + ) + + stop_extraction_job_container( + job_id=job_id, + container_engine=runtime_settings.container_engine, + ) + await self._extraction_job_repository.mark_job_failed( + knowledge_graph_id=kg_id, + job_id=job_id, + error_message="Cancelled by operator", ) await self._session.commit() - return {"success": True, "generated_jobs": generated} + return { + "success": True, + "message": f"Cancelled running job {job_id} and stopped its container.", + } async def get_database_status( self, @@ -190,6 +302,7 @@ async def get_database_status( knowledge_graph_id=kg_id, graph_data=graph_data, ) + runtime_settings = get_extraction_workload_runtime_settings() return { "exists": True, "jobsByStatus": { @@ -200,18 +313,7 @@ async def get_database_status( }, "jobsBySet": jobs_by_set, "recentJobs": [ - { - "jobId": job.job_id, - "jobSet": job.job_set_name, - "status": job.status.value, - "workerId": job.worker_id, - "startedAt": job.started_at.isoformat() if job.started_at else None, - "completedAt": job.completed_at.isoformat() if job.completed_at else None, - "inputTokens": job.input_tokens, - "outputTokens": job.output_tokens, - "writeOps": job.entities_created + job.entities_modified + job.relationships_created, - "assistantPreview": job.description[:120] if job.description else None, - } + serialize_recent_job(job, settings=runtime_settings) for job in recent_jobs ], "activeWorkers": active_workers, @@ -221,6 +323,56 @@ async def get_database_status( **token_metrics, } + async def get_job_detail( + self, + *, + user_id: str, + kg_id: str, + job_id: str, + ) -> dict[str, Any] | None: + kg = await 
self._knowledge_graph_service.get(user_id=user_id, kg_id=kg_id) + if kg is None: + return None + job = await self._extraction_job_repository.get_by_job_id( + knowledge_graph_id=kg_id, + job_id=job_id, + ) + if job is None: + return None + runtime_settings = get_extraction_workload_runtime_settings() + return serialize_job_detail(job, settings=runtime_settings) + + async def get_job_activity( + self, + *, + user_id: str, + kg_id: str, + job_id: str, + ) -> dict[str, Any] | None: + kg = await self._knowledge_graph_service.get(user_id=user_id, kg_id=kg_id) + if kg is None: + return None + job = await self._extraction_job_repository.get_by_job_id( + knowledge_graph_id=kg_id, + job_id=job_id, + ) + if job is None: + return None + runtime_settings = get_extraction_workload_runtime_settings() + workdir = job_workdir( + knowledge_graph_id=kg_id, + job_id=job_id, + settings=runtime_settings, + ) + raw_log = read_activity_log(workdir) + return { + "jobId": job.job_id, + "status": job.status.value, + "log": raw_log, + "messages": parse_activity_messages(raw_log), + "detail": serialize_job_detail(job, settings=runtime_settings), + } + async def get_extraction_run_state( self, *, diff --git a/src/api/management/domain/extraction_job_config.py b/src/api/management/domain/extraction_job_config.py index cff6445d0..07e764d54 100644 --- a/src/api/management/domain/extraction_job_config.py +++ b/src/api/management/domain/extraction_job_config.py @@ -25,6 +25,7 @@ class ExtractionJobSetDefinition: instances_per_job: int | None = None file_patterns: tuple[str, ...] 
= field(default_factory=tuple) files_per_job: int | None = None + enabled: bool = True def __post_init__(self) -> None: if not self.name or not self.name.strip(): @@ -32,6 +33,8 @@ def __post_init__(self) -> None: def validation_errors(self, *, entity_instance_counts: dict[str, int]) -> tuple[str, ...]: """Return human-readable validation errors for this job set.""" + if not self.enabled: + return () errors: list[str] = [] if self.strategy == ExtractionJobSetStrategy.BY_INSTANCES: if not self.entity_type or not self.entity_type.strip(): @@ -61,6 +64,7 @@ def to_dict(self) -> dict[str, Any]: payload: dict[str, Any] = { "name": self.name, "strategy": self.strategy.value, + "enabled": self.enabled, } if self.description: payload["description"] = self.description @@ -79,6 +83,12 @@ def to_dict(self) -> dict[str, Any]: def from_dict(cls, data: dict[str, Any]) -> ExtractionJobSetDefinition: strategy = ExtractionJobSetStrategy(str(data["strategy"])) raw_patterns = data.get("file_patterns") or [] + enabled_raw = data.get("enabled", True) + enabled = bool(enabled_raw) if not isinstance(enabled_raw, str) else enabled_raw.lower() not in { + "0", + "false", + "no", + } return cls( name=str(data["name"]), strategy=strategy, @@ -89,6 +99,7 @@ def from_dict(cls, data: dict[str, Any]) -> ExtractionJobSetDefinition: else None, file_patterns=tuple(str(pattern) for pattern in raw_patterns), files_per_job=int(data["files_per_job"]) if data.get("files_per_job") is not None else None, + enabled=enabled, ) @@ -99,6 +110,9 @@ class ExtractionJobConfigDocument: version: str job_sets: tuple[ExtractionJobSetDefinition, ...] 
= field(default_factory=tuple) + def enabled_job_sets(self) -> tuple[ExtractionJobSetDefinition, ...]: + return tuple(job_set for job_set in self.job_sets if job_set.enabled) + def validation_errors(self, *, entity_instance_counts: dict[str, int]) -> tuple[str, ...]: errors: list[str] = [] seen_names: set[str] = set() diff --git a/src/api/management/domain/extraction_relationship_authoring.py b/src/api/management/domain/extraction_relationship_authoring.py new file mode 100644 index 000000000..b9a2c23fe --- /dev/null +++ b/src/api/management/domain/extraction_relationship_authoring.py @@ -0,0 +1,75 @@ +"""Rules for which relationship types belong in per-instance extraction descriptions.""" + +from __future__ import annotations + +from dataclasses import dataclass +from typing import Any + + +@dataclass(frozen=True) +class RelationshipAuthoringLine: + """One relationship type an extraction job set should cover.""" + + entity_type: str + relationship_label: str + counterpart_type: str + + +def relationship_authoring_lines_for_entity_type( + entity_type: str, + *, + edge_types: list[dict[str, Any]], + entity_instance_counts: dict[str, int], +) -> tuple[RelationshipAuthoringLine, ...]: + """Return relationship lines EntityX jobs should cover to avoid duplicate work. + + When EntityX relates to EntityY, only the side with more live instances + should create/update that relationship in its extraction jobs. The side + with fewer (or equal) instances omits the line. 
+ """ + entity_count = entity_instance_counts.get(entity_type, 0) + if entity_count <= 0: + return () + + lines: list[RelationshipAuthoringLine] = [] + seen: set[tuple[str, str, str]] = set() + + for edge in edge_types: + source_type = str(edge.get("source_type") or edge.get("sourceType") or "").strip() + target_type = str(edge.get("target_type") or edge.get("targetType") or "").strip() + label = str(edge.get("label") or edge.get("name") or edge.get("type") or "").strip() + if not label: + continue + + if source_type == entity_type and target_type: + counterpart = target_type + counterpart_count = entity_instance_counts.get(counterpart, 0) + if entity_count > counterpart_count: + key = (entity_type, label, counterpart) + if key not in seen: + seen.add(key) + lines.append( + RelationshipAuthoringLine( + entity_type=entity_type, + relationship_label=label, + counterpart_type=counterpart, + ) + ) + continue + + if target_type == entity_type and source_type: + counterpart = source_type + counterpart_count = entity_instance_counts.get(counterpart, 0) + if entity_count > counterpart_count: + key = (entity_type, label, counterpart) + if key not in seen: + seen.add(key) + lines.append( + RelationshipAuthoringLine( + entity_type=entity_type, + relationship_label=label, + counterpart_type=counterpart, + ) + ) + + return tuple(sorted(lines, key=lambda line: (line.relationship_label, line.counterpart_type))) diff --git a/src/api/management/presentation/knowledge_graphs/extraction_jobs_routes.py b/src/api/management/presentation/knowledge_graphs/extraction_jobs_routes.py index 4758fcde0..1432db587 100644 --- a/src/api/management/presentation/knowledge_graphs/extraction_jobs_routes.py +++ b/src/api/management/presentation/knowledge_graphs/extraction_jobs_routes.py @@ -19,6 +19,7 @@ class ExtractionJobSetModel(BaseModel): name: str strategy: str + enabled: bool = True description: str | None = None entity_type: str | None = None instances_per_job: int | None = None @@ -46,6 +47,7 
@@ class ActionResponse(BaseModel): message: str | None = None generated_jobs: int | None = None reset_count: int | None = None + warnings: list[str] = Field(default_factory=list) def _handle_value_error(exc: ValueError) -> HTTPException: @@ -104,6 +106,8 @@ async def save_extraction_jobs( if payload is None: raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Knowledge graph not found") payload["last_regenerated_jobs"] = regenerated.get("generated_jobs") + payload["last_regenerate_warnings"] = regenerated.get("warnings") + payload["last_regenerate_message"] = regenerated.get("message") return ExtractionJobsDocumentResponse.model_validate(payload) @@ -127,11 +131,75 @@ async def regenerate_extraction_jobs( raise _handle_value_error(exc) return ActionResponse( success=True, - message=f"Regenerated {result.get('generated_jobs', 0)} jobs", + message=str(result.get("message") or f"Regenerated {result.get('generated_jobs', 0)} jobs"), generated_jobs=int(result.get("generated_jobs") or 0), + warnings=[str(item) for item in result.get("warnings") or []], ) +@router.post( + "/knowledge-graphs/{kg_id}/extraction-jobs/jobs/{job_id}/cancel", + response_model=ActionResponse, +) +async def cancel_extraction_job( + kg_id: str, + job_id: str, + service: Annotated[ExtractionJobsService, Depends(get_extraction_jobs_service)], + current_user: Annotated[CurrentUser, Depends(get_current_user)], +) -> ActionResponse: + try: + result = await service.cancel_job( + user_id=current_user.user_id.value, + kg_id=kg_id, + job_id=job_id, + ) + except UnauthorizedError: + raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Forbidden") + except ValueError as exc: + raise _handle_value_error(exc) + return ActionResponse(success=True, message=result.get("message")) + + +@router.get("/knowledge-graphs/{kg_id}/extraction-jobs/jobs/{job_id}") +async def get_extraction_job_detail( + kg_id: str, + job_id: str, + service: Annotated[ExtractionJobsService, 
Depends(get_extraction_jobs_service)], + current_user: Annotated[CurrentUser, Depends(get_current_user)], +) -> dict[str, Any]: + try: + payload = await service.get_job_detail( + user_id=current_user.user_id.value, + kg_id=kg_id, + job_id=job_id, + ) + except UnauthorizedError: + raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Forbidden") + if payload is None: + raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Extraction job not found") + return payload + + +@router.get("/knowledge-graphs/{kg_id}/extraction-jobs/jobs/{job_id}/activity") +async def get_extraction_job_activity( + kg_id: str, + job_id: str, + service: Annotated[ExtractionJobsService, Depends(get_extraction_jobs_service)], + current_user: Annotated[CurrentUser, Depends(get_current_user)], +) -> dict[str, Any]: + try: + payload = await service.get_job_activity( + user_id=current_user.user_id.value, + kg_id=kg_id, + job_id=job_id, + ) + except UnauthorizedError: + raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Forbidden") + if payload is None: + raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Extraction job not found") + return payload + + @router.get("/knowledge-graphs/{kg_id}/extraction-jobs/database-status") async def get_extraction_database_status( kg_id: str, diff --git a/src/api/tests/unit/extraction/application/test_skill_resolution_service.py b/src/api/tests/unit/extraction/application/test_skill_resolution_service.py index 488d7b9aa..f0a97a466 100644 --- a/src/api/tests/unit/extraction/application/test_skill_resolution_service.py +++ b/src/api/tests/unit/extraction/application/test_skill_resolution_service.py @@ -65,6 +65,7 @@ async def test_extraction_mode_uses_extraction_defaults(self): assert "job_setup" in resolved.skills assert "per_instance_description_authoring" in resolved.skills assert "EntityType} ->" in resolved.skills["per_instance_description_authoring"] + assert "MORE live instances" in 
resolved.skills["per_instance_description_authoring"] assert "Implementation Analysis" in resolved.skills["per_instance_description_authoring"] assert "minor_edits" in resolved.skills assert "schema_edits_secondary" in resolved.skills diff --git a/src/api/tests/unit/extraction/infrastructure/test_agentic_ci_extraction_job_runner.py b/src/api/tests/unit/extraction/infrastructure/test_agentic_ci_extraction_job_runner.py new file mode 100644 index 000000000..37f03e233 --- /dev/null +++ b/src/api/tests/unit/extraction/infrastructure/test_agentic_ci_extraction_job_runner.py @@ -0,0 +1,98 @@ +"""Unit tests for agentic-ci extraction container credential wiring.""" + +from __future__ import annotations + +from pathlib import Path + +from extraction.infrastructure.agentic_ci_extraction_job_runner import ( + AgenticCiExtractionJobRunner, + _patch_job_context_api_base, + _strip_harness_binary, +) +from extraction.infrastructure.extraction_job_prompt import ( + EXTRACTION_JOB_INVOKE_PROMPT, + write_extraction_prompt_file, +) +from extraction.infrastructure.workload_runtime_settings import ExtractionWorkloadRuntimeSettings + + +def test_strip_harness_binary_removes_leading_claude() -> None: + command = [ + "claude", + "--permission-mode", + "bypassPermissions", + "-p", + "do the job", + ] + + assert _strip_harness_binary(command) == [ + "--permission-mode", + "bypassPermissions", + "-p", + "do the job", + ] + + +def test_write_extraction_prompt_file_materializes_instructions(tmp_path: Path) -> None: + write_extraction_prompt_file(workdir=tmp_path, prompt="Extract all entities.") + + prompt_path = tmp_path / "extraction_prompt.md" + assert prompt_path.is_file() + assert prompt_path.read_text(encoding="utf-8") == "Extract all entities.\n" + + +def test_extraction_job_invoke_prompt_references_materialized_file() -> None: + assert "extraction_prompt.md" in EXTRACTION_JOB_INVOKE_PROMPT + assert "job-context.json" in EXTRACTION_JOB_INVOKE_PROMPT + + +def 
test_patch_job_context_api_base_rewrites_host_reachable_url(tmp_path: Path) -> None: + context_path = tmp_path / "job-context.json" + context_path.write_text( + '{"api_base_url": "http://api:8000", "workload_token": "tok"}', + encoding="utf-8", + ) + + _patch_job_context_api_base(tmp_path, "http://127.0.0.1:8000") + + import json + + updated = json.loads(context_path.read_text(encoding="utf-8")) + assert updated["api_base_url"] == "http://127.0.0.1:8000" + assert updated["workload_token"] == "tok" + + +def test_build_binds_mounts_full_gcloud_config_for_vertex() -> None: + runner = AgenticCiExtractionJobRunner( + settings=ExtractionWorkloadRuntimeSettings( + gcloud_config_mount="/host/.config/gcloud", + gcloud_config_container_path="/gcloud/config", + ) + ) + binds = runner._build_binds(workdir=__import__("pathlib").Path("/tmp/job-workdir")) + + assert "/tmp/job-workdir:/workspace:z" in binds + assert "/host/.config/gcloud:/gcloud/config:ro,z" in binds + + +def test_build_container_env_sets_google_application_credentials_for_vertex(monkeypatch) -> None: + monkeypatch.setenv("CLAUDE_CODE_USE_VERTEX", "1") + runner = AgenticCiExtractionJobRunner( + settings=ExtractionWorkloadRuntimeSettings( + vertex_project_id="my-project", + vertex_region="us-east5", + gcloud_config_mount="/host/.config/gcloud", + gcloud_config_container_path="/gcloud/config", + ) + ) + env = runner._build_container_env(otel_port=0) + + assert env["CLAUDE_MODEL"] == runner._resolve_model() + assert env["AGENT_MODEL"] == runner._resolve_model() + assert env["CLAUDE_CODE_USE_VERTEX"] == "1" + assert env["ANTHROPIC_VERTEX_PROJECT_ID"] == "my-project" + assert env["GOOGLE_APPLICATION_CREDENTIALS"] == ( + "/gcloud/config/application_default_credentials.json" + ) + assert env["CLOUDSDK_CONFIG"] == "/gcloud/config" + assert env["HOME"] == "/tmp" diff --git a/src/api/tests/unit/extraction/infrastructure/test_extraction_job_activity.py 
b/src/api/tests/unit/extraction/infrastructure/test_extraction_job_activity.py new file mode 100644 index 000000000..52a1ddc60 --- /dev/null +++ b/src/api/tests/unit/extraction/infrastructure/test_extraction_job_activity.py @@ -0,0 +1,78 @@ +"""Unit tests for extraction job activity logs.""" + +from __future__ import annotations + +import json +from pathlib import Path + +from extraction.infrastructure.extraction_job_activity import ( + append_activity_line, + append_activity_message, + format_claude_code_stream_line, + parse_activity_messages, + read_assistant_preview, + read_activity_log, +) + + +def test_format_claude_code_stream_line_parses_assistant_thoughts() -> None: + payload = json.dumps( + { + "type": "assistant", + "message": { + "content": [ + {"type": "text", "text": "Inspecting adapter configuration files."}, + {"type": "tool_use", "name": "Read", "input": {"path": "job-context.json"}}, + ] + }, + } + ) + messages = format_claude_code_stream_line(payload) + assert messages == [ + ("thought", "Inspecting adapter configuration files."), + ("tool", "Using tool: Read"), + ] + + +def test_format_claude_code_stream_line_parses_result_error() -> None: + payload = json.dumps( + { + "type": "result", + "is_error": True, + "result": "API Error: Could not load the default credentials.", + } + ) + assert format_claude_code_stream_line(payload) == [ + ("error", "API Error: Could not load the default credentials.") + ] + + +def test_parse_activity_messages_expands_legacy_json_lines(tmp_path: Path) -> None: + log_path = tmp_path / "agent_activity.log" + append_activity_line(log_path, "📡 Processing job adapter_batch_0001_abcd") + append_activity_message(log_path, kind="system", text="Agent initialized (model: claude-opus-4-6)") + append_activity_message(log_path, kind="thought", text="Scanning repository-files for adapter config") + append_activity_message(log_path, kind="error", text="API Error: Could not load the default credentials.") + + messages = 
parse_activity_messages(read_activity_log(tmp_path)) + + assert len(messages) >= 4 + assert messages[0]["kind"] == "info" + assert any(message["kind"] == "thought" for message in messages) + assert messages[-1]["text"].startswith("API Error") + + +def test_read_assistant_preview_returns_latest_thought_for_job(tmp_path: Path) -> None: + log_path = tmp_path / "agent_activity.log" + append_activity_line(log_path, "📡 Processing job adapter_batch_0001_abcd") + append_activity_message(log_path, kind="thought", text="Scanning repository-files for adapter config") + append_activity_message(log_path, kind="tool", text="Using tool: kartograph_apply_graph_mutations") + append_activity_message(log_path, kind="thought", text="Linked adapter to three Resource entities") + + preview = read_assistant_preview(tmp_path, job_id="adapter_batch_0001_abcd") + + assert preview == "Linked adapter to three Resource entities" + + +def test_read_activity_log_returns_empty_when_missing(tmp_path: Path) -> None: + assert read_activity_log(tmp_path) == "" diff --git a/src/api/tests/unit/infrastructure/management/test_extraction_job_materializer.py b/src/api/tests/unit/infrastructure/management/test_extraction_job_materializer.py index b77f3827d..40e4609b1 100644 --- a/src/api/tests/unit/infrastructure/management/test_extraction_job_materializer.py +++ b/src/api/tests/unit/infrastructure/management/test_extraction_job_materializer.py @@ -52,3 +52,37 @@ def test_materialize_by_instances_batches_graph_nodes() -> None: assert jobs[0].target_instances[0].slug == "feature-a" assert jobs[0].description.startswith("Extract acceptance") assert all(job.status.value == "pending" for job in jobs) + + +def test_materialize_skips_disabled_job_sets() -> None: + config = ExtractionJobConfigDocument( + version="1.0", + job_sets=( + ExtractionJobSetDefinition( + name="disabled", + strategy=ExtractionJobSetStrategy.BY_INSTANCES, + entity_type="Feature", + instances_per_job=1, + description="Should not 
materialize.", + enabled=False, + ), + ), + ) + graph_data = { + "nodes": [ + { + "knowledge_graph_id": "kg-1", + "type": "Feature", + "slug": "feature-a", + }, + ], + "edges": [], + } + + jobs = materialize_jobs_from_config( + knowledge_graph_id="kg-1", + config=config, + graph_data=graph_data, + ) + + assert jobs == [] diff --git a/src/api/tests/unit/management/domain/test_extraction_job_config.py b/src/api/tests/unit/management/domain/test_extraction_job_config.py index 3e505b1f3..7dc48cd0f 100644 --- a/src/api/tests/unit/management/domain/test_extraction_job_config.py +++ b/src/api/tests/unit/management/domain/test_extraction_job_config.py @@ -18,6 +18,16 @@ def test_by_instances_requires_description_and_entity_type() -> None: assert any("description" in err.lower() for err in errors) +def test_disabled_job_set_skips_validation() -> None: + job_set = ExtractionJobSetDefinition( + name="disabled_set", + strategy=ExtractionJobSetStrategy.BY_INSTANCES, + enabled=False, + ) + errors = job_set.validation_errors(entity_instance_counts={}) + assert errors == () + + def test_document_rejects_duplicate_job_set_names() -> None: document = ExtractionJobConfigDocument( version="1.0", diff --git a/src/api/tests/unit/management/domain/test_extraction_relationship_authoring.py b/src/api/tests/unit/management/domain/test_extraction_relationship_authoring.py new file mode 100644 index 000000000..d5b8a271e --- /dev/null +++ b/src/api/tests/unit/management/domain/test_extraction_relationship_authoring.py @@ -0,0 +1,57 @@ +"""Tests for relationship ownership rules in per-instance descriptions.""" + +from management.domain.extraction_relationship_authoring import ( + relationship_authoring_lines_for_entity_type, +) + + +def test_includes_relationship_when_entity_has_more_instances() -> None: + lines = relationship_authoring_lines_for_entity_type( + "Adapter", + edge_types=[ + {"label": "deploys", "source_type": "Adapter", "target_type": "Cluster"}, + ], + 
entity_instance_counts={"Adapter": 10, "Cluster": 3}, + ) + + assert len(lines) == 1 + assert lines[0].relationship_label == "deploys" + assert lines[0].counterpart_type == "Cluster" + + +def test_omits_relationship_when_entity_has_fewer_instances() -> None: + lines = relationship_authoring_lines_for_entity_type( + "Cluster", + edge_types=[ + {"label": "deploys", "source_type": "Adapter", "target_type": "Cluster"}, + ], + entity_instance_counts={"Adapter": 10, "Cluster": 3}, + ) + + assert lines == () + + +def test_omits_relationship_when_counts_are_equal() -> None: + lines = relationship_authoring_lines_for_entity_type( + "Adapter", + edge_types=[ + {"label": "connects", "source_type": "Adapter", "target_type": "Service"}, + ], + entity_instance_counts={"Adapter": 5, "Service": 5}, + ) + + assert lines == () + + +def test_includes_inbound_relationship_when_target_side_has_more_instances() -> None: + lines = relationship_authoring_lines_for_entity_type( + "Service", + edge_types=[ + {"label": "exposes", "source_type": "Service", "target_type": "Route"}, + ], + entity_instance_counts={"Service": 8, "Route": 2}, + ) + + assert len(lines) == 1 + assert lines[0].entity_type == "Service" + assert lines[0].counterpart_type == "Route" diff --git a/src/dev-ui/app/components/graph-management/GraphExtractionJobSetsPanel.vue b/src/dev-ui/app/components/graph-management/GraphExtractionJobSetsPanel.vue index 161efcb17..395401a47 100644 --- a/src/dev-ui/app/components/graph-management/GraphExtractionJobSetsPanel.vue +++ b/src/dev-ui/app/components/graph-management/GraphExtractionJobSetsPanel.vue @@ -26,6 +26,7 @@ const inputClass = interface ExtractionJobSet { name: string + enabled?: boolean description?: string strategy: 'by_instances' | 'by_files' entity_type?: string @@ -68,7 +69,9 @@ async function load() { : [] doc.value = cloneDoc({ version: data.version || '1.0', - job_sets: Array.isArray(data.job_sets) ? data.job_sets : [], + job_sets: Array.isArray(data.job_sets) + ? 
data.job_sets.map((js) => ({ enabled: js.enabled !== false, ...js })) + : [], }) } catch (e: unknown) { const msg = e instanceof Error ? e.message : String(e) @@ -100,6 +103,7 @@ function addJobSet() { const index = doc.value.job_sets.length + 1 doc.value.job_sets.push({ name: `job_set_${index}`, + enabled: true, strategy: 'by_instances', entity_type: entityTypeOptions.value[0]?.name ?? '', instances_per_job: 4, @@ -112,7 +116,11 @@ function buildPayload(): ExtractionJobsDocument { return { version: doc.value.version || '1.0', job_sets: doc.value.job_sets.map((js) => { - const base = { name: js.name, strategy: js.strategy } as ExtractionJobSet + const base = { + name: js.name, + strategy: js.strategy, + enabled: js.enabled !== false, + } as ExtractionJobSet if (typeof js.description === 'string' && js.description.trim()) { base.description = js.description.trim() } @@ -137,6 +145,7 @@ function getEntityTypeInstanceCount(entityType?: string): number | null { } function jobSetErrors(js: ExtractionJobSet): string[] { + if (js.enabled === false) return [] const errs: string[] = [] if (js.strategy === 'by_instances') { if (!js.entity_type?.trim()) errs.push('Entity type is required for by_instances.') @@ -151,6 +160,7 @@ function jobSetErrors(js: ExtractionJobSet): string[] { } function projectedJobCount(js: ExtractionJobSet): number | null { + if (js.enabled === false) return 0 if (js.strategy !== 'by_instances') return null const total = getEntityTypeInstanceCount(js.entity_type) const perJob = Number(js.instances_per_job) @@ -168,11 +178,18 @@ async function save() { if (!doc.value) return saving.value = true try { - await apiFetch( + const res = await apiFetch<{ message?: string; warnings?: string[] }>( `/management/knowledge-graphs/${encodeURIComponent(props.kgId)}/extraction-jobs`, { method: 'PUT', body: buildPayload() }, ) - toast.success('Saved job sets and regenerated pending jobs') + toast.success('Saved job sets', { + description: res.message || 'Pending jobs 
were synced for enabled job sets.', + }) + if (Array.isArray(res.warnings) && res.warnings.length > 0) { + toast.warning('Some job sets were not refreshed', { + description: res.warnings.join(' '), + }) + } emit('saved') await load() } catch (e: unknown) { @@ -219,7 +236,7 @@ defineExpose({ refresh: load }) Job sets </CardTitle> <CardDescription> - Author with the assistant above or edit directly. Save regenerates pending jobs from live graph instances. + Author with the assistant above or edit directly. Save syncs pending jobs for enabled sets only — you can add or enable sets while extraction is running. </CardDescription> </CardHeader> <CardContent class="space-y-6"> @@ -231,12 +248,19 @@ defineExpose({ refresh: load }) v-for="(js, idx) in doc.job_sets" :key="`${js.name}-${idx}`" class="space-y-4 rounded-xl border border-cyan-500/30 bg-gradient-to-br from-cyan-500/10 via-card to-card p-4 md:p-5" + :class="js.enabled === false ? 'opacity-60' : ''" > <div class="flex flex-wrap items-start justify-between gap-3"> <div class="space-y-1"> <input v-model="js.name" :class="inputClass" placeholder="Job set name" /> </div> - <Badge variant="outline" class="text-[11px]">#{{ idx + 1 }}</Badge> + <div class="flex flex-wrap items-center gap-2"> + <label class="flex items-center gap-2 text-xs text-muted-foreground"> + <input v-model="js.enabled" type="checkbox" class="size-4 rounded border-border" /> + Enabled + </label> + <Badge variant="outline" class="text-[11px]">#{{ idx + 1 }}</Badge> + </div> </div> <div class="grid gap-2 sm:grid-cols-2"> diff --git a/src/dev-ui/app/components/graph-management/GraphExtractionJobWatchDialog.vue b/src/dev-ui/app/components/graph-management/GraphExtractionJobWatchDialog.vue new file mode 100644 index 000000000..3699a43d2 --- /dev/null +++ b/src/dev-ui/app/components/graph-management/GraphExtractionJobWatchDialog.vue @@ -0,0 +1,255 @@ +<script setup lang="ts"> +import { computed, nextTick, onUnmounted, ref, watch } from 'vue' +import { 
Loader2 } from 'lucide-vue-next' +import { + Dialog, + DialogContent, + DialogDescription, + DialogHeader, + DialogTitle, +} from '@/components/ui/dialog' +import { Badge } from '@/components/ui/badge' +import { Separator } from '@/components/ui/separator' + +const props = defineProps<{ + kgId: string + jobId: string | null + open: boolean +}>() + +const emit = defineEmits<{ + 'update:open': [value: boolean] +}>() + +const { apiFetch } = useApiClient() + +interface ActivityMessage { + timestamp: string + kind: string + text: string +} + +interface JobActivityPayload { + jobId: string + status: string + log: string + messages: ActivityMessage[] + detail: { + jobSet: string + workerId: string | null + inputTokens: number + outputTokens: number + cacheReadTokens: number + cacheCreationTokens: number + costUsd: number + entitiesCreated: number + entitiesModified: number + relationshipsCreated: number + writeOps: number + instanceCount: number + fileCount: number + errorMessage: string | null + targetInstances: Array<{ slug: string; entity_type: string }> + targetFiles: Array<{ path: string; repository_folder: string }> + } +} + +const loading = ref(false) +const error = ref<string | null>(null) +const payload = ref<JobActivityPayload | null>(null) +const logContainer = ref<HTMLElement | null>(null) + +let pollTimer: ReturnType<typeof setInterval> | null = null + +const isLive = computed(() => payload.value?.status === 'in_progress') +const displayMessages = computed(() => payload.value?.messages || []) + +async function loadActivity() { + if (!props.jobId) return + if (!payload.value) loading.value = true + error.value = null + try { + payload.value = await apiFetch<JobActivityPayload>( + `/management/knowledge-graphs/${encodeURIComponent(props.kgId)}/extraction-jobs/jobs/${encodeURIComponent(props.jobId)}/activity`, + ) + } catch (e: unknown) { + error.value = e instanceof Error ? 
e.message : 'Failed to load activity' + } finally { + loading.value = false + } +} + +function startPolling() { + stopPolling() + pollTimer = setInterval(() => { void loadActivity() }, 1000) +} + +function stopPolling() { + if (!pollTimer) return + clearInterval(pollTimer) + pollTimer = null +} + +watch( + () => [props.open, props.jobId] as const, + ([open, jobId]) => { + if (!open || !jobId) { + stopPolling() + payload.value = null + return + } + void loadActivity() + startPolling() + }, + { immediate: true }, +) + +watch( + () => displayMessages.value.length, + async () => { + await nextTick() + const el = logContainer.value + if (el) el.scrollTop = el.scrollHeight + }, +) + +onUnmounted(stopPolling) + +function formatCompactNumber(value: number): string { + return new Intl.NumberFormat(undefined, { notation: 'compact', maximumFractionDigits: 1 }).format(value) +} + +function messageLabel(kind: string): string { + switch (kind) { + case 'thought': + return 'Thinking' + case 'tool': + return 'Tool' + case 'system': + return 'System' + case 'error': + return 'Error' + case 'success': + return 'Done' + default: + return 'Info' + } +} + +function messageClass(kind: string): string { + switch (kind) { + case 'thought': + return 'border-l-primary/60 bg-primary/5' + case 'tool': + return 'border-l-amber-500/60 bg-amber-500/5' + case 'system': + return 'border-l-muted-foreground/40 bg-muted/30' + case 'error': + return 'border-l-destructive/60 bg-destructive/5 text-destructive' + case 'success': + return 'border-l-green-600/60 bg-green-600/5' + default: + return 'border-l-border bg-muted/10' + } +} +</script> + +<template> + <Dialog :open="open" @update:open="emit('update:open', $event)"> + <DialogContent class="flex max-h-[90dvh] flex-col gap-0 overflow-hidden sm:max-w-4xl"> + <DialogHeader> + <DialogTitle class="flex items-center gap-2"> + Watch extraction job + <Badge v-if="payload" variant="outline" class="font-mono text-[10px]"> + {{ payload.status }} + </Badge> + 
<Loader2 v-if="isLive" class="size-3.5 animate-spin text-muted-foreground" /> + </DialogTitle> + <DialogDescription class="font-mono text-xs"> + {{ jobId || '—' }} + </DialogDescription> + </DialogHeader> + + <div v-if="loading && !payload" class="flex items-center gap-2 py-8 text-sm text-muted-foreground"> + <Loader2 class="size-4 animate-spin" /> + Loading activity... + </div> + <div v-else-if="error" class="py-4 text-sm text-destructive">{{ error }}</div> + <template v-else-if="payload"> + <div class="grid min-h-0 flex-1 gap-4 py-3 sm:grid-cols-[minmax(0,1fr)_220px]"> + <div class="flex min-h-0 flex-col gap-2"> + <p class="text-xs font-medium text-foreground/90"> + Agent activity + <span v-if="isLive" class="text-muted-foreground">(live, refreshes every 1s)</span> + </p> + <div + ref="logContainer" + class="min-h-[40dvh] max-h-[60dvh] flex-1 space-y-2 overflow-y-auto rounded-lg border bg-muted/10 p-2" + > + <div + v-if="displayMessages.length === 0" + class="p-3 text-xs text-muted-foreground" + > + Waiting for agent output. Thoughts and tool calls appear here as the worker runs. 
+ </div> + <div + v-for="(message, index) in displayMessages" + :key="`${message.timestamp}-${index}-${message.text.slice(0, 24)}`" + class="rounded-md border-l-2 px-2 py-1.5 text-[11px] leading-relaxed" + :class="messageClass(message.kind)" + > + <div class="mb-0.5 flex items-center gap-2 text-[10px] text-muted-foreground"> + <span class="font-medium uppercase tracking-wide">{{ messageLabel(message.kind) }}</span> + <span v-if="message.timestamp" class="font-mono">{{ message.timestamp }}</span> + </div> + <p class="whitespace-pre-wrap break-words">{{ message.text }}</p> + </div> + </div> + </div> + + <div class="space-y-3 overflow-y-auto text-xs"> + <div> + <p class="font-medium">{{ payload.detail.jobSet }}</p> + <p v-if="payload.detail.workerId" class="font-mono text-muted-foreground"> + {{ payload.detail.workerId }} + </p> + </div> + + <Separator /> + + <div class="space-y-1 text-muted-foreground"> + <p>Tokens: {{ formatCompactNumber(payload.detail.inputTokens) }} in / {{ formatCompactNumber(payload.detail.outputTokens) }} out</p> + <p v-if="payload.detail.cacheReadTokens || payload.detail.cacheCreationTokens"> + Cache: {{ formatCompactNumber(payload.detail.cacheReadTokens) }} read / {{ formatCompactNumber(payload.detail.cacheCreationTokens) }} write + </p> + <p>Cost: ${{ payload.detail.costUsd.toFixed(4) }}</p> + </div> + + <div class="space-y-1 text-muted-foreground"> + <p class="font-medium text-foreground">Graph writes</p> + <p>{{ payload.detail.entitiesCreated }} entities created</p> + <p>{{ payload.detail.entitiesModified }} entities modified</p> + <p>{{ payload.detail.relationshipsCreated }} relationships created</p> + <p class="font-medium text-foreground">{{ payload.detail.writeOps }} total write ops</p> + </div> + + <div v-if="payload.detail.instanceCount" class="space-y-1"> + <p class="font-medium">Target instances ({{ payload.detail.instanceCount }})</p> + <p + v-for="instance in payload.detail.targetInstances" + 
:key="`${instance.entity_type}:${instance.slug}`" + class="font-mono text-[10px] text-muted-foreground" + > + {{ instance.entity_type }}: {{ instance.slug }} + </p> + </div> + + <p v-if="payload.detail.errorMessage" class="text-destructive"> + {{ payload.detail.errorMessage }} + </p> + </div> + </div> + </template> + </DialogContent> + </Dialog> +</template> diff --git a/src/dev-ui/app/components/graph-management/GraphExtractionJobsWorkspace.vue b/src/dev-ui/app/components/graph-management/GraphExtractionJobsWorkspace.vue index 72144dbf1..162234eeb 100644 --- a/src/dev-ui/app/components/graph-management/GraphExtractionJobsWorkspace.vue +++ b/src/dev-ui/app/components/graph-management/GraphExtractionJobsWorkspace.vue @@ -8,6 +8,8 @@ import { Settings, ClipboardList, AlertCircle, + Eye, + XCircle, } from 'lucide-vue-next' import { Card, CardHeader, CardTitle, CardDescription, CardContent } from '@/components/ui/card' import { Button } from '@/components/ui/button' @@ -16,6 +18,7 @@ import { Separator } from '@/components/ui/separator' import GraphExtractionJobSetsPanel from '@/components/graph-management/GraphExtractionJobSetsPanel.vue' import GraphDesignEntitiesPanel from '@/components/graph-management/GraphDesignEntitiesPanel.vue' import GraphDesignRelationshipsPanel from '@/components/graph-management/GraphDesignRelationshipsPanel.vue' +import GraphExtractionJobWatchDialog from '@/components/graph-management/GraphExtractionJobWatchDialog.vue' const props = defineProps<{ kgId: string @@ -45,6 +48,11 @@ interface DbStatus { inputTokens: number outputTokens: number writeOps: number + entitiesCreated?: number + entitiesModified?: number + relationshipsCreated?: number + instanceCount?: number + errorMessage?: string | null assistantPreview: string | null }> activeWorkers?: Array<{ @@ -57,6 +65,11 @@ interface DbStatus { }> } +type RecentJobEvent = DbStatus['recentJobs'][number] & { + eventKey: string + seenAtMs: number +} + interface ExtractionRunState { live: boolean 
status: string @@ -68,6 +81,7 @@ interface PlanSummary { job_sets: Array<{ name: string strategy: string + enabled?: boolean entity_type?: string instances_per_job?: number projected_jobs?: number | null @@ -93,6 +107,39 @@ const resettingFailed = ref(false) const resettingAll = ref(false) const optimisticLiveUntilMs = ref<number | null>(null) const nowMs = ref(Date.now()) +const lastStatusRefreshMs = ref<number | null>(null) +const recentJobEvents = ref<RecentJobEvent[]>([]) +const watchJobId = ref<string | null>(null) +const watchDialogOpen = ref(false) +const cancellingJobId = ref<string | null>(null) + +function resolveApiErrorDescription(e: unknown): string { + const err = e as { data?: { detail?: unknown }; message?: string } + const detail = err.data?.detail + if (typeof detail === 'string' && detail.trim()) return detail + if (detail && typeof detail === 'object' && 'message' in detail) { + return String((detail as { message: string }).message) + } + if (err.message) return err.message + return 'Request failed' +} + +function resolveRegenerateFailureMessage(description: string): { title: string; hint?: string } { + const lower = description.toLowerCase() + if (lower.includes('in progress') || lower.includes('still running')) { + return { + title: 'Cannot refresh jobs for that set yet', + hint: 'Wait for running jobs to finish, use Cancel on individual jobs, or use Kill extraction to stop all workers — then Regenerate jobs again.', + } + } + if (lower.includes('description') || lower.includes('entity type')) { + return { + title: 'Job set configuration is invalid', + hint: 'Fix the errors shown in the Job Sets panel, then Save job sets or Regenerate jobs.', + } + } + return { title: 'Regenerate failed', hint: description } +} let autoRefreshInterval: ReturnType<typeof setInterval> | null = null let clockInterval: ReturnType<typeof setInterval> | null = null @@ -112,7 +159,10 @@ async function loadDatabaseStatus(options?: { background?: boolean }) { 
dbError.value = null } try { - dbStatus.value = await apiFetch<DbStatus>(`${basePath.value}/database-status`) + const status = await apiFetch<DbStatus>(`${basePath.value}/database-status`) + dbStatus.value = status + mergeRecentJobEvents(status.recentJobs || []) + lastStatusRefreshMs.value = Date.now() dbError.value = null } catch (e: unknown) { if (!background || !hasExistingData) { @@ -177,6 +227,69 @@ const plannedVsMaterializedMismatch = computed(() => { if (planned <= 0) return false return planned !== materializedJobsTotal.value }) +const recentJobs = computed(() => recentJobEvents.value) +const activeWorkerCount = computed(() => dbStatus.value?.activeWorkers?.length || 0) +const idleWorkerCount = computed(() => Math.max(0, workerCount.value - activeWorkerCount.value)) +const statusAgeSeconds = computed(() => { + if (!lastStatusRefreshMs.value) return null + return Math.max(0, Math.floor((nowMs.value - lastStatusRefreshMs.value) / 1000)) +}) +const showOptimisticLiveActivity = computed( + () => Boolean(optimisticLiveUntilMs.value && nowMs.value < optimisticLiveUntilMs.value), +) + +function mergeRecentJobEvents(incoming: DbStatus['recentJobs']) { + const now = Date.now() + const existingByJobId = new Map(recentJobEvents.value.map((event) => [event.jobId, event] as const)) + for (const job of incoming) { + existingByJobId.set(job.jobId, { ...job, eventKey: job.jobId, seenAtMs: now }) + } + const maxAgeMs = 15 * 60 * 1000 + const merged = Array.from(existingByJobId.values()).filter((event) => now - event.seenAtMs <= maxAgeMs) + merged.sort((a, b) => { + const aTs = Date.parse(a.completedAt || a.startedAt || '') || a.seenAtMs + const bTs = Date.parse(b.completedAt || b.startedAt || '') || b.seenAtMs + return bTs - aTs + }) + recentJobEvents.value = merged.slice(0, 80) +} + +function clearRecentJobEvents() { + recentJobEvents.value = [] +} + +function openWatch(jobId: string) { + watchJobId.value = jobId + watchDialogOpen.value = true +} + +function 
recentJobBadgeVariant(status: string): 'default' | 'outline' | 'secondary' | 'destructive' | 'success' { + if (status === 'in_progress') return 'default' + if (status === 'failed') return 'destructive' + if (status === 'completed') return 'success' + return 'outline' +} + +function formatRecentWhen(startedAt: string | null, completedAt: string | null): string { + if (completedAt && startedAt) { + const startMs = Date.parse(startedAt) + const endMs = Date.parse(completedAt) + if (Number.isFinite(startMs) && Number.isFinite(endMs) && endMs >= startMs) { + const deltaSec = Math.max(0, Math.floor((endMs - startMs) / 1000)) + if (deltaSec < 60) return `${deltaSec}s` + const mins = Math.floor(deltaSec / 60) + const secs = deltaSec % 60 + if (mins < 60) return `${mins}m ${secs}s` + const hours = Math.floor(mins / 60) + return `${hours}h ${mins % 60}m` + } + } + return completedAt || startedAt || '—' +} + +function formatCompactNumber(value: number): string { + return new Intl.NumberFormat(undefined, { notation: 'compact', maximumFractionDigits: 1 }).format(value) +} async function startExtraction() { startingExtraction.value = true @@ -234,21 +347,52 @@ async function killExtraction() { async function regenerateJobs() { regeneratingJobs.value = true try { - const res = await apiFetch<{ generated_jobs?: number; message?: string }>( + const res = await apiFetch<{ generated_jobs?: number; message?: string; warnings?: string[] }>( `${basePath.value}/regenerate`, { method: 'POST' }, ) - toast.success('Jobs regenerated', { description: res.message }) + toast.success('Jobs synced', { + description: res.message || `Synced ${res.generated_jobs ?? 
0} pending job(s).`, + }) + if (Array.isArray(res.warnings) && res.warnings.length > 0) { + toast.warning('Some job sets were skipped', { + description: `${res.warnings.join(' ')} Save job sets after those jobs finish, or cancel them first.`, + duration: 10000, + }) + } await refreshAll() } catch (e: unknown) { - toast.error('Regenerate failed', { - description: e instanceof Error ? e.message : 'Request failed', + const description = resolveApiErrorDescription(e) + const failure = resolveRegenerateFailureMessage(description) + toast.error(failure.title, { + description: failure.hint || description, + duration: 10000, }) } finally { regeneratingJobs.value = false } } +async function cancelJob(jobId: string) { + cancellingJobId.value = jobId + try { + const res = await apiFetch<{ message?: string }>( + `${basePath.value}/jobs/${encodeURIComponent(jobId)}/cancel`, + { method: 'POST' }, + ) + toast.success('Job cancelled', { description: res.message }) + await refreshAll({ background: true }) + } catch (e: unknown) { + toast.error('Cancel failed', { description: resolveApiErrorDescription(e) }) + } finally { + cancellingJobId.value = null + } +} + +function canCancelJob(status: string): boolean { + return status === 'pending' || status === 'in_progress' +} + async function resetByKind(kind: 'stale' | 'completed' | 'failed' | 'all') { const map = { stale: { ref: resettingRunning, path: 'reset-stale' }, @@ -290,6 +434,7 @@ watch( if (active) startAutoRefresh() else if (!optimisticLiveUntilMs.value) stopAutoRefresh() }, + { immediate: true }, ) watch( @@ -419,31 +564,133 @@ onUnmounted(() => { </div> </div> - <div v-if="extractionRunLive" class="rounded-lg border border-primary/30 bg-primary/5 p-3 text-xs"> - Extraction run is live — status refreshes every 1.5s. 
- </div> - <div v-if="plannedVsMaterializedMismatch" class="flex items-start gap-2 rounded-lg border border-amber-500/40 bg-amber-500/5 p-3 text-xs"> <AlertCircle class="mt-0.5 size-4 shrink-0 text-amber-600" /> <div> Planned job count ({{ plannedKnownTotalJobs }}) differs from materialized total ({{ materializedJobsTotal }}). + Regenerate syncs pending jobs for enabled sets only; running jobs are left untouched. <Button size="sm" variant="link" class="h-auto p-0 text-xs" :disabled="regeneratingJobs" @click="regenerateJobs"> Regenerate jobs </Button> </div> </div> - <div v-if="(dbStatus?.activeWorkers?.length || 0) > 0" class="space-y-2"> - <p class="text-xs font-medium text-muted-foreground">Active workers</p> - <div class="flex flex-wrap gap-2"> - <Badge v-for="worker in dbStatus?.activeWorkers" :key="worker.workerId" variant="outline" class="font-mono text-[10px]"> - {{ worker.workerId }} → {{ worker.jobId }} - </Badge> + <div class="rounded-lg border bg-card p-3"> + <div class="mb-2 flex flex-wrap items-center justify-between gap-2"> + <p class="text-xs font-medium text-foreground/90">Live extraction activity</p> + <div class="flex flex-wrap items-center gap-1.5"> + <Badge variant="outline" class="font-mono text-[11px]"> + {{ completedJobsCount }} completed · {{ inProgressJobsCount }} running · {{ pendingJobsCount }} ready + </Badge> + <Badge variant="outline" class="font-mono text-[11px]"> + workers: {{ activeWorkerCount }}/{{ workerCount }} + </Badge> + <Badge v-if="idleWorkerCount > 0" variant="outline" class="font-mono text-[11px]"> + {{ idleWorkerCount }} idle + </Badge> + <Badge v-if="statusAgeSeconds !== null" variant="outline" class="font-mono text-[11px]"> + updated {{ statusAgeSeconds }}s ago + </Badge> + </div> + </div> + <div class="mb-3 h-1.5 overflow-hidden rounded-full bg-muted"> + <div + class="h-full bg-primary/80 transition-all" + :style="{ width: `${extractionProgressPercent}%` }" + /> + </div> + <div class="space-y-2"> + <div class="flex 
items-center justify-between gap-2"> + <p class="text-xs font-medium text-foreground/90">Recent job events</p> + <Button + variant="ghost" + size="sm" + class="h-7 px-2 text-[11px]" + :disabled="recentJobs.length === 0" + @click="clearRecentJobEvents" + > + Clear events + </Button> + </div> + <div v-if="recentJobs.length === 0" class="text-xs text-muted-foreground"> + {{ + startingExtraction || showOptimisticLiveActivity + ? 'Starting extraction workers. Job events will appear as jobs are claimed and completed.' + : 'No job events yet.' + }} + </div> + <div v-else class="max-h-80 space-y-1 overflow-y-auto pr-1"> + <div + v-for="job in recentJobs" + :key="`recent-${job.jobId}`" + class="rounded-md border bg-muted/10 px-2 py-1.5" + > + <div class="flex flex-wrap items-center justify-between gap-2 text-[11px]"> + <div class="flex flex-wrap items-center gap-2"> + <Badge :variant="recentJobBadgeVariant(job.status)" class="font-mono">{{ job.status }}</Badge> + <span class="font-medium text-foreground">{{ job.jobSet }}</span> + <span class="font-mono text-muted-foreground">{{ job.jobId }}</span> + </div> + <div class="flex flex-wrap items-center gap-2 text-muted-foreground"> + <span v-if="job.workerId" class="font-mono">{{ job.workerId }}</span> + <span>{{ formatRecentWhen(job.startedAt, job.completedAt) }}</span> + <Button + variant="ghost" + size="sm" + class="h-6 px-2 text-[10px]" + @click="openWatch(job.jobId)" + > + <Eye class="mr-1 size-3" /> + Watch + </Button> + <Button + v-if="canCancelJob(job.status)" + variant="ghost" + size="sm" + class="h-6 px-2 text-[10px] text-destructive hover:text-destructive" + :disabled="cancellingJobId === job.jobId" + @click="cancelJob(job.jobId)" + > + <Loader2 v-if="cancellingJobId === job.jobId" class="mr-1 size-3 animate-spin" /> + <XCircle v-else class="mr-1 size-3" /> + Cancel + </Button> + </div> + </div> + <div class="flex flex-wrap items-center gap-2 text-[10px] text-muted-foreground"> + <span class="font-mono"> + tokens {{ 
formatCompactNumber(job.inputTokens) }} in / {{ formatCompactNumber(job.outputTokens) }} out + </span> + <span class="font-mono"> + writes {{ job.writeOps }} + <template v-if="job.entitiesCreated || job.entitiesModified || job.relationshipsCreated"> + ({{ job.entitiesCreated || 0 }}+{{ job.entitiesModified || 0 }}e / {{ job.relationshipsCreated || 0 }}r) + </template> + </span> + <span v-if="job.instanceCount" class="font-mono">{{ job.instanceCount }} instances</span> + </div> + <p + v-if="job.assistantPreview" + class="line-clamp-1 text-[10px] leading-snug text-muted-foreground" + > + {{ job.assistantPreview }} + </p> + <p v-if="job.errorMessage" class="line-clamp-1 text-[10px] text-destructive"> + {{ job.errorMessage }} + </p> + </div> + </div> </div> </div> </CardContent> </Card> + <GraphExtractionJobWatchDialog + v-model:open="watchDialogOpen" + :kg-id="kgId" + :job-id="watchJobId" + /> + <Card> <CardHeader> <CardTitle class="flex items-center gap-2 text-base"> From 1d91598a9f6d8d7f0a5573cb5eae77cdca0fc663 Mon Sep 17 00:00:00 2001 From: aredenba-rh <aredenba@redhat.com> Date: Fri, 12 Jun 2026 13:23:17 -0400 Subject: [PATCH 123/153] feat(extraction): materialize job workdirs and gate on applied mutations Mirror agentic-ci context/verdict patterns: unpack JobPackages into repository-files (instance path targeting with full fallback), copy helpers/workload-mutations.sh, pre-create writable mutations/, and fail jobs unless mutations/result.json reports operations_applied > 0. Tighten per-instance description skills with Adapter/Resource/ComponentTest counts. 
Co-authored-by: Cursor <cursoragent@cursor.com> --- .../extraction_jobs_tools.py | 3 +- .../application/skill_resolution_service.py | 19 +- .../observability/extraction_job_probe.py | 50 +++ .../agentic_ci_extraction_job_runner.py | 14 + .../extraction_job_helpers/__init__.py | 7 + .../workload-mutations.sh | 121 ++++++++ .../infrastructure/extraction_job_prompt.py | 52 ++-- .../extraction_job_repository_files.py | 288 ++++++++++++++++++ .../infrastructure/extraction_job_verdict.py | 66 ++++ .../extraction_job_workdir_layout.py | 48 +++ .../extraction_job_workdir_materializer.py | 138 +++++---- .../test_agentic_ci_extraction_job_runner.py | 2 + .../test_extraction_job_repository_files.py | 129 ++++++++ .../test_extraction_job_verdict.py | 73 +++++ ...est_extraction_job_workdir_materializer.py | 135 ++++++++ .../test_extraction_relationship_authoring.py | 18 ++ 16 files changed, 1081 insertions(+), 82 deletions(-) create mode 100644 src/api/extraction/domain/observability/extraction_job_probe.py create mode 100644 src/api/extraction/infrastructure/extraction_job_helpers/__init__.py create mode 100644 src/api/extraction/infrastructure/extraction_job_helpers/workload-mutations.sh create mode 100644 src/api/extraction/infrastructure/extraction_job_repository_files.py create mode 100644 src/api/extraction/infrastructure/extraction_job_verdict.py create mode 100644 src/api/extraction/infrastructure/extraction_job_workdir_layout.py create mode 100644 src/api/tests/unit/extraction/infrastructure/test_extraction_job_repository_files.py create mode 100644 src/api/tests/unit/extraction/infrastructure/test_extraction_job_verdict.py create mode 100644 src/api/tests/unit/extraction/infrastructure/test_extraction_job_workdir_materializer.py diff --git a/src/agent-runtime/kartograph_agent_runtime/extraction_jobs_tools.py b/src/agent-runtime/kartograph_agent_runtime/extraction_jobs_tools.py index 77036692b..c2d4dea9c 100644 --- 
a/src/agent-runtime/kartograph_agent_runtime/extraction_jobs_tools.py +++ b/src/agent-runtime/kartograph_agent_runtime/extraction_jobs_tools.py @@ -51,7 +51,8 @@ async def get_extraction_jobs_config(_args: dict[str, Any]) -> dict[str, Any]: "opening capture-everything paragraph, Properties section listing each property, then " "one '{EntityType} -> {rel} -> {CounterpartType}:' line per relationship this entity " "type owns (include only when this entity type has more live instances than the " - "counterpart type)." + "counterpart type). Example: Adapter (19) includes operates_on->Resource (9), " + "excludes verifies/verifies_inverse->ComponentTest (1264)." ), { "version": str, diff --git a/src/api/extraction/application/skill_resolution_service.py b/src/api/extraction/application/skill_resolution_service.py index a59b4d44e..1eb4000b6 100644 --- a/src/api/extraction/application/skill_resolution_service.py +++ b/src/api/extraction/application/skill_resolution_service.py @@ -146,8 +146,9 @@ class ResolvedExtractionSkillPack: "Prioritize extraction job set authoring: by_instances batches with required " "per-instance extraction descriptions (no separate extraction_plan.md). " "Each description tells the extraction worker what to enrich for assigned entity slugs. " - "Follow per_instance_description_authoring — default to exhaustive property and " - "relationship coverage unless the operator narrows scope. When the operator approves " + "Follow per_instance_description_authoring — all properties on the target type; " + "relationship lines only where that type has more live instances than the counterpart. " + "When the operator approves " "a proposed configuration, persist it yourself with " "kartograph_save_extraction_jobs_config — never instruct them to manually enter " "values in the extraction-jobs UI." @@ -159,8 +160,15 @@ class ResolvedExtractionSkillPack: "counts. Enumerate every property on the target entity type. 
For relationships, apply " "the duplicate-work rule by default: when EntityX relates to EntityY, include a " "'{EntityX} -> {relationship_label} -> {EntityY}:' line only if EntityX has MORE live " - "instances than EntityY. Omit that relationship line from EntityY job sets when " - "EntityY has fewer or equal instances — the higher-volume side owns that edge work. " + "instances than EntityY. Omit that relationship line when EntityX has fewer or equal " + "instances — the higher-volume counterpart type's job set owns that edge work. " + "Example (use real counts from entity_types): Adapter (19 instances) vs Resource (9) " + "vs ComponentTest (1264) — an Adapter job set includes " + "'Adapter -> operates_on -> Resource:' (19 > 9) and must NOT include any " + "ComponentTest relationship line such as " + "'Adapter -> verifies_inverse -> ComponentTest:' (19 << 1264; ComponentTest jobs " + "create verifies edges toward adapters). Compare counts numerically before writing " + "each relationship line. " "Write the description using this exact shape (replace with real ontology names — " "never use placeholder EntityX in saved text): " "Opening (one paragraph): 'For each of the instances of {EntityType} you've been " @@ -215,7 +223,8 @@ class ResolvedExtractionSkillPack: "ui_mode_framing": ( "Focus on extraction job set setup: by_instances batches whose description follows " "per_instance_description_authoring (all properties; relationship lines only where " - "this entity type has more instances than the counterpart). Persist via " + "this entity type has more instances than the counterpart — e.g. Adapter includes " + "operates_on->Resource but not verifies->ComponentTest). Persist via " "kartograph_save_extraction_jobs_config when the operator " "approves, then guide them to Run extraction. Use kartograph_get_schema_ontology and " "kartograph_list_instances_by_type to size batches. 
JobPackage readiness still " diff --git a/src/api/extraction/domain/observability/extraction_job_probe.py b/src/api/extraction/domain/observability/extraction_job_probe.py new file mode 100644 index 000000000..7e5acd1af --- /dev/null +++ b/src/api/extraction/domain/observability/extraction_job_probe.py @@ -0,0 +1,50 @@ +"""Domain observability probes for extraction job execution.""" + +from __future__ import annotations + +from dataclasses import dataclass +from typing import Any, Protocol + + +@dataclass(frozen=True) +class ExtractionJobMaterializationObservation: + job_id: str + knowledge_graph_id: str + files_written: int + packages_requested: int + packages_missing: tuple[str, ...] + paths_requested: tuple[str, ...] + warnings: tuple[str, ...] + + +class ExtractionJobProbe(Protocol): + def repository_files_materialized(self, observation: ExtractionJobMaterializationObservation) -> None: + """Emit when a job workspace repository-files tree is prepared.""" + + +class LoggingExtractionJobProbe: + """Default probe that records materialization outcomes for operators.""" + + def __init__(self, *, sink: Any | None = None) -> None: + import logging + + self._logger = sink or logging.getLogger("kartograph.extraction.jobs") + + def repository_files_materialized(self, observation: ExtractionJobMaterializationObservation) -> None: + if observation.files_written > 0: + self._logger.info( + "extraction_job_repository_files_materialized job_id=%s kg_id=%s files=%s paths_requested=%s", + observation.job_id, + observation.knowledge_graph_id, + observation.files_written, + len(observation.paths_requested), + ) + return + self._logger.warning( + "extraction_job_repository_files_empty job_id=%s kg_id=%s packages_requested=%s packages_missing=%s warnings=%s", + observation.job_id, + observation.knowledge_graph_id, + observation.packages_requested, + list(observation.packages_missing), + list(observation.warnings), + ) diff --git 
a/src/api/extraction/infrastructure/agentic_ci_extraction_job_runner.py b/src/api/extraction/infrastructure/agentic_ci_extraction_job_runner.py index e521cdf8b..03b084889 100644 --- a/src/api/extraction/infrastructure/agentic_ci_extraction_job_runner.py +++ b/src/api/extraction/infrastructure/agentic_ci_extraction_job_runner.py @@ -32,6 +32,7 @@ from extraction.infrastructure.extraction_job_workdir_materializer import ( ExtractionJobWorkdirMaterializer, ) +from extraction.infrastructure.extraction_job_verdict import require_successful_apply from extraction.infrastructure.vertex_runtime_env import ( build_gcloud_adc_env, build_gcloud_config_bind, @@ -154,6 +155,19 @@ def _run_in_container_sync( raise RuntimeError( f"agentic-ci container exited with code {rc} for job {job.job_id}" ) + verdict = require_successful_apply(workdir) + append_activity_message( + log_path, + kind="done", + text=( + f"Applied {verdict.operations_applied} graph mutation operation(s) " + "via workload API." + ), + ) + metrics = { + **metrics, + "operations_applied": verdict.operations_applied, + } return metrics finally: if otel_proc is not None: diff --git a/src/api/extraction/infrastructure/extraction_job_helpers/__init__.py b/src/api/extraction/infrastructure/extraction_job_helpers/__init__.py new file mode 100644 index 000000000..6680535a0 --- /dev/null +++ b/src/api/extraction/infrastructure/extraction_job_helpers/__init__.py @@ -0,0 +1,7 @@ +"""Bundled helper scripts copied into agentic-ci extraction job workspaces.""" + +from pathlib import Path + +HELPERS_DIR = Path(__file__).resolve().parent +HELPERS_CONTAINER_DIR = "helpers" +HELPER_SCRIPT_NAMES = ("workload-mutations.sh",) diff --git a/src/api/extraction/infrastructure/extraction_job_helpers/workload-mutations.sh b/src/api/extraction/infrastructure/extraction_job_helpers/workload-mutations.sh new file mode 100644 index 000000000..b2facc1db --- /dev/null +++ 
b/src/api/extraction/infrastructure/extraction_job_helpers/workload-mutations.sh @@ -0,0 +1,121 @@ +#!/usr/bin/env bash +# Kartograph extraction job helper — validate/apply JSONL via workload API. +# Writes mutations/result.json (agentic-ci verdict artifact). +# +# Usage: +# helpers/workload-mutations.sh validate mutations/batch.jsonl +# helpers/workload-mutations.sh apply mutations/batch.jsonl +set -euo pipefail + +ACTION="${1:-}" +JSONL_PATH="${2:-}" +WORKDIR="${KARTOGRAPH_WORKSPACE:-/workspace}" + +python3 - "${ACTION}" "${JSONL_PATH}" "${WORKDIR}" <<'PY' +import json +import sys +import urllib.error +import urllib.request +from pathlib import Path + +action, jsonl_path, workdir = sys.argv[1:4] +workdir_path = Path(workdir) +context_path = workdir_path / "job-context.json" +result_path = workdir_path / "mutations" / "result.json" + + +def write_result(payload: dict) -> None: + result_path.parent.mkdir(parents=True, exist_ok=True) + result_path.write_text(json.dumps(payload, indent=2) + "\n", encoding="utf-8") + json.dump(payload, sys.stdout, indent=2) + print() + + +def fail(message: str, *, http_status: int | None = None) -> None: + payload = { + "action": action, + "applied": False, + "operations_applied": 0, + "valid": False, + "operation_count": 0, + "errors": [message], + "http_status": http_status, + } + write_result(payload) + raise SystemExit(1) + + +if action not in {"validate", "apply"}: + fail("first argument must be validate or apply") +jsonl_file = Path(jsonl_path) +if not jsonl_file.is_file(): + fail(f"JSONL file not found: {jsonl_path}") +if not context_path.is_file(): + fail("missing job-context.json in workspace") + +context = json.loads(context_path.read_text(encoding="utf-8")) +api_base = str(context["api_base_url"]).rstrip("/") +token = str(context["workload_token"]) +jsonl = jsonl_file.read_text(encoding="utf-8") +endpoint = f"{api_base}/extraction/workloads/mutations/{action}" +body = json.dumps({"jsonl": jsonl}).encode("utf-8") +request = 
urllib.request.Request( + endpoint, + data=body, + method="POST", + headers={ + "Content-Type": "application/json", + "X-Workload-Token": token, + }, +) + +try: + with urllib.request.urlopen(request, timeout=600) as response: + http_status = response.status + payload = json.loads(response.read().decode("utf-8")) +except urllib.error.HTTPError as exc: + http_status = exc.code + try: + payload = json.loads(exc.read().decode("utf-8")) + except json.JSONDecodeError: + fail(f"workload API returned HTTP {http_status}", http_status=http_status) + errors = payload.get("detail") or payload.get("errors") or [str(payload)] + if isinstance(errors, str): + errors = [errors] + result = { + "action": action, + "applied": False, + "operations_applied": 0, + "valid": False, + "operation_count": 0, + "errors": [str(item) for item in errors], + "http_status": http_status, + } + write_result(result) + raise SystemExit(1) +except urllib.error.URLError as exc: + fail(f"workload API request failed: {exc.reason}") + +if action == "validate": + result = { + "action": "validate", + "valid": bool(payload.get("valid")), + "operation_count": int(payload.get("operation_count") or 0), + "errors": [str(item) for item in payload.get("errors") or []], + "http_status": http_status, + } +else: + operations_applied = int(payload.get("operations_applied") or 0) + result = { + "action": "apply", + "applied": bool(payload.get("applied")), + "operations_applied": operations_applied, + "errors": [str(item) for item in payload.get("errors") or []], + "http_status": http_status, + } + if not result["applied"] or operations_applied <= 0: + write_result(result) + raise SystemExit(1) + +write_result(result) +PY diff --git a/src/api/extraction/infrastructure/extraction_job_prompt.py b/src/api/extraction/infrastructure/extraction_job_prompt.py index 3ed751920..33e3dae59 100644 --- a/src/api/extraction/infrastructure/extraction_job_prompt.py +++ b/src/api/extraction/infrastructure/extraction_job_prompt.py @@ -7,12 
+7,15 @@ from extraction.domain.extraction_job import ExtractionJobRecord EXTRACTION_PROMPT_FILENAME = "extraction_prompt.md" +MUTATIONS_HELPER = "helpers/workload-mutations.sh" EXTRACTION_JOB_INVOKE_PROMPT = ( "You are running a Kartograph extraction job in /workspace. " - f"Read {EXTRACTION_PROMPT_FILENAME} and job-context.json, then follow the instructions " - "completely. Use the workload API credentials in job-context.json to apply all required " - "graph mutations before you finish." + f"Read {EXTRACTION_PROMPT_FILENAME}, job-context.json, and sources-index.json, then follow " + "the instructions completely. Write JSONL batches under mutations/, validate with " + f"`bash {MUTATIONS_HELPER} validate mutations/<batch>.jsonl`, then apply with " + f"`bash {MUTATIONS_HELPER} apply mutations/<batch>.jsonl`. Do not finish until apply " + "succeeds and mutations/result.json reports operations_applied > 0." ) @@ -27,7 +30,8 @@ def build_extraction_job_prompt(*, job: ExtractionJobRecord) -> str: """Return the agent prompt for one materialized extraction job.""" lines = [ "You are an extraction agent for Kartograph, a knowledge graph platform.", - "Read job-context.json in the workspace for API credentials and scope.", + "Read job-context.json and sources-index.json in the workspace for API credentials,", + "JobPackage sources, and repository-files materialization status.", "", "## Job instructions", job.description.strip() or "Extract graph entities and relationships for the assigned targets.", @@ -42,7 +46,9 @@ def build_extraction_job_prompt(*, job: ExtractionJobRecord) -> str: "Treat partial coverage as incomplete unless the job instructions below narrow scope.", "", "## Target entity instances", - "Process only the instances listed below. Use the workload API to read existing graph", + "Process only the instances listed below. 
Read source files under repository-files/", + "when materialized (see job-context.json repository_files and instance property paths", + "such as config_file_path or source_path). Use the workload API to read existing graph", "context and emit JSONL mutations for new or updated entities and relationships.", "", ] @@ -66,26 +72,32 @@ def build_extraction_job_prompt(*, job: ExtractionJobRecord) -> str: lines.append("") lines.extend( [ - "## Workload API", - "This container has no Kartograph MCP tools. Call the workload HTTP API with Bash/curl.", - "Read api_base_url and workload_token from job-context.json.", - "Send header `X-Workload-Token: <workload_token>` on every request.", + "## Repository files", + "If job-context.json repository_files.files_written is 0, report the warnings there", + "and still apply any updates supported by graph context — but prefer reading", + "repository-files/ content whenever sample_paths are listed.", "", - "Base path: `{api_base_url}/extraction/workloads`", + "## Mutations workflow (required)", + "This container has no Kartograph MCP tools. Use the bundled helper script:", + f"- Validate: `bash {MUTATIONS_HELPER} validate mutations/<batch>.jsonl`", + f"- Apply: `bash {MUTATIONS_HELPER} apply mutations/<batch>.jsonl`", + "The helper reads api_base_url and workload_token from job-context.json, calls the", + "workload API, and writes mutations/result.json (the CI verdict artifact).", + "Always validate before apply. 
Do not finish until apply succeeds.", "", - "Useful endpoints:", - "- GET `/schema/authoring-guide` — JSONL mutation shapes and rules", - "- GET `/schema/ontology` — current graph schema", - "- GET `/graph/search?q=...` — search existing nodes", - "- GET `/graph/instances?entity_type=...` — list instances by type", - "- POST `/mutations/validate` with body `{\"jsonl\": \"...\"}` — dry-run", - "- POST `/mutations/apply` with body `{\"jsonl\": \"...\"}` — apply mutations", + "Manual curl (only if helper fails): base `{api_base_url}/extraction/workloads`,", + "header `X-Workload-Token: <workload_token>`, POST `/mutations/validate` or", + "`/mutations/apply` with JSON body `{\"jsonl\": \"<file contents>\"}`.", "", - "Write `.jsonl` files in the workspace when batches are large. Validate before apply.", + "Other useful GET endpoints:", + "- `/schema/authoring-guide` — JSONL mutation shapes and rules", + "- `/schema/ontology` — current graph schema", + "- `/graph/search?q=...` — search existing nodes", + "- `/graph/instances?entity_type=...` — list instances by type", "", "## Completion", - "When finished, ensure all required mutations are applied through the workload API.", - "Do not modify files outside repository-files/.", + "When finished, mutations/result.json must show action=apply and operations_applied > 0.", + "Do not modify files outside repository-files/ except mutations/ and helpers/.", ] ) return "\n".join(lines) diff --git a/src/api/extraction/infrastructure/extraction_job_repository_files.py b/src/api/extraction/infrastructure/extraction_job_repository_files.py new file mode 100644 index 000000000..6e58e001c --- /dev/null +++ b/src/api/extraction/infrastructure/extraction_job_repository_files.py @@ -0,0 +1,288 @@ +"""Materialize repository-files from prepared JobPackages for extraction jobs.""" + +from __future__ import annotations + +import json +from dataclasses import dataclass, field +from pathlib import Path +from typing import Any + +from 
extraction.domain.extraction_job import ExtractionTargetFile, ExtractionTargetInstance +from extraction.domain.prepared_job_package_source import PreparedJobPackageSource +from shared_kernel.job_package.path_safety import validate_zip_entry_name +from shared_kernel.job_package.reader import JobPackageReader +from shared_kernel.job_package.value_objects import JobPackageId + +_INSTANCE_PATH_PROPERTY_KEYS = ( + "config_file_path", + "source_path", + "file_path", + "repository_path", +) + + +@dataclass(frozen=True) +class RepositoryFilesMaterializationResult: + """Outcome of unpacking JobPackage archives into repository-files/.""" + + files_written: int = 0 + packages_requested: int = 0 + packages_found: int = 0 + packages_missing: tuple[str, ...] = field(default_factory=tuple) + paths_requested: tuple[str, ...] = field(default_factory=tuple) + paths_not_found: tuple[str, ...] = field(default_factory=tuple) + sample_paths: tuple[str, ...] = field(default_factory=tuple) + warnings: tuple[str, ...] 
= field(default_factory=tuple) + + def merge(self, other: RepositoryFilesMaterializationResult) -> RepositoryFilesMaterializationResult: + combined_samples = self.sample_paths + other.sample_paths + return RepositoryFilesMaterializationResult( + files_written=self.files_written + other.files_written, + packages_requested=max(self.packages_requested, other.packages_requested), + packages_found=self.packages_found + other.packages_found, + packages_missing=tuple(dict.fromkeys(self.packages_missing + other.packages_missing)), + paths_requested=tuple(dict.fromkeys(self.paths_requested + other.paths_requested)), + paths_not_found=tuple(dict.fromkeys(self.paths_not_found + other.paths_not_found)), + sample_paths=tuple(dict.fromkeys(combined_samples))[:12], + warnings=tuple(dict.fromkeys(self.warnings + other.warnings)), + ) + + def to_dict(self) -> dict[str, Any]: + return { + "files_written": self.files_written, + "packages_requested": self.packages_requested, + "packages_found": self.packages_found, + "packages_missing": list(self.packages_missing), + "paths_requested": list(self.paths_requested), + "paths_not_found": list(self.paths_not_found), + "sample_paths": list(self.sample_paths), + "warnings": list(self.warnings), + } + + +def collect_instance_repository_paths( + instances: tuple[ExtractionTargetInstance, ...], +) -> tuple[str, ...]: + """Return repository-relative paths referenced by assigned instance properties.""" + paths: list[str] = [] + seen: set[str] = set() + for instance in instances: + for key, raw_value in instance.properties.items(): + if raw_value in (None, ""): + continue + candidates: list[str] = [] + if key in _INSTANCE_PATH_PROPERTY_KEYS or key.endswith("_path"): + candidates.append(str(raw_value).strip()) + for candidate in candidates: + normalized = _normalize_repository_path(candidate) + if not normalized or normalized in seen: + continue + seen.add(normalized) + paths.append(normalized) + return tuple(sorted(paths)) + + +def 
_normalize_repository_path(path: str) -> str: + cleaned = path.strip().replace("\\", "/") + while cleaned.startswith("./"): + cleaned = cleaned[2:] + return cleaned.lstrip("/") + + +def _path_matches(requested: str, candidate: str) -> bool: + normalized_requested = _normalize_repository_path(requested) + normalized_candidate = _normalize_repository_path(candidate) + return ( + normalized_candidate == normalized_requested + or normalized_candidate.endswith(f"/{normalized_requested}") + or normalized_requested.endswith(normalized_candidate) + ) + + +def materialize_all_repository_files( + *, + repository_files_dir: Path, + job_package_work_dir: Path, + job_packages: tuple[PreparedJobPackageSource, ...], +) -> RepositoryFilesMaterializationResult: + """Unpack every JobPackage changeset entry into repository-files/.""" + files_written = 0 + packages_found = 0 + packages_missing: list[str] = [] + sample_paths: list[str] = [] + + for source in job_packages: + archive_path = job_package_work_dir / JobPackageId(value=source.package_id).archive_name() + if not archive_path.is_file(): + packages_missing.append(source.package_id) + continue + reader = JobPackageReader(archive_path) + manifest = reader.read_manifest() + if manifest.entry_count <= 0: + continue + packages_found += 1 + for change in reader.iter_changeset(): + if change.content_ref is None or not change.path: + continue + validate_zip_entry_name(change.path) + output_path = repository_files_dir / source.repository_folder / change.path + output_path.parent.mkdir(parents=True, exist_ok=True) + output_path.write_bytes(reader.read_content(change.content_ref)) + files_written += 1 + if len(sample_paths) < 12: + sample_paths.append(f"{source.repository_folder}/{change.path}") + + warnings: list[str] = [] + if job_packages and files_written == 0: + if packages_missing: + warnings.append( + "No JobPackage archives found on disk for: " + + ", ".join(packages_missing[:5]) + + ("..." 
if len(packages_missing) > 5 else "") + ) + else: + warnings.append("JobPackages exist but no repository file content was materialized.") + + return RepositoryFilesMaterializationResult( + files_written=files_written, + packages_requested=len(job_packages), + packages_found=packages_found, + packages_missing=tuple(packages_missing), + sample_paths=tuple(sample_paths), + warnings=tuple(warnings), + ) + + +def materialize_instance_repository_paths( + *, + repository_files_dir: Path, + job_package_work_dir: Path, + job_packages: tuple[PreparedJobPackageSource, ...], + paths: tuple[str, ...], +) -> RepositoryFilesMaterializationResult: + """Materialize only repository paths referenced by assigned entity instances.""" + if not paths: + return RepositoryFilesMaterializationResult() + + files_written = 0 + packages_found = 0 + packages_missing: list[str] = [] + paths_not_found = set(paths) + sample_paths: list[str] = [] + + for source in job_packages: + archive_path = job_package_work_dir / JobPackageId(value=source.package_id).archive_name() + if not archive_path.is_file(): + packages_missing.append(source.package_id) + continue + reader = JobPackageReader(archive_path) + packages_found += 1 + for change in reader.iter_changeset(): + if change.content_ref is None or not change.path: + continue + matched = next( + (requested for requested in paths if _path_matches(requested, str(change.path))), + None, + ) + if matched is None: + continue + validate_zip_entry_name(change.path) + output_path = repository_files_dir / source.repository_folder / change.path + output_path.parent.mkdir(parents=True, exist_ok=True) + output_path.write_bytes(reader.read_content(change.content_ref)) + files_written += 1 + paths_not_found.discard(matched) + if len(sample_paths) < 12: + sample_paths.append(f"{source.repository_folder}/{change.path}") + + warnings: list[str] = [] + if paths_not_found: + warnings.append( + "Instance-referenced paths not found in JobPackages: " + + ", 
".join(sorted(paths_not_found)[:8]) + + ("..." if len(paths_not_found) > 8 else "") + ) + + return RepositoryFilesMaterializationResult( + files_written=files_written, + packages_requested=len(job_packages), + packages_found=packages_found, + packages_missing=tuple(packages_missing), + paths_requested=paths, + paths_not_found=tuple(sorted(paths_not_found)), + sample_paths=tuple(sample_paths), + warnings=tuple(warnings), + ) + + +def materialize_target_files( + *, + repository_files_dir: Path, + job_package_work_dir: Path, + target_files: tuple[ExtractionTargetFile, ...], + packages_by_id: dict[str, PreparedJobPackageSource], +) -> RepositoryFilesMaterializationResult: + files_written = 0 + packages_missing: list[str] = [] + sample_paths: list[str] = [] + + for target_file in target_files: + source = packages_by_id.get(target_file.package_id) + if source is None: + continue + archive_path = job_package_work_dir / JobPackageId(value=source.package_id).archive_name() + if not archive_path.is_file(): + packages_missing.append(target_file.package_id) + continue + reader = JobPackageReader(archive_path) + for change in reader.iter_changeset(): + if change.path != target_file.path or change.content_ref is None: + continue + validate_zip_entry_name(change.path) + output_path = repository_files_dir / source.repository_folder / change.path + output_path.parent.mkdir(parents=True, exist_ok=True) + output_path.write_bytes(reader.read_content(change.content_ref)) + files_written += 1 + sample_paths.append(f"{source.repository_folder}/{change.path}") + break + + return RepositoryFilesMaterializationResult( + files_written=files_written, + packages_requested=len({target_file.package_id for target_file in target_files}), + packages_found=len({target_file.package_id for target_file in target_files}) - len(packages_missing), + packages_missing=tuple(packages_missing), + sample_paths=tuple(sample_paths), + ) + + +def write_sources_index( + *, + job_root: Path, + knowledge_graph_id: 
str, + job_packages: tuple[PreparedJobPackageSource, ...], + materialization: RepositoryFilesMaterializationResult, +) -> None: + """Write sources-index.json (agentic-ci context file) for the job workspace.""" + sources = [ + { + "job_package_id": source.package_id, + "data_source_id": source.data_source_id, + "data_source_name": source.data_source_name, + "repository_folder": source.repository_folder, + "repository_root": f"repository-files/{source.repository_folder}", + } + for source in job_packages + ] + (job_root / "sources-index.json").write_text( + json.dumps( + { + "version": 1, + "knowledge_graph_id": knowledge_graph_id, + "sources": sources, + "materialization": materialization.to_dict(), + }, + indent=2, + ) + + "\n", + encoding="utf-8", + ) diff --git a/src/api/extraction/infrastructure/extraction_job_verdict.py b/src/api/extraction/infrastructure/extraction_job_verdict.py new file mode 100644 index 000000000..59f2af820 --- /dev/null +++ b/src/api/extraction/infrastructure/extraction_job_verdict.py @@ -0,0 +1,66 @@ +"""Load post-run mutation verdict artifacts from agentic-ci workspaces.""" + +from __future__ import annotations + +import json +from dataclasses import dataclass +from pathlib import Path +from typing import Any + +from extraction.infrastructure.extraction_job_workdir_layout import mutation_result_path + + +@dataclass(frozen=True) +class ExtractionMutationVerdict: + """Structured outcome written by helpers/workload-mutations.sh apply.""" + + action: str + applied: bool + operations_applied: int + errors: tuple[str, ...] 
+ http_status: int | None = None + valid: bool | None = None + operation_count: int | None = None + + @classmethod + def from_dict(cls, payload: dict[str, Any]) -> ExtractionMutationVerdict: + return cls( + action=str(payload.get("action") or ""), + applied=bool(payload.get("applied")), + operations_applied=int(payload.get("operations_applied") or 0), + errors=tuple(str(item) for item in payload.get("errors") or []), + http_status=int(payload["http_status"]) if payload.get("http_status") is not None else None, + valid=bool(payload["valid"]) if "valid" in payload else None, + operation_count=int(payload["operation_count"]) + if payload.get("operation_count") is not None + else None, + ) + + +def load_mutation_verdict(job_root: Path) -> ExtractionMutationVerdict | None: + path = mutation_result_path(job_root) + if not path.is_file(): + return None + payload = json.loads(path.read_text(encoding="utf-8")) + if not isinstance(payload, dict): + return None + return ExtractionMutationVerdict.from_dict(payload) + + +def require_successful_apply(job_root: Path) -> ExtractionMutationVerdict: + """Post-agent gate: extraction jobs must apply at least one mutation.""" + verdict = load_mutation_verdict(job_root) + if verdict is None: + raise RuntimeError( + "Extraction job finished without mutations/result.json. " + "Run helpers/workload-mutations.sh apply on your JSONL batch before finishing." + ) + if verdict.action != "apply": + raise RuntimeError( + f"Extraction job wrote mutations/result.json for action '{verdict.action}' " + "but apply is required." 
+ ) + if not verdict.applied or verdict.operations_applied <= 0: + detail = "; ".join(verdict.errors) or "operations_applied is 0" + raise RuntimeError(f"Extraction job applied no graph mutations: {detail}") + return verdict diff --git a/src/api/extraction/infrastructure/extraction_job_workdir_layout.py b/src/api/extraction/infrastructure/extraction_job_workdir_layout.py new file mode 100644 index 000000000..adf65f1b1 --- /dev/null +++ b/src/api/extraction/infrastructure/extraction_job_workdir_layout.py @@ -0,0 +1,48 @@ +"""Agentic-ci workspace layout for extraction job containers.""" + +from __future__ import annotations + +import shutil +from pathlib import Path + +from extraction.infrastructure.extraction_job_helpers import ( + HELPER_SCRIPT_NAMES, + HELPERS_CONTAINER_DIR, + HELPERS_DIR, +) +from extraction.infrastructure.sticky_session_workspace_permissions import ( + ensure_agent_workspace_permissions, +) + +MUTATIONS_DIRNAME = "mutations" +MUTATION_RESULT_FILENAME = "result.json" + + +def prepare_agentic_ci_workspace( + job_root: Path, + *, + container_run_uid: int | None, + container_run_gid: int | None, +) -> None: + """Create writable agent artifacts and copy bundled helpers (context_writer pattern).""" + mutations_dir = job_root / MUTATIONS_DIRNAME + mutations_dir.mkdir(parents=True, exist_ok=True) + + helpers_dir = job_root / HELPERS_CONTAINER_DIR + helpers_dir.mkdir(parents=True, exist_ok=True) + for name in HELPER_SCRIPT_NAMES: + source = HELPERS_DIR / name + if source.is_file(): + target = helpers_dir / name + shutil.copy2(source, target) + target.chmod(target.stat().st_mode | 0o111) + + ensure_agent_workspace_permissions( + job_root, + container_run_uid=container_run_uid, + container_run_gid=container_run_gid, + ) + + +def mutation_result_path(job_root: Path) -> Path: + return job_root / MUTATIONS_DIRNAME / MUTATION_RESULT_FILENAME diff --git a/src/api/extraction/infrastructure/extraction_job_workdir_materializer.py 
b/src/api/extraction/infrastructure/extraction_job_workdir_materializer.py index 1213bf95c..835ed8cce 100644 --- a/src/api/extraction/infrastructure/extraction_job_workdir_materializer.py +++ b/src/api/extraction/infrastructure/extraction_job_workdir_materializer.py @@ -6,14 +6,25 @@ import shutil from pathlib import Path -from extraction.domain.extraction_job import ExtractionJobRecord, ExtractionTargetFile +from extraction.domain.extraction_job import ExtractionJobRecord +from extraction.domain.observability.extraction_job_probe import ( + ExtractionJobMaterializationObservation, + ExtractionJobProbe, + LoggingExtractionJobProbe, +) from extraction.domain.prepared_job_package_source import PreparedJobPackageSource +from extraction.infrastructure.extraction_job_repository_files import ( + RepositoryFilesMaterializationResult, + collect_instance_repository_paths, + materialize_all_repository_files, + materialize_instance_repository_paths, + materialize_target_files, + write_sources_index, +) +from extraction.infrastructure.extraction_job_workdir_layout import prepare_agentic_ci_workspace from extraction.infrastructure.prepared_job_package_reader import SqlPreparedJobPackageReader from extraction.infrastructure.workload_runtime_settings import ExtractionWorkloadRuntimeSettings from extraction.ports.runtime import ScopedWorkloadCredentials -from shared_kernel.job_package.path_safety import validate_zip_entry_name -from shared_kernel.job_package.reader import JobPackageReader -from shared_kernel.job_package.value_objects import JobPackageId class ExtractionJobWorkdirMaterializer: @@ -24,10 +35,12 @@ def __init__( *, settings: ExtractionWorkloadRuntimeSettings, prepared_job_package_reader: SqlPreparedJobPackageReader, + probe: ExtractionJobProbe | None = None, ) -> None: self._settings = settings self._prepared_job_package_reader = prepared_job_package_reader self._job_package_work_dir = Path(settings.job_package_work_dir) + self._probe = probe or 
LoggingExtractionJobProbe() async def prepare( self, @@ -39,6 +52,7 @@ async def prepare( job_root = Path(self._settings.extraction_job_work_dir) / job.knowledge_graph_id / job.job_id if job_root.exists(): shutil.rmtree(job_root) + job_root.mkdir(parents=True, exist_ok=True) repository_files_dir = job_root / "repository-files" repository_files_dir.mkdir(parents=True, exist_ok=True) @@ -46,17 +60,34 @@ async def prepare( knowledge_graph_id=job.knowledge_graph_id, ) packages_by_id = {source.package_id: source for source in job_packages} - if job.target_files: - self._materialize_target_files( - repository_files_dir=repository_files_dir, - target_files=job.target_files, - packages_by_id=packages_by_id, - ) - else: - self._materialize_all_repository_files( - repository_files_dir=repository_files_dir, - job_packages=job_packages, + materialization = self._materialize_repository_files( + job=job, + repository_files_dir=repository_files_dir, + job_packages=job_packages, + packages_by_id=packages_by_id, + ) + write_sources_index( + job_root=job_root, + knowledge_graph_id=job.knowledge_graph_id, + job_packages=job_packages, + materialization=materialization, + ) + prepare_agentic_ci_workspace( + job_root, + container_run_uid=self._settings.container_run_uid, + container_run_gid=self._settings.container_run_gid, + ) + self._probe.repository_files_materialized( + ExtractionJobMaterializationObservation( + job_id=job.job_id, + knowledge_graph_id=job.knowledge_graph_id, + files_written=materialization.files_written, + packages_requested=materialization.packages_requested, + packages_missing=materialization.packages_missing, + paths_requested=materialization.paths_requested, + warnings=materialization.warnings, ) + ) context = { "tenant_id": tenant_id, @@ -69,56 +100,51 @@ async def prepare( "workload_token": credentials.token, "target_instances": [instance.to_dict() for instance in job.target_instances], "target_files": [target_file.to_dict() for target_file in 
job.target_files], + "repository_files": materialization.to_dict(), } (job_root / "job-context.json").write_text( - json.dumps(context, indent=2), + json.dumps(context, indent=2) + "\n", encoding="utf-8", ) return job_root - def _materialize_all_repository_files( + def _materialize_repository_files( self, *, + job: ExtractionJobRecord, repository_files_dir: Path, job_packages: tuple[PreparedJobPackageSource, ...], - ) -> None: - for source in job_packages: - archive_path = self._job_package_work_dir / JobPackageId( - value=source.package_id - ).archive_name() - if not archive_path.is_file(): - continue - reader = JobPackageReader(archive_path) - for change in reader.iter_changeset(): - if change.content_ref is None or not change.path: - continue - validate_zip_entry_name(change.path) - output_path = repository_files_dir / source.repository_folder / change.path - output_path.parent.mkdir(parents=True, exist_ok=True) - output_path.write_bytes(reader.read_content(change.content_ref)) - - def _materialize_target_files( - self, - *, - repository_files_dir: Path, - target_files: tuple[ExtractionTargetFile, ...], packages_by_id: dict[str, PreparedJobPackageSource], - ) -> None: - for target_file in target_files: - source = packages_by_id.get(target_file.package_id) - if source is None: - continue - archive_path = self._job_package_work_dir / JobPackageId( - value=source.package_id - ).archive_name() - if not archive_path.is_file(): - continue - reader = JobPackageReader(archive_path) - for change in reader.iter_changeset(): - if change.path != target_file.path or change.content_ref is None: - continue - validate_zip_entry_name(change.path) - output_path = repository_files_dir / source.repository_folder / change.path - output_path.parent.mkdir(parents=True, exist_ok=True) - output_path.write_bytes(reader.read_content(change.content_ref)) - break + ) -> RepositoryFilesMaterializationResult: + if job.target_files: + return materialize_target_files( + 
repository_files_dir=repository_files_dir, + job_package_work_dir=self._job_package_work_dir, + target_files=job.target_files, + packages_by_id=packages_by_id, + ) + + materialization = RepositoryFilesMaterializationResult() + if job.target_instances: + instance_paths = collect_instance_repository_paths(job.target_instances) + if instance_paths: + materialization = materialize_instance_repository_paths( + repository_files_dir=repository_files_dir, + job_package_work_dir=self._job_package_work_dir, + job_packages=job_packages, + paths=instance_paths, + ) + if materialization.files_written == 0: + fallback = materialize_all_repository_files( + repository_files_dir=repository_files_dir, + job_package_work_dir=self._job_package_work_dir, + job_packages=job_packages, + ) + materialization = materialization.merge(fallback) + return materialization + + return materialize_all_repository_files( + repository_files_dir=repository_files_dir, + job_package_work_dir=self._job_package_work_dir, + job_packages=job_packages, + ) diff --git a/src/api/tests/unit/extraction/infrastructure/test_agentic_ci_extraction_job_runner.py b/src/api/tests/unit/extraction/infrastructure/test_agentic_ci_extraction_job_runner.py index 37f03e233..b4c28dc43 100644 --- a/src/api/tests/unit/extraction/infrastructure/test_agentic_ci_extraction_job_runner.py +++ b/src/api/tests/unit/extraction/infrastructure/test_agentic_ci_extraction_job_runner.py @@ -44,6 +44,8 @@ def test_write_extraction_prompt_file_materializes_instructions(tmp_path: Path) def test_extraction_job_invoke_prompt_references_materialized_file() -> None: assert "extraction_prompt.md" in EXTRACTION_JOB_INVOKE_PROMPT assert "job-context.json" in EXTRACTION_JOB_INVOKE_PROMPT + assert "helpers/workload-mutations.sh" in EXTRACTION_JOB_INVOKE_PROMPT + assert "mutations/result.json" in EXTRACTION_JOB_INVOKE_PROMPT def test_patch_job_context_api_base_rewrites_host_reachable_url(tmp_path: Path) -> None: diff --git 
a/src/api/tests/unit/extraction/infrastructure/test_extraction_job_repository_files.py b/src/api/tests/unit/extraction/infrastructure/test_extraction_job_repository_files.py new file mode 100644 index 000000000..2dff57187 --- /dev/null +++ b/src/api/tests/unit/extraction/infrastructure/test_extraction_job_repository_files.py @@ -0,0 +1,129 @@ +"""Unit tests for extraction job repository-files materialization.""" + +from __future__ import annotations + +from pathlib import Path + +from extraction.domain.extraction_job import ExtractionTargetInstance +from extraction.domain.prepared_job_package_source import PreparedJobPackageSource +from extraction.infrastructure.extraction_job_repository_files import ( + collect_instance_repository_paths, + materialize_all_repository_files, + materialize_instance_repository_paths, +) +from shared_kernel.job_package.builder import JobPackageBuilder +from shared_kernel.job_package.value_objects import ( + AdapterCheckpoint, + ChangeOperation, + ChangesetEntry, + ContentRef, + JobPackageId, + SyncMode, +) + + +def _source(*, package_id: str) -> PreparedJobPackageSource: + return PreparedJobPackageSource( + package_id=package_id, + data_source_id="ds-1", + data_source_name="hyperfleet-e2e", + repository_folder="hyperfleet-e2e", + ) + + +def _build_package(work_dir: Path, package_id: str, path: str, content: bytes) -> None: + builder = JobPackageBuilder( + data_source_id="ds-1", + knowledge_graph_id="kg-1", + sync_mode=SyncMode.FULL_REFRESH, + package_id=JobPackageId(value=package_id), + ) + ref = builder.add_content(content) + builder.add_changeset_entry( + ChangesetEntry( + operation=ChangeOperation.ADD, + id="file-1", + type="io.kartograph.change.file", + path=path, + content_ref=ref, + content_type="text/plain", + metadata={}, + ) + ) + builder.set_checkpoint(AdapterCheckpoint(schema_version="1.0.0", data={"commit_sha": "abc"})) + builder.build(work_dir) + + +def test_collect_instance_repository_paths_reads_config_and_source_paths() 
-> None: + paths = collect_instance_repository_paths( + ( + ExtractionTargetInstance( + slug="adapter-a", + entity_type="Adapter", + properties={ + "config_file_path": "testdata/adapter-configs/cl-stuck/adapter-config.yaml", + "source_path": "pkg/internal/foo.go", + }, + ), + ) + ) + + assert "testdata/adapter-configs/cl-stuck/adapter-config.yaml" in paths + assert "pkg/internal/foo.go" in paths + + +def test_materialize_all_repository_files_writes_changeset(tmp_path: Path) -> None: + package_id = "01JTESTPACK0000000000000000" + _build_package( + tmp_path, + package_id, + "testdata/adapter-configs/cl-stuck/adapter-config.yaml", + b"adapter: stuck\n", + ) + repo_dir = tmp_path / "repository-files" + + result = materialize_all_repository_files( + repository_files_dir=repo_dir, + job_package_work_dir=tmp_path, + job_packages=(_source(package_id=package_id),), + ) + + output = repo_dir / "hyperfleet-e2e" / "testdata/adapter-configs/cl-stuck/adapter-config.yaml" + assert result.files_written == 1 + assert output.read_text(encoding="utf-8") == "adapter: stuck\n" + + +def test_materialize_all_repository_files_warns_when_archives_missing(tmp_path: Path) -> None: + result = materialize_all_repository_files( + repository_files_dir=tmp_path / "repository-files", + job_package_work_dir=tmp_path, + job_packages=(_source(package_id="01JMISSING0000000000000000"),), + ) + + assert result.files_written == 0 + assert result.packages_missing == ("01JMISSING0000000000000000",) + assert any("No JobPackage archives found" in warning for warning in result.warnings) + + +def test_materialize_instance_repository_paths_targets_referenced_files(tmp_path: Path) -> None: + package_id = "01JTESTPACK0000000000000001" + _build_package( + tmp_path, + package_id, + "testdata/adapter-configs/cl-stuck/adapter-config.yaml", + b"adapter: stuck\n", + ) + repo_dir = tmp_path / "repository-files" + + result = materialize_instance_repository_paths( + repository_files_dir=repo_dir, + 
job_package_work_dir=tmp_path, + job_packages=(_source(package_id=package_id),), + paths=("testdata/adapter-configs/cl-stuck/adapter-config.yaml",), + ) + + assert result.files_written == 1 + assert result.paths_not_found == () + assert ( + repo_dir / "hyperfleet-e2e" / "testdata/adapter-configs/cl-stuck/adapter-config.yaml" + ).is_file() diff --git a/src/api/tests/unit/extraction/infrastructure/test_extraction_job_verdict.py b/src/api/tests/unit/extraction/infrastructure/test_extraction_job_verdict.py new file mode 100644 index 000000000..1882bc566 --- /dev/null +++ b/src/api/tests/unit/extraction/infrastructure/test_extraction_job_verdict.py @@ -0,0 +1,73 @@ +"""Unit tests for extraction job mutation verdict post-gates.""" + +from __future__ import annotations + +import json +from pathlib import Path + +import pytest + +from extraction.infrastructure.extraction_job_verdict import ( + load_mutation_verdict, + require_successful_apply, +) + + +def test_require_successful_apply_accepts_apply_verdict(tmp_path: Path) -> None: + result_path = tmp_path / "mutations" / "result.json" + result_path.parent.mkdir(parents=True) + result_path.write_text( + json.dumps( + { + "action": "apply", + "applied": True, + "operations_applied": 3, + "errors": [], + "http_status": 200, + } + ), + encoding="utf-8", + ) + + verdict = require_successful_apply(tmp_path) + + assert verdict.operations_applied == 3 + + +def test_require_successful_apply_rejects_missing_verdict(tmp_path: Path) -> None: + with pytest.raises(RuntimeError, match="mutations/result.json"): + require_successful_apply(tmp_path) + + +def test_require_successful_apply_rejects_zero_operations(tmp_path: Path) -> None: + result_path = tmp_path / "mutations" / "result.json" + result_path.parent.mkdir(parents=True) + result_path.write_text( + json.dumps( + { + "action": "apply", + "applied": True, + "operations_applied": 0, + "errors": [], + } + ), + encoding="utf-8", + ) + + with pytest.raises(RuntimeError, match="applied no 
graph mutations"): + require_successful_apply(tmp_path) + + +def test_load_mutation_verdict_parses_payload(tmp_path: Path) -> None: + result_path = tmp_path / "mutations" / "result.json" + result_path.parent.mkdir(parents=True) + result_path.write_text( + json.dumps({"action": "validate", "valid": True, "operation_count": 2, "errors": []}), + encoding="utf-8", + ) + + verdict = load_mutation_verdict(tmp_path) + + assert verdict is not None + assert verdict.valid is True + assert verdict.operation_count == 2 diff --git a/src/api/tests/unit/extraction/infrastructure/test_extraction_job_workdir_materializer.py b/src/api/tests/unit/extraction/infrastructure/test_extraction_job_workdir_materializer.py new file mode 100644 index 000000000..02388f7a0 --- /dev/null +++ b/src/api/tests/unit/extraction/infrastructure/test_extraction_job_workdir_materializer.py @@ -0,0 +1,135 @@ +"""Unit tests for extraction job workdir materialization.""" + +from __future__ import annotations + +import json +from pathlib import Path +from unittest.mock import AsyncMock + +from datetime import UTC, datetime + +import pytest + +from extraction.domain.extraction_job import ( + ExtractionJobRecord, + ExtractionJobStatus, + ExtractionTargetInstance, +) +from extraction.domain.observability.extraction_job_probe import ExtractionJobMaterializationObservation +from extraction.domain.prepared_job_package_source import PreparedJobPackageSource +from extraction.infrastructure.extraction_job_workdir_materializer import ExtractionJobWorkdirMaterializer +from extraction.infrastructure.workload_runtime_settings import ExtractionWorkloadRuntimeSettings +from extraction.ports.runtime import ScopedWorkloadCredentials +from shared_kernel.job_package.builder import JobPackageBuilder +from shared_kernel.job_package.value_objects import ( + AdapterCheckpoint, + ChangeOperation, + ChangesetEntry, + ContentRef, + JobPackageId, + SyncMode, +) + + +class _RecordingProbe: + def __init__(self) -> None: + 
self.observations: list[ExtractionJobMaterializationObservation] = [] + + def repository_files_materialized(self, observation: ExtractionJobMaterializationObservation) -> None: + self.observations.append(observation) + + +def _build_package(work_dir: Path, package_id: str, path: str, content: bytes) -> None: + builder = JobPackageBuilder( + data_source_id="ds-1", + knowledge_graph_id="kg-1", + sync_mode=SyncMode.FULL_REFRESH, + package_id=JobPackageId(value=package_id), + ) + ref = builder.add_content(content) + builder.add_changeset_entry( + ChangesetEntry( + operation=ChangeOperation.ADD, + id="file-1", + type="io.kartograph.change.file", + path=path, + content_ref=ref, + content_type="text/plain", + metadata={}, + ) + ) + builder.set_checkpoint(AdapterCheckpoint(schema_version="1.0.0", data={"commit_sha": "abc"})) + builder.build(work_dir) + + +@pytest.mark.asyncio +async def test_prepare_materializes_instance_referenced_paths_and_workspace_layout(tmp_path: Path) -> None: + package_id = "01JTESTPACK0000000000000002" + job_packages_dir = tmp_path / "packages" + job_packages_dir.mkdir() + extraction_jobs_dir = tmp_path / "extraction_jobs" + _build_package( + job_packages_dir, + package_id, + "testdata/adapter-configs/cl-stuck/adapter-config.yaml", + b"adapter: stuck\n", + ) + package = PreparedJobPackageSource( + package_id=package_id, + data_source_id="ds-1", + data_source_name="hyperfleet-e2e", + repository_folder="hyperfleet-e2e", + ) + reader = AsyncMock() + reader.list_latest_for_knowledge_graph = AsyncMock(return_value=(package,)) + probe = _RecordingProbe() + materializer = ExtractionJobWorkdirMaterializer( + settings=ExtractionWorkloadRuntimeSettings( + extraction_job_work_dir=str(extraction_jobs_dir), + job_package_work_dir=str(job_packages_dir), + ), + prepared_job_package_reader=reader, + probe=probe, + ) + job = ExtractionJobRecord( + id="job-row", + knowledge_graph_id="kg-1", + job_id="Adapter Deep Extraction_batch_0001_abcd1234", + 
job_set_name="Adapter Deep Extraction", + strategy="by_instances", + status=ExtractionJobStatus.PENDING, + order_index=0, + description="Extract adapter details.", + target_instances=( + ExtractionTargetInstance( + slug="hyperfleet_e2e_cl_stuck", + entity_type="Adapter", + properties={ + "config_file_path": "testdata/adapter-configs/cl-stuck/adapter-config.yaml", + }, + ), + ), + ) + + job_root = await materializer.prepare( + job=job, + tenant_id="tenant-1", + credentials=ScopedWorkloadCredentials( + token="tok", + expires_at=datetime.now(UTC), + scopes=("workload:chat",), + ), + ) + + repo_file = ( + job_root + / "repository-files" + / "hyperfleet-e2e" + / "testdata/adapter-configs/cl-stuck/adapter-config.yaml" + ) + assert repo_file.is_file() + assert (job_root / "mutations").is_dir() + assert (job_root / "helpers" / "workload-mutations.sh").is_file() + context = json.loads((job_root / "job-context.json").read_text(encoding="utf-8")) + assert context["repository_files"]["files_written"] == 1 + assert probe.observations[0].files_written == 1 diff --git a/src/api/tests/unit/management/domain/test_extraction_relationship_authoring.py b/src/api/tests/unit/management/domain/test_extraction_relationship_authoring.py index d5b8a271e..90616f3e0 100644 --- a/src/api/tests/unit/management/domain/test_extraction_relationship_authoring.py +++ b/src/api/tests/unit/management/domain/test_extraction_relationship_authoring.py @@ -43,6 +43,24 @@ def test_omits_relationship_when_counts_are_equal() -> None: assert lines == () +def test_adapter_omits_componenttest_relationship_but_keeps_resource() -> None: + counts = {"Adapter": 19, "Resource": 9, "ComponentTest": 1264} + edges = [ + {"label": "operates_on", "source_type": "Adapter", "target_type": "Resource"}, + {"label": "verifies", "source_type": "ComponentTest", "target_type": "Adapter"}, + ] + lines = relationship_authoring_lines_for_entity_type( + "Adapter", + edge_types=edges, + entity_instance_counts=counts, + ) + labels = 
{(line.relationship_label, line.counterpart_type) for line in lines} + + assert ("operates_on", "Resource") in labels + assert ("verifies", "ComponentTest") not in labels + assert len(lines) == 1 + + def test_includes_inbound_relationship_when_target_side_has_more_instances() -> None: lines = relationship_authoring_lines_for_entity_type( "Service", From 7ad79e9ae3c54b7bfa9487cf0dfc0a85a0eeb290 Mon Sep 17 00:00:00 2001 From: aredenba-rh <aredenba@redhat.com> Date: Fri, 12 Jun 2026 13:26:45 -0400 Subject: [PATCH 124/153] docs(extraction): require explicit IGNORE lines in per-instance descriptions Job set descriptions must list counterpart-owned relationships under 'Ignore these relationships:' with IGNORE lines and instance counts. --- .../extraction_jobs_tools.py | 8 ++--- .../application/skill_resolution_service.py | 30 +++++++++++-------- .../test_skill_resolution_service.py | 2 ++ 3 files changed, 24 insertions(+), 16 deletions(-) diff --git a/src/agent-runtime/kartograph_agent_runtime/extraction_jobs_tools.py b/src/agent-runtime/kartograph_agent_runtime/extraction_jobs_tools.py index c2d4dea9c..24f1df9f8 100644 --- a/src/agent-runtime/kartograph_agent_runtime/extraction_jobs_tools.py +++ b/src/agent-runtime/kartograph_agent_runtime/extraction_jobs_tools.py @@ -49,10 +49,10 @@ async def get_extraction_jobs_config(_args: dict[str, Any]) -> dict[str, Any]: "entity_type + instances_per_job for by_instances, or file_patterns + files_per_job " "for by_files. For by_instances, description must match per_instance_description_authoring: " "opening capture-everything paragraph, Properties section listing each property, then " - "one '{EntityType} -> {rel} -> {CounterpartType}:' line per relationship this entity " - "type owns (include only when this entity type has more live instances than the " - "counterpart type). Example: Adapter (19) includes operates_on->Resource (9), " - "excludes verifies/verifies_inverse->ComponentTest (1264)." 
+ "one '{EntityType} -> {rel} -> {CounterpartType}:' line per owned relationship, " + "plus an 'Ignore these relationships:' section with explicit IGNORE lines for edges " + "where the counterpart type has more instances (e.g. IGNORE Adapter -> " + "verifies_inverse -> ComponentTest when ComponentTest count >> Adapter count)." ), { "version": str, diff --git a/src/api/extraction/application/skill_resolution_service.py b/src/api/extraction/application/skill_resolution_service.py index 1eb4000b6..0b2292e05 100644 --- a/src/api/extraction/application/skill_resolution_service.py +++ b/src/api/extraction/application/skill_resolution_service.py @@ -160,15 +160,15 @@ class ResolvedExtractionSkillPack: "counts. Enumerate every property on the target entity type. For relationships, apply " "the duplicate-work rule by default: when EntityX relates to EntityY, include a " "'{EntityX} -> {relationship_label} -> {EntityY}:' line only if EntityX has MORE live " - "instances than EntityY. Omit that relationship line when EntityX has fewer or equal " - "instances — the higher-volume counterpart type's job set owns that edge work. " + "instances than EntityY. When EntityX has fewer or equal instances, do not omit " + "silently — add an explicit ignore line (see template below). The higher-volume " + "counterpart type's job set owns that edge work. " "Example (use real counts from entity_types): Adapter (19 instances) vs Resource (9) " "vs ComponentTest (1264) — an Adapter job set includes " - "'Adapter -> operates_on -> Resource:' (19 > 9) and must NOT include any " - "ComponentTest relationship line such as " - "'Adapter -> verifies_inverse -> ComponentTest:' (19 << 1264; ComponentTest jobs " - "create verifies edges toward adapters). Compare counts numerically before writing " - "each relationship line. 
" + "'Adapter -> operates_on -> Resource:' (19 > 9) and must include " + "'IGNORE Adapter -> verifies_inverse -> ComponentTest: handled by ComponentTest job " + "sets (1264 vs 19 instances). Do not create or update this edge in Adapter jobs.' " + "Compare counts numerically for every ontology relationship before writing each line. " "Write the description using this exact shape (replace with real ontology names — " "never use placeholder EntityX in saved text): " "Opening (one paragraph): 'For each of the instances of {EntityType} you've been " @@ -182,8 +182,14 @@ class ResolvedExtractionSkillPack: "instance-count rule), formatted exactly: " "'{EntityType} -> {relationship_label} -> {CounterpartType}: ' followed by when to " "create or update that edge, how to resolve the counterpart slug, and whether to " - "create missing endpoints. Do NOT list relationship types owned by the counterpart " - "entity type's job set under the duplicate-work rule. " + "create missing endpoints. " + "Then 'Ignore these relationships:' — for every ontology edge involving {EntityType} " + "where {EntityType} has fewer or equal live instances than the counterpart, write " + "exactly one line: " + "'IGNORE {EntityType} -> {relationship_label} -> {CounterpartType}: handled by " + "{CounterpartType} job sets ({counterpart_count} vs {EntityType} {entity_count} " + "instances). Do not create or update this edge in this job set.' " + "List every such edge explicitly; never leave ignored relationships unstated. " "FORBIDDEN: thematic sections only (e.g. 'Implementation Analysis', 'Configuration " "Details', 'Operational Characteristics') that do not name every property and every " "included 'Entity -> rel -> Entity' line. 
Narrow scope only when the operator " @@ -222,9 +228,9 @@ class ResolvedExtractionSkillPack: GraphManagementUiMode.EXTRACTION_JOBS: { "ui_mode_framing": ( "Focus on extraction job set setup: by_instances batches whose description follows " - "per_instance_description_authoring (all properties; relationship lines only where " - "this entity type has more instances than the counterpart — e.g. Adapter includes " - "operates_on->Resource but not verifies->ComponentTest). Persist via " + "per_instance_description_authoring (all properties; owned relationship lines plus " + "explicit IGNORE lines where counterpart types have more instances — e.g. Adapter " + "includes operates_on->Resource and IGNORE verifies_inverse->ComponentTest). Persist via " "kartograph_save_extraction_jobs_config when the operator " "approves, then guide them to Run extraction. Use kartograph_get_schema_ontology and " "kartograph_list_instances_by_type to size batches. JobPackage readiness still " diff --git a/src/api/tests/unit/extraction/application/test_skill_resolution_service.py b/src/api/tests/unit/extraction/application/test_skill_resolution_service.py index f0a97a466..168a3166c 100644 --- a/src/api/tests/unit/extraction/application/test_skill_resolution_service.py +++ b/src/api/tests/unit/extraction/application/test_skill_resolution_service.py @@ -66,6 +66,8 @@ async def test_extraction_mode_uses_extraction_defaults(self): assert "per_instance_description_authoring" in resolved.skills assert "EntityType} ->" in resolved.skills["per_instance_description_authoring"] assert "MORE live instances" in resolved.skills["per_instance_description_authoring"] + assert "Ignore these relationships" in resolved.skills["per_instance_description_authoring"] + assert "IGNORE" in resolved.skills["per_instance_description_authoring"] assert "Implementation Analysis" in resolved.skills["per_instance_description_authoring"] assert "minor_edits" in resolved.skills assert "schema_edits_secondary" in 
resolved.skills From 8b72781aa748962748b1bd1f1061dc9572c7832e Mon Sep 17 00:00:00 2001 From: aredenba-rh <aredenba@redhat.com> Date: Fri, 12 Jun 2026 14:15:05 -0400 Subject: [PATCH 125/153] fix(extraction): default 20 workers and tighten job-set authoring Raise parallel extraction worker default from 2 to 20. Enforce per-instance description ownership on save, expose relationship authoring hints in config API, and keep assistant prompts correct on follow-up turns. Kill and Reset Running now stop orphaned extraction containers. Co-authored-by: Cursor <cursoragent@cursor.com> --- .../kartograph_agent_runtime/agent_prompt.py | 49 +++- .../extraction_jobs_tools.py | 4 +- src/agent-runtime/tests/test_agent_prompt.py | 25 ++ .../extraction_job_container.py | 13 + .../extraction_run_orchestrator.py | 10 + .../repositories/extraction_job_repository.py | 8 + .../extraction_jobs_service.py | 17 +- .../management/extraction_jobs_service.py | 62 +++- .../domain/extraction_job_config.py | 34 ++- .../extraction_relationship_authoring.py | 276 +++++++++++++++--- .../extraction_jobs_routes.py | 2 +- .../test_extraction_job_container.py | 46 +++ .../domain/test_extraction_job_config.py | 28 ++ .../test_extraction_relationship_authoring.py | 100 ++++++- .../GraphExtractionJobsWorkspace.vue | 11 +- 15 files changed, 624 insertions(+), 61 deletions(-) create mode 100644 src/api/tests/unit/extraction/infrastructure/test_extraction_job_container.py diff --git a/src/agent-runtime/kartograph_agent_runtime/agent_prompt.py b/src/agent-runtime/kartograph_agent_runtime/agent_prompt.py index 6b501f1c8..93f8478c9 100644 --- a/src/agent-runtime/kartograph_agent_runtime/agent_prompt.py +++ b/src/agent-runtime/kartograph_agent_runtime/agent_prompt.py @@ -63,7 +63,7 @@ | Tool | Purpose | |------|---------| -| `kartograph_get_extraction_jobs_config` | Read saved job sets and live entity instance counts | +| `kartograph_get_extraction_jobs_config` | Read saved job sets, live instance counts, and 
`relationship_authoring_by_entity_type` | | `kartograph_save_extraction_jobs_config` | Save job sets and regenerate pending jobs (operator-approved configs) | | `kartograph_get_extraction_jobs_plan_summary` | Projected job counts per job set before/after save | | `kartograph_get_extraction_jobs_status` | Queue metrics: pending/in-progress/completed/failed jobs | @@ -73,19 +73,34 @@ ### Per-instance description (by_instances job sets) -Use this template (substitute real entity and relationship names from `kartograph_get_schema_ontology`): +Before drafting, call `kartograph_get_extraction_jobs_config` and read +`relationship_authoring_by_entity_type.{EntityType}` — it lists exact `owned` line prefixes +and `ignored` ignore_line text from live instance counts. Copy those lines; do not invent +relationship targets from the raw ontology alone. + +Use this template (substitute real entity and relationship names): ``` -For each of the instances of {EntityType} you've been assigned, capture everything into the knowledge graph: all properties of that instance and every relationship instance an instance of {EntityType} can have. +For each of the instances of {EntityType} you've been assigned, capture everything into the knowledge graph: all properties of that instance and every relationship instance this job set owns (see lines below). Properties: - {property_name}: {how to extract, where in repository-files/, value shape} - ... {EntityType} -> {relationship_label} -> {CounterpartType}: {when to create/update; how to resolve counterpart slug} -{EntityType} -> {other_rel} -> {OtherType}: ... +(one line per entry in relationship_authoring_by_entity_type.{EntityType}.owned only) + +Ignore these relationships: +IGNORE {EntityType} -> {relationship_label} -> {CounterpartType}: handled by {CounterpartType} job sets ({counterpart_count} vs {EntityType} {entity_count} instances). Do not create or update this edge in this job set. 
+(one line per entry in relationship_authoring_by_entity_type.{EntityType}.ignored — never list these as active extraction targets) + ``` +**Ownership rule:** include `{EntityType} -> {rel} -> {Counterpart}` as an active line only when +{EntityType} has MORE live instances than {Counterpart}. When the counterpart has more (or equal), +use an IGNORE line only — e.g. Adapter (19) owns `operates_on -> Resource` (9) but must IGNORE +`verifies_inverse -> ComponentTest` (1264 instances). + Do **not** use theme-only sections (Implementation Analysis, Configuration Details, etc.). When the operator approves, save via `kartograph_save_extraction_jobs_config`. """.strip() @@ -179,6 +194,9 @@ def _format_workspace_readiness(readiness: dict[str, Any]) -> str: return "\n".join(lines) +_EXTRACTION_JOBS_COMPACT_SKILL_KEYS = ("per_instance_description_authoring", "job_set_contract") + + def build_agent_system_prompt( agent_configuration: dict[str, Any], *, @@ -205,19 +223,36 @@ def build_agent_system_prompt( if ui_mode: skill_sections.append(f"UI mode: {ui_mode}") - for key, value in sorted(skills.items()): + skills_dict = dict(skills) if isinstance(skills, dict) else {} + if prompt_detail == "compact" and ui_mode == "extraction-jobs": + skill_items = sorted( + (key, value) + for key, value in skills_dict.items() + if key in _EXTRACTION_JOBS_COMPACT_SKILL_KEYS + ) + elif prompt_detail == "full": + skill_items = sorted(skills_dict.items()) + else: + skill_items = [] + + for key, value in skill_items: text = str(value).strip() if text: skill_sections.append(f"**{key}**: {text}") skills_block = "" - if prompt_detail == "full" and skill_sections: + if skill_sections and (prompt_detail == "full" or skill_items): skills_block = "## Skills\n\n" + "\n\n".join(skill_sections) tools_block = "" if include_tools_manifest and settings is not None and settings.workload_token.strip(): if prompt_detail == "compact": - tools_block = f"## Tools\n\n{_TOOLS_COMPACT_REFERENCE}" + extraction_jobs_block 
= ( + f"\n\n{_EXTRACTION_JOBS_TOOLS_REFERENCE}" + if ui_mode == "extraction-jobs" + else "" + ) + tools_block = f"## Tools\n\n{_TOOLS_COMPACT_REFERENCE}{extraction_jobs_block}" else: kartograph_tools = ", ".join( f"`{name}`" diff --git a/src/agent-runtime/kartograph_agent_runtime/extraction_jobs_tools.py b/src/agent-runtime/kartograph_agent_runtime/extraction_jobs_tools.py index 24f1df9f8..df2c7a575 100644 --- a/src/agent-runtime/kartograph_agent_runtime/extraction_jobs_tools.py +++ b/src/agent-runtime/kartograph_agent_runtime/extraction_jobs_tools.py @@ -23,7 +23,9 @@ def append_extraction_jobs_tools(*, tooling: RuntimeTooling, tools: list[Any]) - "kartograph_get_extraction_jobs_config", ( "Read saved extraction job sets for this knowledge graph, including live " - "entity type instance counts. Call before proposing or saving changes." + "entity type instance counts and relationship_authoring_by_entity_type " + "(owned line prefixes and IGNORE lines per entity type). Call before proposing " + "or saving changes." 
), {}, ) diff --git a/src/agent-runtime/tests/test_agent_prompt.py b/src/agent-runtime/tests/test_agent_prompt.py index a56fecfb7..545c0e784 100644 --- a/src/agent-runtime/tests/test_agent_prompt.py +++ b/src/agent-runtime/tests/test_agent_prompt.py @@ -106,3 +106,28 @@ def test_build_agent_system_prompt_compact_omits_skills_and_full_tools_table() - assert "Quick workflow" not in prompt assert "entities_to_jsonl.py" in prompt assert "never /tmp" in prompt.lower() or "Never /tmp" in prompt + + +def test_build_agent_system_prompt_compact_extraction_jobs_keeps_description_authoring_skill() -> None: + prompt = build_agent_system_prompt( + { + "system_prompt": "You are the Graph Management Assistant.", + "skills": { + "prepopulation": "Run instance_generators with Bash.", + "per_instance_description_authoring": "Use IGNORE lines when counterpart has more instances.", + "job_set_contract": "Save via kartograph_save_extraction_jobs_config.", + }, + "graph_management_ui_mode": "extraction-jobs", + }, + settings=AgentRuntimeSettings( + KARTOGRAPH_WORKLOAD_TOKEN="token", + KARTOGRAPH_KNOWLEDGE_GRAPH_ID="kg-123", + ), + prompt_detail="compact", + ) + + assert "**prepopulation**" not in prompt + assert "**per_instance_description_authoring**" in prompt + assert "IGNORE lines" in prompt + assert "relationship_authoring_by_entity_type" in prompt + assert "verifies_inverse -> ComponentTest" in prompt diff --git a/src/api/extraction/infrastructure/extraction_job_container.py b/src/api/extraction/infrastructure/extraction_job_container.py index 2e6d30c6c..52d3c4b21 100644 --- a/src/api/extraction/infrastructure/extraction_job_container.py +++ b/src/api/extraction/infrastructure/extraction_job_container.py @@ -19,3 +19,16 @@ def stop_extraction_job_container(*, job_id: str, container_engine: str = "auto" runtime = create_container_runtime(container_engine) name = extraction_job_container_name(job_id) return runtime.remove_by_name(name, force=True) + + +def 
stop_extraction_job_containers( + *, + job_ids: tuple[str, ...] | list[str], + container_engine: str = "auto", +) -> int: + """Stop and remove extraction containers for many jobs. Returns count removed.""" + stopped = 0 + for job_id in job_ids: + if stop_extraction_job_container(job_id=job_id, container_engine=container_engine): + stopped += 1 + return stopped diff --git a/src/api/extraction/infrastructure/extraction_run_orchestrator.py b/src/api/extraction/infrastructure/extraction_run_orchestrator.py index 772f6e002..e5c256167 100644 --- a/src/api/extraction/infrastructure/extraction_run_orchestrator.py +++ b/src/api/extraction/infrastructure/extraction_run_orchestrator.py @@ -89,6 +89,16 @@ async def request_pause(self, *, knowledge_graph_id: str) -> None: ) await session.commit() + async def stop_workers(self, *, knowledge_graph_id: str) -> None: + """Cancel worker tasks without changing job rows (for reset-to-pending flows).""" + state = self._active.get(knowledge_graph_id) + if state is None: + return + state.stop_event.set() + for task in state.tasks: + task.cancel() + self._active.pop(knowledge_graph_id, None) + async def halt(self, *, knowledge_graph_id: str) -> None: state = self._active.get(knowledge_graph_id) if state is not None: diff --git a/src/api/extraction/infrastructure/repositories/extraction_job_repository.py b/src/api/extraction/infrastructure/repositories/extraction_job_repository.py index c841e2d85..87a51bc99 100644 --- a/src/api/extraction/infrastructure/repositories/extraction_job_repository.py +++ b/src/api/extraction/infrastructure/repositories/extraction_job_repository.py @@ -241,6 +241,14 @@ async def delete_pending_job( ) return int(result.rowcount or 0) > 0 + async def list_in_progress_job_ids(self, *, knowledge_graph_id: str) -> list[str]: + stmt = select(ExtractionJobModel.job_id).where( + ExtractionJobModel.knowledge_graph_id == knowledge_graph_id, + ExtractionJobModel.status == ExtractionJobStatus.IN_PROGRESS.value, + ) + result 
= await self._session.execute(stmt) + return [str(row[0]) for row in result.all()] + async def count_by_status(self, *, knowledge_graph_id: str) -> dict[str, int]: stmt = ( select(ExtractionJobModel.status, func.count()) diff --git a/src/api/infrastructure/extraction_workload/extraction_jobs_service.py b/src/api/infrastructure/extraction_workload/extraction_jobs_service.py index d5b00c85a..988cf19fd 100644 --- a/src/api/infrastructure/extraction_workload/extraction_jobs_service.py +++ b/src/api/infrastructure/extraction_workload/extraction_jobs_service.py @@ -30,6 +30,10 @@ ExtractionJobSetDefinition, ExtractionJobSetStrategy, ) +from management.domain.extraction_relationship_authoring import ( + edge_type_dicts_from_ontology, + relationship_authoring_by_entity_type, +) from management.domain.value_objects import KnowledgeGraphId from management.infrastructure.repositories.knowledge_graph_repository import ( KnowledgeGraphRepository, @@ -92,6 +96,8 @@ async def get_document( knowledge_graph_id=knowledge_graph_id, graph_data=graph_data, ) + ontology = await self._knowledge_graph_repository.get_ontology(knowledge_graph_id) + edge_types = edge_type_dicts_from_ontology(ontology) entity_types = [ {"name": name, "instance_count": count} for name, count in sorted(counts.items(), key=lambda item: item[0]) @@ -99,6 +105,10 @@ async def get_document( return { **document.to_dict(), "entity_types": entity_types, + "relationship_authoring_by_entity_type": relationship_authoring_by_entity_type( + entity_instance_counts=counts, + edge_types=edge_types, + ), } async def save_document( @@ -125,7 +135,12 @@ async def save_document( knowledge_graph_id=knowledge_graph_id, graph_data=graph_data, ) - errors = document.validation_errors(entity_instance_counts=counts) + ontology = await self._knowledge_graph_repository.get_ontology(knowledge_graph_id) + edge_types = edge_type_dicts_from_ontology(ontology) + errors = document.validation_errors( + entity_instance_counts=counts, + 
edge_types=edge_types, + ) if errors: raise ValueError("; ".join(errors)) diff --git a/src/api/infrastructure/management/extraction_jobs_service.py b/src/api/infrastructure/management/extraction_jobs_service.py index 69342190f..b7fa82f90 100644 --- a/src/api/infrastructure/management/extraction_jobs_service.py +++ b/src/api/infrastructure/management/extraction_jobs_service.py @@ -15,7 +15,10 @@ materialize_jobs_from_config, projected_job_count, ) -from extraction.infrastructure.extraction_job_container import stop_extraction_job_container +from extraction.infrastructure.extraction_job_container import ( + stop_extraction_job_container, + stop_extraction_job_containers, +) from extraction.infrastructure.extraction_run_orchestrator import get_extraction_run_orchestrator from extraction.domain.extraction_job import ExtractionJobStatus, ExtractionRunStatus from extraction.infrastructure.extraction_job_activity import ( @@ -36,6 +39,10 @@ ExtractionJobSetDefinition, ExtractionJobSetStrategy, ) +from management.domain.extraction_relationship_authoring import ( + edge_type_dicts_from_ontology, + relationship_authoring_by_entity_type, +) from management.infrastructure.repositories.knowledge_graph_repository import ( KnowledgeGraphRepository, ) @@ -105,6 +112,8 @@ async def get_extraction_jobs_document( knowledge_graph_id=kg_id, graph_data=graph_data, ) + ontology = await self._knowledge_graph_repository.get_ontology(kg_id) + edge_types = edge_type_dicts_from_ontology(ontology) entity_types = [ {"name": name, "instance_count": count} for name, count in sorted(counts.items(), key=lambda item: item[0]) @@ -112,6 +121,10 @@ async def get_extraction_jobs_document( return { **document.to_dict(), "entity_types": entity_types, + "relationship_authoring_by_entity_type": relationship_authoring_by_entity_type( + entity_instance_counts=counts, + edge_types=edge_types, + ), } async def save_extraction_jobs_document( @@ -137,7 +150,12 @@ async def save_extraction_jobs_document( 
knowledge_graph_id=kg_id, graph_data=graph_data, ) - errors = document.validation_errors(entity_instance_counts=counts) + ontology = await self._knowledge_graph_repository.get_ontology(kg_id) + edge_types = edge_type_dicts_from_ontology(ontology) + errors = document.validation_errors( + entity_instance_counts=counts, + edge_types=edge_types, + ) if errors: raise ValueError("; ".join(errors)) @@ -217,6 +235,18 @@ async def regenerate_jobs( await self._session.commit() return result + async def _stop_in_progress_containers(self, *, kg_id: str) -> int: + runtime_settings = get_extraction_workload_runtime_settings() + job_ids = await self._extraction_job_repository.list_in_progress_job_ids( + knowledge_graph_id=kg_id, + ) + if not job_ids: + return 0 + return stop_extraction_job_containers( + job_ids=job_ids, + container_engine=runtime_settings.container_engine, + ) + async def cancel_job( self, *, @@ -481,19 +511,43 @@ async def halt_extraction(self, *, user_id: str, kg_id: str) -> dict[str, Any]: kg = await self._knowledge_graph_service.get(user_id=user_id, kg_id=kg_id) if kg is None: raise ValueError(f"Knowledge graph '{kg_id}' not found") + job_ids = await self._extraction_job_repository.list_in_progress_job_ids( + knowledge_graph_id=kg_id, + ) orchestrator = get_extraction_run_orchestrator(session_factory=self._session_factory) await orchestrator.halt(knowledge_graph_id=kg_id) + runtime_settings = get_extraction_workload_runtime_settings() + stopped = stop_extraction_job_containers( + job_ids=job_ids, + container_engine=runtime_settings.container_engine, + ) await self._session.commit() - return {"success": True, "message": "Extraction halted and incomplete jobs marked failed."} + return { + "success": True, + "message": ( + "Extraction halted, incomplete jobs marked failed, and " + f"{stopped} extraction container(s) stopped." 
+ ), + } async def reset_stale_jobs(self, *, user_id: str, kg_id: str) -> dict[str, Any]: _ = await self._knowledge_graph_service.get(user_id=user_id, kg_id=kg_id) + orchestrator = get_extraction_run_orchestrator(session_factory=self._session_factory) + await orchestrator.stop_workers(knowledge_graph_id=kg_id) + stopped = await self._stop_in_progress_containers(kg_id=kg_id) reset = await self._extraction_job_repository.reset_jobs_by_status( knowledge_graph_id=kg_id, from_status=ExtractionJobStatus.IN_PROGRESS, ) await self._session.commit() - return {"success": True, "reset_count": reset} + return { + "success": True, + "reset_count": reset, + "containers_stopped": stopped, + "message": ( + f"Reset {reset} running job(s) to pending and stopped {stopped} container(s)." + ), + } async def reset_completed_jobs(self, *, user_id: str, kg_id: str) -> dict[str, Any]: _ = await self._knowledge_graph_service.get(user_id=user_id, kg_id=kg_id) diff --git a/src/api/management/domain/extraction_job_config.py b/src/api/management/domain/extraction_job_config.py index 07e764d54..397a005cf 100644 --- a/src/api/management/domain/extraction_job_config.py +++ b/src/api/management/domain/extraction_job_config.py @@ -7,6 +7,9 @@ from typing import Any +from management.domain.extraction_relationship_authoring import ( + per_instance_description_relationship_errors, +) class ExtractionJobSetStrategy(StrEnum): """Batching strategy for an extraction job set.""" @@ -31,7 +34,12 @@ def __post_init__(self) -> None: if not self.name or not self.name.strip(): raise ValueError("Job set name must not be empty") - def validation_errors(self, *, entity_instance_counts: dict[str, int]) -> tuple[str, ...]: + def validation_errors( + self, + *, + entity_instance_counts: dict[str, int], + edge_types: list[dict[str, Any]] | None = None, + ) -> tuple[str, ...]: """Return human-readable validation errors for this job set.""" if not self.enabled: return () @@ -52,6 +60,16 @@ def validation_errors(self, *, 
entity_instance_counts: dict[str, int]) -> tuple[ errors.append( f"{self.name}: per-instance extraction description is required." ) + elif self.entity_type and edge_types: + errors.extend( + f"{self.name}: {err}" + for err in per_instance_description_relationship_errors( + self.description, + self.entity_type, + edge_types=edge_types, + entity_instance_counts=entity_instance_counts, + ) + ) elif self.strategy == ExtractionJobSetStrategy.BY_FILES: if not self.file_patterns: errors.append(f"{self.name}: at least one file pattern is required for by_files.") @@ -113,14 +131,24 @@ class ExtractionJobConfigDocument: def enabled_job_sets(self) -> tuple[ExtractionJobSetDefinition, ...]: return tuple(job_set for job_set in self.job_sets if job_set.enabled) - def validation_errors(self, *, entity_instance_counts: dict[str, int]) -> tuple[str, ...]: + def validation_errors( + self, + *, + entity_instance_counts: dict[str, int], + edge_types: list[dict[str, Any]] | None = None, + ) -> tuple[str, ...]: errors: list[str] = [] seen_names: set[str] = set() for job_set in self.job_sets: if job_set.name in seen_names: errors.append(f"Duplicate job set name '{job_set.name}'.") seen_names.add(job_set.name) - errors.extend(job_set.validation_errors(entity_instance_counts=entity_instance_counts)) + errors.extend( + job_set.validation_errors( + entity_instance_counts=entity_instance_counts, + edge_types=edge_types, + ) + ) return tuple(errors) def to_dict(self) -> dict[str, Any]: diff --git a/src/api/management/domain/extraction_relationship_authoring.py b/src/api/management/domain/extraction_relationship_authoring.py index b9a2c23fe..f388eb672 100644 --- a/src/api/management/domain/extraction_relationship_authoring.py +++ b/src/api/management/domain/extraction_relationship_authoring.py @@ -15,22 +15,58 @@ class RelationshipAuthoringLine: counterpart_type: str -def relationship_authoring_lines_for_entity_type( +@dataclass(frozen=True) +class RelationshipAuthoringGuidance: + """Owned vs 
ignored relationship lines for one entity type.""" + + owned: tuple[RelationshipAuthoringLine, ...] + ignored: tuple[RelationshipAuthoringLine, ...] + + +def format_owned_line_prefix(line: RelationshipAuthoringLine) -> str: + return ( + f"{line.entity_type} -> {line.relationship_label} -> {line.counterpart_type}:" + ) + + +def format_ignore_line( + line: RelationshipAuthoringLine, + *, + entity_count: int, + counterpart_count: int, +) -> str: + return ( + f"IGNORE {line.entity_type} -> {line.relationship_label} -> {line.counterpart_type}: " + f"handled by {line.counterpart_type} job sets ({counterpart_count} vs " + f"{line.entity_type} {entity_count} instances). Do not create or update this edge " + f"in this job set." + ) + + +def edge_type_dicts_from_ontology(ontology: Any | None) -> list[dict[str, Any]]: + """Normalize ontology edge types for relationship authoring helpers.""" + if ontology is None: + return [] + edge_types = getattr(ontology, "edge_types", None) or [] + rows: list[dict[str, Any]] = [] + for edge in edge_types: + source_labels = getattr(edge, "source_labels", None) or () + target_labels = getattr(edge, "target_labels", None) or () + rows.append( + { + "label": str(getattr(edge, "label", "") or "").strip(), + "source_type": str(source_labels[0]).strip() if source_labels else "", + "target_type": str(target_labels[0]).strip() if target_labels else "", + } + ) + return rows + + +def _relationship_lines_involving_entity_type( entity_type: str, *, edge_types: list[dict[str, Any]], - entity_instance_counts: dict[str, int], ) -> tuple[RelationshipAuthoringLine, ...]: - """Return relationship lines EntityX jobs should cover to avoid duplicate work. - - When EntityX relates to EntityY, only the side with more live instances - should create/update that relationship in its extraction jobs. The side - with fewer (or equal) instances omits the line. 
- """ - entity_count = entity_instance_counts.get(entity_type, 0) - if entity_count <= 0: - return () - lines: list[RelationshipAuthoringLine] = [] seen: set[tuple[str, str, str]] = set() @@ -42,34 +78,202 @@ def relationship_authoring_lines_for_entity_type( continue if source_type == entity_type and target_type: - counterpart = target_type - counterpart_count = entity_instance_counts.get(counterpart, 0) - if entity_count > counterpart_count: - key = (entity_type, label, counterpart) - if key not in seen: - seen.add(key) - lines.append( - RelationshipAuthoringLine( - entity_type=entity_type, - relationship_label=label, - counterpart_type=counterpart, - ) + key = (entity_type, label, target_type) + if key not in seen: + seen.add(key) + lines.append( + RelationshipAuthoringLine( + entity_type=entity_type, + relationship_label=label, + counterpart_type=target_type, ) + ) continue if target_type == entity_type and source_type: - counterpart = source_type - counterpart_count = entity_instance_counts.get(counterpart, 0) - if entity_count > counterpart_count: - key = (entity_type, label, counterpart) - if key not in seen: - seen.add(key) - lines.append( - RelationshipAuthoringLine( - entity_type=entity_type, - relationship_label=label, - counterpart_type=counterpart, - ) + key = (entity_type, label, source_type) + if key not in seen: + seen.add(key) + lines.append( + RelationshipAuthoringLine( + entity_type=entity_type, + relationship_label=label, + counterpart_type=source_type, ) + ) return tuple(sorted(lines, key=lambda line: (line.relationship_label, line.counterpart_type))) + + +def relationship_authoring_guidance_for_entity_type( + entity_type: str, + *, + edge_types: list[dict[str, Any]], + entity_instance_counts: dict[str, int], +) -> RelationshipAuthoringGuidance: + """Split ontology edges into owned vs ignored lines for per-instance descriptions.""" + entity_count = entity_instance_counts.get(entity_type, 0) + if entity_count <= 0: + return 
RelationshipAuthoringGuidance(owned=(), ignored=()) + + owned: list[RelationshipAuthoringLine] = [] + ignored: list[RelationshipAuthoringLine] = [] + for line in _relationship_lines_involving_entity_type(entity_type, edge_types=edge_types): + counterpart_count = entity_instance_counts.get(line.counterpart_type, 0) + if entity_count > counterpart_count: + owned.append(line) + else: + ignored.append(line) + + return RelationshipAuthoringGuidance(owned=tuple(owned), ignored=tuple(ignored)) + + +def relationship_authoring_lines_for_entity_type( + entity_type: str, + *, + edge_types: list[dict[str, Any]], + entity_instance_counts: dict[str, int], +) -> tuple[RelationshipAuthoringLine, ...]: + """Return relationship lines EntityX jobs should cover to avoid duplicate work. + + When EntityX relates to EntityY, only the side with more live instances + should create/update that relationship in its extraction jobs. The side + with fewer (or equal) instances omits the line. + """ + return relationship_authoring_guidance_for_entity_type( + entity_type, + edge_types=edge_types, + entity_instance_counts=entity_instance_counts, + ).owned + + +def _line_key(line: RelationshipAuthoringLine) -> str: + return ( + f"{line.entity_type} -> {line.relationship_label} -> {line.counterpart_type}" + ) + + +def _active_relationship_line_present(description: str, line: RelationshipAuthoringLine) -> bool: + key = _line_key(line).lower() + for raw_line in description.splitlines(): + stripped = raw_line.strip() + if stripped.upper().startswith("IGNORE "): + continue + if key in stripped.lower() and ":" in stripped: + return True + return False + + +def _ignore_relationship_line_present(description: str, line: RelationshipAuthoringLine) -> bool: + key = _line_key(line).lower() + for raw_line in description.splitlines(): + stripped = raw_line.strip() + if not stripped.upper().startswith("IGNORE "): + continue + if key in stripped.lower(): + return True + return False + + +def 
per_instance_description_relationship_errors( + description: str, + entity_type: str, + *, + edge_types: list[dict[str, Any]], + entity_instance_counts: dict[str, int], +) -> tuple[str, ...]: + """Validate owned vs IGNORE relationship lines in a per-instance description.""" + if not edge_types: + return () + + guidance = relationship_authoring_guidance_for_entity_type( + entity_type, + edge_types=edge_types, + entity_instance_counts=entity_instance_counts, + ) + errors: list[str] = [] + + for line in guidance.owned: + if not _active_relationship_line_present(description, line): + errors.append( + f"{entity_type}: missing owned relationship line " + f"'{format_owned_line_prefix(line)}' (include extraction instructions after the colon)." + ) + + for line in guidance.ignored: + if _active_relationship_line_present(description, line): + errors.append( + f"{entity_type}: must not list '{_line_key(line)}' as an active extraction target " + f"(counterpart has more instances). Use an IGNORE line instead." + ) + if not _ignore_relationship_line_present(description, line): + errors.append( + f"{entity_type}: missing IGNORE line for '{_line_key(line)}' under " + "'Ignore these relationships:'." 
+ ) + + return tuple(errors) + + +def relationship_authoring_by_entity_type( + *, + entity_instance_counts: dict[str, int], + edge_types: list[dict[str, Any]], +) -> dict[str, Any]: + """Build owned/ignored guidance for every entity type in counts or ontology edges.""" + entity_types = sorted( + { + *entity_instance_counts.keys(), + *(edge.get("source_type") or "" for edge in edge_types), + *(edge.get("target_type") or "" for edge in edge_types), + } + ) + payload: dict[str, Any] = {} + for entity_type in entity_types: + if not entity_type: + continue + payload[entity_type] = relationship_authoring_payload_for_entity_type( + entity_type, + edge_types=edge_types, + entity_instance_counts=entity_instance_counts, + ) + return payload + + +def relationship_authoring_payload_for_entity_type( + entity_type: str, + *, + edge_types: list[dict[str, Any]], + entity_instance_counts: dict[str, int], +) -> dict[str, Any]: + """Serialize owned/ignored lines for API responses and agent tooling.""" + entity_count = entity_instance_counts.get(entity_type, 0) + guidance = relationship_authoring_guidance_for_entity_type( + entity_type, + edge_types=edge_types, + entity_instance_counts=entity_instance_counts, + ) + return { + "entity_type": entity_type, + "entity_instance_count": entity_count, + "owned": [ + { + "relationship_label": line.relationship_label, + "counterpart_type": line.counterpart_type, + "line_prefix": format_owned_line_prefix(line), + } + for line in guidance.owned + ], + "ignored": [ + { + "relationship_label": line.relationship_label, + "counterpart_type": line.counterpart_type, + "ignore_line": format_ignore_line( + line, + entity_count=entity_count, + counterpart_count=entity_instance_counts.get(line.counterpart_type, 0), + ), + } + for line in guidance.ignored + ], + } diff --git a/src/api/management/presentation/knowledge_graphs/extraction_jobs_routes.py b/src/api/management/presentation/knowledge_graphs/extraction_jobs_routes.py index 1432db587..ab05bf497 
100644 --- a/src/api/management/presentation/knowledge_graphs/extraction_jobs_routes.py +++ b/src/api/management/presentation/knowledge_graphs/extraction_jobs_routes.py @@ -39,7 +39,7 @@ class ExtractionJobsDocumentResponse(BaseModel): class StartExtractionRequest(BaseModel): - workers: int = Field(default=2, ge=1, le=32) + workers: int = Field(default=20, ge=1, le=32) class ActionResponse(BaseModel): diff --git a/src/api/tests/unit/extraction/infrastructure/test_extraction_job_container.py b/src/api/tests/unit/extraction/infrastructure/test_extraction_job_container.py new file mode 100644 index 000000000..1c0c2ec6a --- /dev/null +++ b/src/api/tests/unit/extraction/infrastructure/test_extraction_job_container.py @@ -0,0 +1,46 @@ +"""Unit tests for extraction job container lifecycle helpers.""" + +from __future__ import annotations + +from unittest.mock import MagicMock, patch + +from extraction.infrastructure.extraction_job_container import ( + extraction_job_container_name, + stop_extraction_job_container, + stop_extraction_job_containers, +) + + +def test_extraction_job_container_name_is_stable_and_short() -> None: + name = extraction_job_container_name("Adapter Deep Extraction_batch_0005_f46f3c66") + + assert name.startswith("kartograph-extract-") + assert len(name) <= 63 + + +@patch("extraction.infrastructure.extraction_job_container.create_container_runtime") +def test_stop_extraction_job_containers_stops_each_job(mock_create_runtime: MagicMock) -> None: + runtime = MagicMock() + runtime.remove_by_name.side_effect = [True, False] + mock_create_runtime.return_value = runtime + + stopped = stop_extraction_job_containers( + job_ids=("job-a", "job-b"), + container_engine="docker", + ) + + assert stopped == 1 + assert runtime.remove_by_name.call_count == 2 + runtime.remove_by_name.assert_any_call( + extraction_job_container_name("job-a"), + force=True, + ) + + +@patch("extraction.infrastructure.extraction_job_container.create_container_runtime") +def 
test_stop_extraction_job_container_delegates_to_runtime(mock_create_runtime: MagicMock) -> None: + runtime = MagicMock() + runtime.remove_by_name.return_value = True + mock_create_runtime.return_value = runtime + + assert stop_extraction_job_container(job_id="job-a", container_engine="docker") is True diff --git a/src/api/tests/unit/management/domain/test_extraction_job_config.py b/src/api/tests/unit/management/domain/test_extraction_job_config.py index 7dc48cd0f..542aed75e 100644 --- a/src/api/tests/unit/management/domain/test_extraction_job_config.py +++ b/src/api/tests/unit/management/domain/test_extraction_job_config.py @@ -50,3 +50,31 @@ def test_document_rejects_duplicate_job_set_names() -> None: ) errors = document.validation_errors(entity_instance_counts={"Feature": 3}) assert any("Duplicate" in err for err in errors) + + +def test_by_instances_rejects_counterpart_owned_relationship_line() -> None: + document = ExtractionJobConfigDocument( + version="1.0", + job_sets=( + ExtractionJobSetDefinition( + name="adapters", + strategy=ExtractionJobSetStrategy.BY_INSTANCES, + entity_type="Adapter", + instances_per_job=3, + description=( + "Properties:\n" + "- name: from source\n\n" + "Adapter -> operates_on -> Resource: link resources\n" + "Adapter -> verifies_inverse -> ComponentTest: link tests\n" + ), + ), + ), + ) + edges = [ + {"label": "operates_on", "source_type": "Adapter", "target_type": "Resource"}, + {"label": "verifies_inverse", "source_type": "Adapter", "target_type": "ComponentTest"}, + ] + counts = {"Adapter": 19, "Resource": 9, "ComponentTest": 1264} + errors = document.validation_errors(entity_instance_counts=counts, edge_types=edges) + + assert any("verifies_inverse" in err and "must not" in err.lower() for err in errors) diff --git a/src/api/tests/unit/management/domain/test_extraction_relationship_authoring.py b/src/api/tests/unit/management/domain/test_extraction_relationship_authoring.py index 90616f3e0..e1bcf6f22 100644 --- 
a/src/api/tests/unit/management/domain/test_extraction_relationship_authoring.py +++ b/src/api/tests/unit/management/domain/test_extraction_relationship_authoring.py @@ -1,6 +1,10 @@ """Tests for relationship ownership rules in per-instance descriptions.""" from management.domain.extraction_relationship_authoring import ( + format_ignore_line, + format_owned_line_prefix, + per_instance_description_relationship_errors, + relationship_authoring_guidance_for_entity_type, relationship_authoring_lines_for_entity_type, ) @@ -48,17 +52,21 @@ def test_adapter_omits_componenttest_relationship_but_keeps_resource() -> None: edges = [ {"label": "operates_on", "source_type": "Adapter", "target_type": "Resource"}, {"label": "verifies", "source_type": "ComponentTest", "target_type": "Adapter"}, + {"label": "verifies_inverse", "source_type": "Adapter", "target_type": "ComponentTest"}, ] - lines = relationship_authoring_lines_for_entity_type( + guidance = relationship_authoring_guidance_for_entity_type( "Adapter", edge_types=edges, entity_instance_counts=counts, ) - labels = {(line.relationship_label, line.counterpart_type) for line in lines} + owned = {(line.relationship_label, line.counterpart_type) for line in guidance.owned} + ignored = {(line.relationship_label, line.counterpart_type) for line in guidance.ignored} - assert ("operates_on", "Resource") in labels - assert ("verifies", "ComponentTest") not in labels - assert len(lines) == 1 + assert ("operates_on", "Resource") in owned + assert ("verifies_inverse", "ComponentTest") in ignored + assert ("verifies", "ComponentTest") in ignored + assert len(owned) == 1 + assert len(ignored) == 2 def test_includes_inbound_relationship_when_target_side_has_more_instances() -> None: @@ -73,3 +81,85 @@ def test_includes_inbound_relationship_when_target_side_has_more_instances() -> assert len(lines) == 1 assert lines[0].entity_type == "Service" assert lines[0].counterpart_type == "Route" + + +def 
test_rejects_active_line_for_ignored_relationship() -> None: + counts = {"Adapter": 19, "Resource": 9, "ComponentTest": 1264} + edges = [ + {"label": "operates_on", "source_type": "Adapter", "target_type": "Resource"}, + {"label": "verifies_inverse", "source_type": "Adapter", "target_type": "ComponentTest"}, + ] + description = """ +For each Adapter instance, capture everything. + +Properties: +- name: from source + +Adapter -> operates_on -> Resource: link managed resources +Adapter -> verifies_inverse -> ComponentTest: link tests +""" + errors = per_instance_description_relationship_errors( + description, + "Adapter", + edge_types=edges, + entity_instance_counts=counts, + ) + + assert any("verifies_inverse" in err and "must not" in err.lower() for err in errors) + + +def test_requires_ignore_line_for_counterpart_owned_edge() -> None: + counts = {"Adapter": 19, "Resource": 9, "ComponentTest": 1264} + edges = [ + {"label": "operates_on", "source_type": "Adapter", "target_type": "Resource"}, + {"label": "verifies_inverse", "source_type": "Adapter", "target_type": "ComponentTest"}, + ] + description = """ +Properties: +- name: from source + +Adapter -> operates_on -> Resource: link managed resources +""" + errors = per_instance_description_relationship_errors( + description, + "Adapter", + edge_types=edges, + entity_instance_counts=counts, + ) + + assert any("IGNORE" in err and "verifies_inverse" in err for err in errors) + + +def test_accepts_canonical_adapter_description() -> None: + counts = {"Adapter": 19, "Resource": 9, "ComponentTest": 1264} + edges = [ + {"label": "operates_on", "source_type": "Adapter", "target_type": "Resource"}, + {"label": "verifies_inverse", "source_type": "Adapter", "target_type": "ComponentTest"}, + ] + description = f""" +Properties: +- name: from source + +{format_owned_line_prefix( + relationship_authoring_guidance_for_entity_type( + "Adapter", edge_types=edges, entity_instance_counts=counts + ).owned[0] +)} link managed resources + 
+Ignore these relationships: +{format_ignore_line( + relationship_authoring_guidance_for_entity_type( + "Adapter", edge_types=edges, entity_instance_counts=counts + ).ignored[0], + entity_count=19, + counterpart_count=1264, +)} +""" + errors = per_instance_description_relationship_errors( + description, + "Adapter", + edge_types=edges, + entity_instance_counts=counts, + ) + + assert errors == () diff --git a/src/dev-ui/app/components/graph-management/GraphExtractionJobsWorkspace.vue b/src/dev-ui/app/components/graph-management/GraphExtractionJobsWorkspace.vue index 162234eeb..6e832ddae 100644 --- a/src/dev-ui/app/components/graph-management/GraphExtractionJobsWorkspace.vue +++ b/src/dev-ui/app/components/graph-management/GraphExtractionJobsWorkspace.vue @@ -96,7 +96,7 @@ const dbRefreshing = ref(false) const dbError = ref<string | null>(null) const extractionRunState = ref<ExtractionRunState | null>(null) const planSummary = ref<PlanSummary | null>(null) -const workers = ref(2) +const workers = ref(20) const startingExtraction = ref(false) const pausingExtraction = ref(false) const killingExtraction = ref(false) @@ -402,8 +402,13 @@ async function resetByKind(kind: 'stale' | 'completed' | 'failed' | 'all') { } as const map[kind].ref.value = true try { - await apiFetch(`${basePath.value}/${map[kind].path}`, { method: 'POST' }) - toast.success('Jobs reset') + const res = await apiFetch<{ message?: string; reset_count?: number; containers_stopped?: number }>( + `${basePath.value}/${map[kind].path}`, + { method: 'POST' }, + ) + toast.success(kind === 'stale' ? 'Running jobs reset' : 'Jobs reset', { + description: res.message || (res.reset_count !== undefined ? `${res.reset_count} job(s) reset` : undefined), + }) await refreshAll() } catch (e: unknown) { toast.error('Reset failed', { description: e instanceof Error ? 
e.message : 'Request failed' }) From 67cc547f16ef4e73977b67dab6fe23139c467619 Mon Sep 17 00:00:00 2001 From: aredenba-rh <aredenba@redhat.com> Date: Fri, 12 Jun 2026 14:26:47 -0400 Subject: [PATCH 126/153] fix(extraction): hydrate missing JobPackages before workspace materialization Re-fetch ingest-only archives when ZIPs are absent on disk so extraction jobs and sticky sessions populate repository-files. Gate readiness on archive presence and inject workload credentials into agentic-ci container env. Co-authored-by: Cursor <cursoragent@cursor.com> --- src/api/extraction/dependencies.py | 10 +- .../agentic_ci_extraction_job_runner.py | 45 +++- .../infrastructure/extraction_job_prompt.py | 6 +- .../extraction_job_runner_factory.py | 6 + .../extraction_job_workdir_materializer.py | 17 ++ .../ingestion_readiness_reader.py | 32 +-- .../sticky_session_bootstrap_builder.py | 8 + .../infrastructure/job_packages/__init__.py | 1 + .../job_packages/archive_hydrator.py | 237 ++++++++++++++++++ .../infrastructure/job_packages/readiness.py | 48 ++++ .../job_packages/test_archive_hydrator.py | 138 ++++++++++ 11 files changed, 525 insertions(+), 23 deletions(-) create mode 100644 src/api/infrastructure/job_packages/__init__.py create mode 100644 src/api/infrastructure/job_packages/archive_hydrator.py create mode 100644 src/api/infrastructure/job_packages/readiness.py create mode 100644 src/api/tests/unit/infrastructure/job_packages/test_archive_hydrator.py diff --git a/src/api/extraction/dependencies.py b/src/api/extraction/dependencies.py index 5c6ed70d7..1ad96f545 100644 --- a/src/api/extraction/dependencies.py +++ b/src/api/extraction/dependencies.py @@ -15,6 +15,7 @@ from extraction.application.sticky_session_runtime_service import StickySessionRuntimeService from extraction.infrastructure.sticky_runtime_health import StickyRuntimeHealthChecker from extraction.infrastructure.ingestion_readiness_reader import SqlIngestionReadinessReader +from 
infrastructure.job_packages.archive_hydrator import JobPackageArchiveHydrator from extraction.infrastructure.prepared_job_package_reader import SqlPreparedJobPackageReader from extraction.infrastructure.repositories import ( ExtractionAgentSessionRepository, @@ -114,11 +115,18 @@ def get_extraction_chat_turn_service( container_run_gid=runtime_settings.container_run_gid, ), runtime_settings=runtime_settings, + archive_hydrator=JobPackageArchiveHydrator( + session=session, + job_package_work_dir=Path(runtime_settings.job_package_work_dir), + ), ) runtime_service = StickySessionRuntimeService( session_service=session_service, skill_resolution_service=skill_resolution_service, - ingestion_readiness_reader=SqlIngestionReadinessReader(session=session), + ingestion_readiness_reader=SqlIngestionReadinessReader( + session=session, + job_package_work_dir_path=Path(runtime_settings.job_package_work_dir), + ), sticky_runtime_manager=sticky_runtime_manager, bootstrap_builder=bootstrap_builder, health_checker=StickyRuntimeHealthChecker(), diff --git a/src/api/extraction/infrastructure/agentic_ci_extraction_job_runner.py b/src/api/extraction/infrastructure/agentic_ci_extraction_job_runner.py index 03b084889..1c69ec77d 100644 --- a/src/api/extraction/infrastructure/agentic_ci_extraction_job_runner.py +++ b/src/api/extraction/infrastructure/agentic_ci_extraction_job_runner.py @@ -98,7 +98,13 @@ async def run(self, job: ExtractionJobRecord, *, tenant_id: str) -> dict[str, An ) _patch_job_context_api_base(workdir, self._settings.agentic_ci_api_base_url) prompt = build_extraction_job_prompt(job=job) - return await self._run_in_container(job=job, workdir=workdir, prompt=prompt) + return await self._run_in_container( + job=job, + workdir=workdir, + prompt=prompt, + tenant_id=tenant_id, + workload_token=credentials.token, + ) async def _run_in_container( self, @@ -106,16 +112,27 @@ async def _run_in_container( job: ExtractionJobRecord, workdir: Path, prompt: str, + tenant_id: str, + 
workload_token: str, ) -> dict[str, Any]: import asyncio - return await asyncio.to_thread(self._run_in_container_sync, job, workdir, prompt) + return await asyncio.to_thread( + self._run_in_container_sync, + job, + workdir, + prompt, + tenant_id, + workload_token, + ) def _run_in_container_sync( self, job: ExtractionJobRecord, workdir: Path, prompt: str, + tenant_id: str, + workload_token: str, ) -> dict[str, Any]: runtime = create_container_runtime(self._settings.container_engine) binary = getattr(runtime, "_binary", "podman") @@ -128,7 +145,12 @@ def _run_in_container_sync( try: otel_proc, otel_port, otel_log_path, _otel_rate = otel.start_collector(run_dir) otel_log = Path(otel_log_path) - env = self._build_container_env(otel_port=otel_port) + env = self._build_container_env( + otel_port=otel_port, + job=job, + tenant_id=tenant_id, + workload_token=workload_token, + ) binds = self._build_binds(workdir=workdir) write_extraction_prompt_file(workdir=workdir, prompt=prompt) command = _strip_harness_binary( @@ -188,13 +210,28 @@ def _resolve_model(self) -> str: return from_env return self._harness.default_model() - def _build_container_env(self, *, otel_port: int) -> dict[str, str]: + def _build_container_env( + self, + *, + otel_port: int, + job: ExtractionJobRecord | None = None, + tenant_id: str = "", + workload_token: str = "", + ) -> dict[str, str]: model = self._resolve_model() env: dict[str, str] = { "DISABLE_AUTOUPDATER": "1", "AGENT_MODEL": model, self._harness.model_env_var(): model, } + if workload_token.strip(): + env["KARTOGRAPH_WORKLOAD_TOKEN"] = workload_token.strip() + env["KARTOGRAPH_API_BASE_URL"] = self._settings.agentic_ci_api_base_url.rstrip("/") + if job is not None: + env["KARTOGRAPH_KNOWLEDGE_GRAPH_ID"] = job.knowledge_graph_id + if tenant_id.strip(): + env["KARTOGRAPH_TENANT_ID"] = tenant_id.strip() + env["KARTOGRAPH_WORKSPACE"] = "/workspace" if self._harness.auth_mode == "api-key": api_key = os.environ.get("ANTHROPIC_API_KEY", "").strip() if 
api_key: diff --git a/src/api/extraction/infrastructure/extraction_job_prompt.py b/src/api/extraction/infrastructure/extraction_job_prompt.py index 33e3dae59..63a03c2c0 100644 --- a/src/api/extraction/infrastructure/extraction_job_prompt.py +++ b/src/api/extraction/infrastructure/extraction_job_prompt.py @@ -81,8 +81,10 @@ def build_extraction_job_prompt(*, job: ExtractionJobRecord) -> str: "This container has no Kartograph MCP tools. Use the bundled helper script:", f"- Validate: `bash {MUTATIONS_HELPER} validate mutations/<batch>.jsonl`", f"- Apply: `bash {MUTATIONS_HELPER} apply mutations/<batch>.jsonl`", - "The helper reads api_base_url and workload_token from job-context.json, calls the", - "workload API, and writes mutations/result.json (the CI verdict artifact).", + "The helper reads api_base_url and workload_token from job-context.json (also exported", + "as KARTOGRAPH_WORKLOAD_TOKEN, KARTOGRAPH_API_BASE_URL, KARTOGRAPH_KNOWLEDGE_GRAPH_ID,", + "and KARTOGRAPH_TENANT_ID in the container environment), calls the workload API, and", + "writes mutations/result.json (the CI verdict artifact).", "Always validate before apply. 
Do not finish until apply succeeds.", "", "Manual curl (only if helper fails): base `{api_base_url}/extraction/workloads`,", diff --git a/src/api/extraction/infrastructure/extraction_job_runner_factory.py b/src/api/extraction/infrastructure/extraction_job_runner_factory.py index bb5531689..f7dda8937 100644 --- a/src/api/extraction/infrastructure/extraction_job_runner_factory.py +++ b/src/api/extraction/infrastructure/extraction_job_runner_factory.py @@ -33,9 +33,15 @@ def create_extraction_job_runner( session=session, job_package_work_dir=Path(resolved.job_package_work_dir), ) + from infrastructure.job_packages.archive_hydrator import JobPackageArchiveHydrator + materializer = ExtractionJobWorkdirMaterializer( settings=resolved, prepared_job_package_reader=prepared_reader, + archive_hydrator=JobPackageArchiveHydrator( + session=session, + job_package_work_dir=Path(resolved.job_package_work_dir), + ), ) return AgenticCiExtractionJobRunner( settings=resolved, diff --git a/src/api/extraction/infrastructure/extraction_job_workdir_materializer.py b/src/api/extraction/infrastructure/extraction_job_workdir_materializer.py index 835ed8cce..2842195c0 100644 --- a/src/api/extraction/infrastructure/extraction_job_workdir_materializer.py +++ b/src/api/extraction/infrastructure/extraction_job_workdir_materializer.py @@ -23,6 +23,7 @@ ) from extraction.infrastructure.extraction_job_workdir_layout import prepare_agentic_ci_workspace from extraction.infrastructure.prepared_job_package_reader import SqlPreparedJobPackageReader +from infrastructure.job_packages.archive_hydrator import JobPackageArchiveHydrator from extraction.infrastructure.workload_runtime_settings import ExtractionWorkloadRuntimeSettings from extraction.ports.runtime import ScopedWorkloadCredentials @@ -36,11 +37,13 @@ def __init__( settings: ExtractionWorkloadRuntimeSettings, prepared_job_package_reader: SqlPreparedJobPackageReader, probe: ExtractionJobProbe | None = None, + archive_hydrator: 
JobPackageArchiveHydrator | None = None, ) -> None: self._settings = settings self._prepared_job_package_reader = prepared_job_package_reader self._job_package_work_dir = Path(settings.job_package_work_dir) self._probe = probe or LoggingExtractionJobProbe() + self._archive_hydrator = archive_hydrator async def prepare( self, @@ -56,6 +59,14 @@ async def prepare( repository_files_dir = job_root / "repository-files" repository_files_dir.mkdir(parents=True, exist_ok=True) + hydration_warnings: list[str] = [] + if self._archive_hydrator is not None: + hydration = await self._archive_hydrator.ensure_for_knowledge_graph( + knowledge_graph_id=job.knowledge_graph_id, + tenant_id=tenant_id, + ) + hydration_warnings.extend(hydration.errors) + job_packages = await self._prepared_job_package_reader.list_latest_for_knowledge_graph( knowledge_graph_id=job.knowledge_graph_id, ) @@ -66,6 +77,12 @@ async def prepare( job_packages=job_packages, packages_by_id=packages_by_id, ) + if hydration_warnings: + materialization = materialization.merge( + RepositoryFilesMaterializationResult( + warnings=tuple(hydration_warnings), + ) + ) write_sources_index( job_root=job_root, knowledge_graph_id=job.knowledge_graph_id, diff --git a/src/api/extraction/infrastructure/ingestion_readiness_reader.py b/src/api/extraction/infrastructure/ingestion_readiness_reader.py index a89908379..76df461e7 100644 --- a/src/api/extraction/infrastructure/ingestion_readiness_reader.py +++ b/src/api/extraction/infrastructure/ingestion_readiness_reader.py @@ -2,35 +2,35 @@ from __future__ import annotations -from sqlalchemy import text +from pathlib import Path + from sqlalchemy.ext.asyncio import AsyncSession from extraction.domain.value_objects import IngestionReadinessSnapshot +from infrastructure.job_packages.readiness import materialized_data_source_counts class SqlIngestionReadinessReader: """Reads prepared data source counts from the shared data_sources table.""" - def __init__(self, *, session: AsyncSession) 
-> None: + def __init__( + self, + *, + session: AsyncSession, + job_package_work_dir_path: Path | None = None, + ) -> None: self._session = session + self._job_package_work_dir_path = job_package_work_dir_path async def read_for_knowledge_graph( self, *, knowledge_graph_id: str ) -> IngestionReadinessSnapshot: - result = await self._session.execute( - text( - """ - SELECT - COUNT(*) AS total, - COUNT(*) FILTER (WHERE last_prepared_commit IS NOT NULL) AS prepared - FROM data_sources - WHERE knowledge_graph_id = :knowledge_graph_id - """ - ), - {"knowledge_graph_id": knowledge_graph_id}, + total, prepared = await materialized_data_source_counts( + session=self._session, + knowledge_graph_id=knowledge_graph_id, + job_package_work_dir_path=self._job_package_work_dir_path, ) - row = result.one() return IngestionReadinessSnapshot( - data_source_count=int(row.total or 0), - prepared_source_count=int(row.prepared or 0), + data_source_count=total, + prepared_source_count=prepared, ) diff --git a/src/api/extraction/infrastructure/sticky_session_bootstrap_builder.py b/src/api/extraction/infrastructure/sticky_session_bootstrap_builder.py index 50b8896a6..0d816c2b1 100644 --- a/src/api/extraction/infrastructure/sticky_session_bootstrap_builder.py +++ b/src/api/extraction/infrastructure/sticky_session_bootstrap_builder.py @@ -2,6 +2,7 @@ from __future__ import annotations +from infrastructure.job_packages.archive_hydrator import JobPackageArchiveHydrator from extraction.infrastructure.sticky_session_workdir_materializer import ( StickySessionWorkdirMaterializer, ) @@ -25,11 +26,13 @@ def __init__( prepared_job_package_reader: IPreparedJobPackageReader, workdir_materializer: StickySessionWorkdirMaterializer, runtime_settings: ExtractionWorkloadRuntimeSettings | None = None, + archive_hydrator: JobPackageArchiveHydrator | None = None, ) -> None: self._credential_issuer = credential_issuer self._prepared_job_package_reader = prepared_job_package_reader self._workdir_materializer = 
workdir_materializer self._runtime_settings = runtime_settings or get_extraction_workload_runtime_settings() + self._archive_hydrator = archive_hydrator async def resolve_job_packages( self, @@ -57,6 +60,11 @@ async def build( job_packages: tuple[PreparedJobPackageSource, ...] = () if include_job_packages: + if self._archive_hydrator is not None: + await self._archive_hydrator.ensure_for_knowledge_graph( + knowledge_graph_id=knowledge_graph_id, + tenant_id=tenant_id, + ) job_packages = await self._prepared_job_package_reader.list_latest_for_knowledge_graph( knowledge_graph_id=knowledge_graph_id, ) diff --git a/src/api/infrastructure/job_packages/__init__.py b/src/api/infrastructure/job_packages/__init__.py new file mode 100644 index 000000000..9137e1577 --- /dev/null +++ b/src/api/infrastructure/job_packages/__init__.py @@ -0,0 +1 @@ +"""Cross-context JobPackage archive helpers.""" diff --git a/src/api/infrastructure/job_packages/archive_hydrator.py b/src/api/infrastructure/job_packages/archive_hydrator.py new file mode 100644 index 000000000..2a86c5ae3 --- /dev/null +++ b/src/api/infrastructure/job_packages/archive_hydrator.py @@ -0,0 +1,237 @@ +"""Re-materialize missing JobPackage archives before workspace preparation.""" + +from __future__ import annotations + +import logging +from dataclasses import dataclass +from datetime import UTC, datetime +from pathlib import Path +from typing import Any + +from sqlalchemy import text +from sqlalchemy.ext.asyncio import AsyncSession +from ulid import ULID + +from management.infrastructure.job_package_archive_reader import SqlJobPackageArchiveReader +from shared_kernel.job_package.archive_availability import job_package_archive_exists + +logger = logging.getLogger(__name__) + + +@dataclass(frozen=True) +class JobPackageArchiveHydrationResult: + """Outcome of ensuring JobPackage ZIP archives exist on disk.""" + + hydrated_count: int + skipped_count: int + errors: tuple[str, ...] 
+ + +class JobPackageArchiveHydrator: + """Ensure JobPackage ZIP archives exist for every data source on a knowledge graph.""" + + def __init__( + self, + *, + session: AsyncSession, + job_package_work_dir: Path, + ) -> None: + self._session = session + self._job_package_work_dir = job_package_work_dir + self._archive_reader = SqlJobPackageArchiveReader( + session=session, + job_package_work_dir=job_package_work_dir, + ) + + async def ensure_for_knowledge_graph( + self, + *, + knowledge_graph_id: str, + tenant_id: str, + ) -> JobPackageArchiveHydrationResult: + """Re-run ingest-only ingestion for data sources whose archives are missing.""" + rows = await self._load_data_sources(knowledge_graph_id=knowledge_graph_id) + if not rows: + return JobPackageArchiveHydrationResult( + hydrated_count=0, + skipped_count=0, + errors=(), + ) + + hydrated = 0 + skipped = 0 + errors: list[str] = [] + for row in rows: + data_source_id = str(row["id"]) + package_id = await self._archive_reader.latest_job_package_id_for_data_source( + data_source_id=data_source_id, + ) + if job_package_archive_exists( + work_dir=self._job_package_work_dir, + job_package_id=package_id, + ): + skipped += 1 + continue + try: + await self._hydrate_data_source( + row=row, + knowledge_graph_id=knowledge_graph_id, + tenant_id=tenant_id, + ) + hydrated += 1 + except Exception as exc: # noqa: BLE001 + name = str(row.get("name") or data_source_id) + message = f"{name}: {exc}" + logger.exception( + "job_package_archive_hydration_failed data_source_id=%s kg_id=%s", + data_source_id, + knowledge_graph_id, + ) + errors.append(message) + + if hydrated: + logger.info( + "job_package_archives_hydrated kg_id=%s hydrated=%s skipped=%s", + knowledge_graph_id, + hydrated, + skipped, + ) + return JobPackageArchiveHydrationResult( + hydrated_count=hydrated, + skipped_count=skipped, + errors=tuple(errors), + ) + + async def _load_data_sources(self, *, knowledge_graph_id: str) -> list[dict[str, Any]]: + result = await 
self._session.execute( + text( + """ + SELECT + id, + name, + adapter_type, + connection_config, + credentials_path, + clone_head_commit, + last_prepared_commit + FROM data_sources + WHERE knowledge_graph_id = :knowledge_graph_id + ORDER BY name + """ + ), + {"knowledge_graph_id": knowledge_graph_id}, + ) + rows: list[dict[str, Any]] = [] + for row in result.fetchall(): + connection_config = row.connection_config or {} + if not isinstance(connection_config, dict): + connection_config = dict(connection_config) + rows.append( + { + "id": str(row.id), + "name": str(row.name or ""), + "adapter_type": str(row.adapter_type or ""), + "connection_config": connection_config, + "credentials_path": row.credentials_path, + "clone_head_commit": row.clone_head_commit, + "last_prepared_commit": row.last_prepared_commit, + } + ) + return rows + + async def _hydrate_data_source( + self, + *, + row: dict[str, Any], + knowledge_graph_id: str, + tenant_id: str, + ) -> None: + from infrastructure.outbox.repository import OutboxRepository + from infrastructure.settings import get_management_settings + from ingestion.application.services.ingestion_service import IngestionService + from ingestion.infrastructure.adapters.github import GitHubAdapter + from management.infrastructure.repositories.fernet_secret_store import FernetSecretStore + + data_source_id = str(row["id"]) + adapter_type = str(row["adapter_type"]) + credentials_path = row.get("credentials_path") + credentials: dict[str, str] = {} + if credentials_path: + mgmt_settings = get_management_settings() + encryption_keys = [ + key.strip() + for key in mgmt_settings.encryption_key.get_secret_value().split(",") + if key.strip() + ] + if not encryption_keys: + raise RuntimeError("No encryption keys configured for credential retrieval") + credential_reader = FernetSecretStore( + session=self._session, + encryption_keys=encryption_keys, + ) + credentials = await credential_reader.retrieve( + path=str(credentials_path), + 
tenant_id=tenant_id, + ) + + ingestion_service = IngestionService( + adapter_registry={"github": GitHubAdapter()}, + work_dir=self._job_package_work_dir, + ) + sync_run_id = str(ULID()) + ingestion_result = await ingestion_service.run( + sync_run_id=sync_run_id, + data_source_id=data_source_id, + knowledge_graph_id=knowledge_graph_id, + adapter_type=adapter_type, + connection_config=dict(row.get("connection_config") or {}), + credentials_path=str(credentials_path) if credentials_path else None, + tenant_id=tenant_id, + credentials=credentials, + baseline_commit=row.get("clone_head_commit") or row.get("last_prepared_commit"), + pipeline_mode="ingest_only", + ) + if ingestion_result.entry_count <= 0: + raise RuntimeError( + "Ingestion produced an empty JobPackage; verify data source connectivity" + ) + + now = datetime.now(UTC) + outbox = OutboxRepository(session=self._session) + await outbox.append( + event_type="IngestionPrepared", + payload={ + "sync_run_id": sync_run_id, + "data_source_id": data_source_id, + "knowledge_graph_id": knowledge_graph_id, + "job_package_id": str(ingestion_result.job_package_id), + "prepared_commit_sha": ingestion_result.prepared_commit_sha, + "prepared_file_count": ingestion_result.branch_file_count, + "changeset_entry_count": ingestion_result.entry_count, + "occurred_at": now.isoformat(), + "hydrated": True, + }, + occurred_at=now, + aggregate_type="sync_run", + aggregate_id=sync_run_id, + ) + await self._session.execute( + text( + """ + UPDATE data_sources + SET + last_prepared_commit = :prepared_commit, + last_prepared_file_count = :prepared_file_count, + clone_head_commit = COALESCE(:prepared_commit, clone_head_commit), + updated_at = :updated_at + WHERE id = :data_source_id + """ + ), + { + "data_source_id": data_source_id, + "prepared_commit": ingestion_result.prepared_commit_sha, + "prepared_file_count": ingestion_result.branch_file_count, + "updated_at": now, + }, + ) + await self._session.commit() diff --git 
a/src/api/infrastructure/job_packages/readiness.py b/src/api/infrastructure/job_packages/readiness.py new file mode 100644 index 000000000..175fad8a2 --- /dev/null +++ b/src/api/infrastructure/job_packages/readiness.py @@ -0,0 +1,48 @@ +"""JobPackage readiness counts based on on-disk archives.""" + +from __future__ import annotations + +from pathlib import Path + +from sqlalchemy import text +from sqlalchemy.ext.asyncio import AsyncSession + +from management.infrastructure.job_package_archive_reader import SqlJobPackageArchiveReader +from shared_kernel.job_package.archive_availability import ( + job_package_archive_exists, + job_package_work_dir, +) + + +async def materialized_data_source_counts( + *, + session: AsyncSession, + knowledge_graph_id: str, + job_package_work_dir_path: Path | None = None, +) -> tuple[int, int]: + """Return (total_data_sources, data_sources_with_materializable_archives).""" + work_dir = job_package_work_dir_path or job_package_work_dir() + result = await session.execute( + text( + """ + SELECT id + FROM data_sources + WHERE knowledge_graph_id = :knowledge_graph_id + ORDER BY name + """ + ), + {"knowledge_graph_id": knowledge_graph_id}, + ) + data_source_ids = [str(row.id) for row in result.fetchall()] + archive_reader = SqlJobPackageArchiveReader( + session=session, + job_package_work_dir=work_dir, + ) + prepared_count = 0 + for data_source_id in data_source_ids: + package_id = await archive_reader.latest_job_package_id_for_data_source( + data_source_id=data_source_id, + ) + if job_package_archive_exists(work_dir=work_dir, job_package_id=package_id): + prepared_count += 1 + return len(data_source_ids), prepared_count diff --git a/src/api/tests/unit/infrastructure/job_packages/test_archive_hydrator.py b/src/api/tests/unit/infrastructure/job_packages/test_archive_hydrator.py new file mode 100644 index 000000000..be9d902db --- /dev/null +++ b/src/api/tests/unit/infrastructure/job_packages/test_archive_hydrator.py @@ -0,0 +1,138 @@ +"""Unit 
tests for JobPackage archive hydration.""" + +from __future__ import annotations + +from pathlib import Path +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest + +from infrastructure.job_packages.archive_hydrator import JobPackageArchiveHydrator +from shared_kernel.job_package.builder import JobPackageBuilder +from shared_kernel.job_package.value_objects import ( + AdapterCheckpoint, + ChangeOperation, + ChangesetEntry, + JobPackageId, + SyncMode, +) + + +def _write_package(work_dir: Path, package_id: str) -> None: + builder = JobPackageBuilder( + data_source_id="ds-1", + knowledge_graph_id="kg-1", + sync_mode=SyncMode.FULL_REFRESH, + package_id=JobPackageId(value=package_id), + ) + ref = builder.add_content(b"print('hello')\n") + builder.add_changeset_entry( + ChangesetEntry( + operation=ChangeOperation.ADD, + id="file-1", + type="io.kartograph.change.file", + path="pkg/example.go", + content_ref=ref, + content_type="text/plain", + metadata={}, + ) + ) + builder.set_checkpoint(AdapterCheckpoint(schema_version="1.0.0", data={"commit_sha": "abc"})) + builder.build(work_dir) + + +def _mock_session(*, data_sources: list[dict]) -> AsyncMock: + ds_result = MagicMock() + ds_result.fetchall.return_value = [MagicMock(**row) for row in data_sources] + session = AsyncMock() + session.execute = AsyncMock(return_value=ds_result) + session.commit = AsyncMock() + return session + + +@pytest.mark.asyncio +async def test_hydrator_skips_when_archive_exists(tmp_path: Path) -> None: + package_id = "01JFULL0000000000000000000" + _write_package(tmp_path, package_id) + session = _mock_session( + data_sources=[ + { + "id": "ds-1", + "name": "hyperfleet-e2e", + "adapter_type": "github", + "connection_config": {}, + "credentials_path": None, + "clone_head_commit": "abc", + "last_prepared_commit": "abc", + } + ] + ) + hydrator = JobPackageArchiveHydrator( + session=session, + job_package_work_dir=tmp_path, + ) + with patch.object( + hydrator._archive_reader, + 
"latest_job_package_id_for_data_source", + AsyncMock(return_value=package_id), + ): + result = await hydrator.ensure_for_knowledge_graph( + knowledge_graph_id="kg-1", + tenant_id="tenant-1", + ) + + assert result.hydrated_count == 0 + assert result.skipped_count == 1 + assert result.errors == () + + +@pytest.mark.asyncio +async def test_hydrator_runs_ingestion_when_archive_missing(tmp_path: Path) -> None: + session = _mock_session( + data_sources=[ + { + "id": "ds-1", + "name": "hyperfleet-e2e", + "adapter_type": "github", + "connection_config": {"owner": "org", "repo": "repo"}, + "credentials_path": None, + "clone_head_commit": "abc", + "last_prepared_commit": "abc", + } + ] + ) + hydrator = JobPackageArchiveHydrator( + session=session, + job_package_work_dir=tmp_path, + ) + ingestion_result = MagicMock( + job_package_id=JobPackageId(value="01JHYDRATED000000000000000"), + entry_count=2, + branch_file_count=10, + prepared_commit_sha="def", + ) + with patch.object( + hydrator._archive_reader, + "latest_job_package_id_for_data_source", + AsyncMock(return_value=None), + ), patch( + "ingestion.application.services.ingestion_service.IngestionService" + ) as ingestion_cls, patch( + "infrastructure.outbox.repository.OutboxRepository" + ) as outbox_cls: + ingestion_service = AsyncMock() + ingestion_service.run = AsyncMock(return_value=ingestion_result) + ingestion_cls.return_value = ingestion_service + outbox = AsyncMock() + outbox.append = AsyncMock() + outbox_cls.return_value = outbox + + result = await hydrator.ensure_for_knowledge_graph( + knowledge_graph_id="kg-1", + tenant_id="tenant-1", + ) + + assert result.hydrated_count == 1 + assert result.skipped_count == 0 + ingestion_service.run.assert_awaited_once() + outbox.append.assert_awaited_once() From a86d892baa9cac1a7ff5844c80442f95759e7a4c Mon Sep 17 00:00:00 2001 From: aredenba-rh <aredenba@redhat.com> Date: Fri, 12 Jun 2026 15:37:14 -0400 Subject: [PATCH 127/153] feat(extraction): archive jobs, schema-driven GMA, 
and fix GitHub prepare Persist successful extraction jobs as archived with mutation history and surface that in graph management. Validate relationship authoring against ontology and merge token/graph-write metrics from JSONL and agent streams. Use tarball-based GitHub full refresh with auth fallback, and order sync runs newest-first so prepare retries show accurate UI state. Co-authored-by: Cursor <cursoragent@cursor.com> --- .../kartograph_agent_runtime/agent_prompt.py | 8 +- src/agent-runtime/tests/test_agent_prompt.py | 2 +- .../archived_extraction_history.py | 74 +++++ .../application/skill_resolution_service.py | 19 +- src/api/extraction/domain/extraction_job.py | 17 ++ .../agentic_ci_extraction_job_runner.py | 68 ++--- .../infrastructure/extraction_job_activity.py | 3 +- .../infrastructure/extraction_job_metrics.py | 78 +++++- .../extraction_job_mutation_metrics.py | 104 +++++++ .../infrastructure/extraction_job_prompt.py | 6 +- .../infrastructure/models/extraction_job.py | 4 + .../repositories/extraction_job_repository.py | 80 +++++- .../extraction_jobs_service.py | 4 + .../management/extraction_jobs_service.py | 63 +++++ ...6n7o8_add_extraction_job_archive_fields.py | 42 +++ .../infrastructure/adapters/github.py | 210 +++++++++++--- src/api/main.py | 1 + .../domain/extraction_job_config.py | 22 ++ .../extraction_relationship_authoring.py | 165 +++++++++++ .../data_source_sync_run_repository.py | 6 +- .../extraction_jobs_routes.py | 38 +++ .../test_data_source_sync_run_repository.py | 60 ++++ .../test_archived_extraction_history.py | 47 ++++ .../test_skill_resolution_service.py | 3 +- .../test_extraction_job_mutation_metrics.py | 83 ++++++ .../adapters/test_github_adapter.py | 196 +++++++++---- .../test_extraction_relationship_authoring.py | 50 ++++ .../GraphExtractionArchivedHistory.vue | 264 ++++++++++++++++++ .../GraphExtractionJobWatchDialog.vue | 4 + .../GraphExtractionJobsWorkspace.vue | 11 +- src/dev-ui/app/pages/data-sources/index.vue | 9 +- 
.../[kgId]/data-sources/index.vue | 27 +- .../pages/knowledge-graphs/[kgId]/manage.vue | 188 +------------ .../app/tests/kg-data-sources-phase1.test.ts | 26 ++ src/dev-ui/app/utils/kgDataSourcesSync.ts | 14 +- src/dev-ui/app/utils/kgManageState.ts | 8 +- src/dev-ui/app/utils/kgManageWorkspace.ts | 2 +- 37 files changed, 1628 insertions(+), 378 deletions(-) create mode 100644 src/api/extraction/application/archived_extraction_history.py create mode 100644 src/api/extraction/infrastructure/extraction_job_mutation_metrics.py create mode 100644 src/api/infrastructure/migrations/versions/j3k4l5m6n7o8_add_extraction_job_archive_fields.py create mode 100644 src/api/tests/unit/extraction/application/test_archived_extraction_history.py create mode 100644 src/api/tests/unit/extraction/infrastructure/test_extraction_job_mutation_metrics.py create mode 100644 src/dev-ui/app/components/graph-management/GraphExtractionArchivedHistory.vue diff --git a/src/agent-runtime/kartograph_agent_runtime/agent_prompt.py b/src/agent-runtime/kartograph_agent_runtime/agent_prompt.py index 93f8478c9..9c0ce3db0 100644 --- a/src/agent-runtime/kartograph_agent_runtime/agent_prompt.py +++ b/src/agent-runtime/kartograph_agent_runtime/agent_prompt.py @@ -74,9 +74,10 @@ ### Per-instance description (by_instances job sets) Before drafting, call `kartograph_get_extraction_jobs_config` and read +`entity_type_authoring_context.{EntityType}` for exact property names plus `relationship_authoring_by_entity_type.{EntityType}` — it lists exact `owned` line prefixes -and `ignored` ignore_line text from live instance counts. Copy those lines; do not invent -relationship targets from the raw ontology alone. +and `ignored` ignore_line text from live instance counts and the real ontology. Copy those lines; +do not invent relationship labels or property names from memory. 
Use this template (substitute real entity and relationship names): @@ -98,8 +99,7 @@ **Ownership rule:** include `{EntityType} -> {rel} -> {Counterpart}` as an active line only when {EntityType} has MORE live instances than {Counterpart}. When the counterpart has more (or equal), -use an IGNORE line only — e.g. Adapter (19) owns `operates_on -> Resource` (9) but must IGNORE -`verifies_inverse -> ComponentTest` (1264 instances). +use an IGNORE line only — copy the exact lines from `relationship_authoring_by_entity_type`. Do **not** use theme-only sections (Implementation Analysis, Configuration Details, etc.). When the operator approves, save via `kartograph_save_extraction_jobs_config`. diff --git a/src/agent-runtime/tests/test_agent_prompt.py b/src/agent-runtime/tests/test_agent_prompt.py index 545c0e784..fbb157042 100644 --- a/src/agent-runtime/tests/test_agent_prompt.py +++ b/src/agent-runtime/tests/test_agent_prompt.py @@ -130,4 +130,4 @@ def test_build_agent_system_prompt_compact_extraction_jobs_keeps_description_aut assert "**per_instance_description_authoring**" in prompt assert "IGNORE lines" in prompt assert "relationship_authoring_by_entity_type" in prompt - assert "verifies_inverse -> ComponentTest" in prompt + assert "entity_type_authoring_context" in prompt diff --git a/src/api/extraction/application/archived_extraction_history.py b/src/api/extraction/application/archived_extraction_history.py new file mode 100644 index 000000000..b495525b1 --- /dev/null +++ b/src/api/extraction/application/archived_extraction_history.py @@ -0,0 +1,74 @@ +"""Shape archived extraction jobs for mutation history UI.""" + +from __future__ import annotations + +from typing import Any + +from extraction.domain.extraction_job import ExtractionJobRecord + + +def serialize_archived_job(job: ExtractionJobRecord) -> dict[str, Any]: + return { + **job.to_dict(), + "jobId": job.job_id, + "jobSet": job.job_set_name, + "writeOps": job.write_ops(), + "hasMutations": 
bool(job.applied_mutations_jsonl), + } + + +def group_archived_jobs_by_run_and_set( + jobs: list[ExtractionJobRecord], +) -> list[dict[str, Any]]: + """Group archived jobs by extraction run start, then job set name.""" + runs: dict[str, dict[str, Any]] = {} + for job in jobs: + run_key = job.run_started_at.isoformat() if job.run_started_at else "unknown-run" + if run_key not in runs: + runs[run_key] = { + "runStartedAt": job.run_started_at.isoformat() if job.run_started_at else None, + "jobSets": {}, + "jobCount": 0, + "writeOps": 0, + "inputTokens": 0, + "outputTokens": 0, + "costUsd": 0.0, + } + run = runs[run_key] + set_name = job.job_set_name + job_sets: dict[str, list[dict[str, Any]]] = run["jobSets"] + if set_name not in job_sets: + job_sets[set_name] = [] + job_sets[set_name].append(serialize_archived_job(job)) + run["jobCount"] += 1 + run["writeOps"] += job.write_ops() + run["inputTokens"] += job.input_tokens + run["outputTokens"] += job.output_tokens + run["costUsd"] += job.cost_usd + + grouped: list[dict[str, Any]] = [] + for run_key in sorted(runs.keys(), reverse=True): + run = runs[run_key] + job_sets_payload = [] + for set_name in sorted(run["jobSets"].keys()): + archived_jobs = run["jobSets"][set_name] + job_sets_payload.append( + { + "jobSet": set_name, + "jobs": archived_jobs, + "jobCount": len(archived_jobs), + "writeOps": sum(int(job.get("writeOps") or 0) for job in archived_jobs), + } + ) + grouped.append( + { + "runStartedAt": run["runStartedAt"], + "jobCount": run["jobCount"], + "writeOps": run["writeOps"], + "inputTokens": run["inputTokens"], + "outputTokens": run["outputTokens"], + "costUsd": round(float(run["costUsd"]), 6), + "jobSets": job_sets_payload, + } + ) + return grouped diff --git a/src/api/extraction/application/skill_resolution_service.py b/src/api/extraction/application/skill_resolution_service.py index 0b2292e05..a3b0c28eb 100644 --- a/src/api/extraction/application/skill_resolution_service.py +++ 
b/src/api/extraction/application/skill_resolution_service.py @@ -155,19 +155,12 @@ class ResolvedExtractionSkillPack: ), "per_instance_description_authoring": ( "The job set `description` is the shared per-instance brief for every by_instances job. " - "Before drafting, call kartograph_get_schema_ontology for the target entity_type and " - "kartograph_get_extraction_jobs_config (or plan summary entity_types) for live instance " - "counts. Enumerate every property on the target entity type. For relationships, apply " - "the duplicate-work rule by default: when EntityX relates to EntityY, include a " - "'{EntityX} -> {relationship_label} -> {EntityY}:' line only if EntityX has MORE live " - "instances than EntityY. When EntityX has fewer or equal instances, do not omit " - "silently — add an explicit ignore line (see template below). The higher-volume " - "counterpart type's job set owns that edge work. " - "Example (use real counts from entity_types): Adapter (19 instances) vs Resource (9) " - "vs ComponentTest (1264) — an Adapter job set includes " - "'Adapter -> operates_on -> Resource:' (19 > 9) and must include " - "'IGNORE Adapter -> verifies_inverse -> ComponentTest: handled by ComponentTest job " - "sets (1264 vs 19 instances). Do not create or update this edge in Adapter jobs.' " + "Before drafting, call kartograph_get_schema_ontology and " + "kartograph_get_extraction_jobs_config. Read " + "entity_type_authoring_context.{EntityType}.properties for the exact property " + "names and relationship_authoring_by_entity_type.{EntityType} for owned/ignored " + "relationship lines derived from live instance counts and the real ontology — " + "never invent relationship labels or property names. " "Compare counts numerically for every ontology relationship before writing each line. 
" "Write the description using this exact shape (replace with real ontology names — " "never use placeholder EntityX in saved text): " diff --git a/src/api/extraction/domain/extraction_job.py b/src/api/extraction/domain/extraction_job.py index 5babf15d1..f61cd41f9 100644 --- a/src/api/extraction/domain/extraction_job.py +++ b/src/api/extraction/domain/extraction_job.py @@ -14,6 +14,7 @@ class ExtractionJobStatus(StrEnum): PENDING = "pending" IN_PROGRESS = "in_progress" COMPLETED = "completed" + ARCHIVED = "archived" FAILED = "failed" @@ -102,6 +103,18 @@ class ExtractionJobRecord: entities_created: int = 0 entities_modified: int = 0 relationships_created: int = 0 + relationships_modified: int = 0 + run_started_at: datetime | None = None + archived_at: datetime | None = None + applied_mutations_jsonl: str | None = None + + def write_ops(self) -> int: + return ( + self.entities_created + + self.entities_modified + + self.relationships_created + + self.relationships_modified + ) def to_dict(self) -> dict[str, Any]: return { @@ -129,6 +142,10 @@ def to_dict(self) -> dict[str, Any]: "entities_created": self.entities_created, "entities_modified": self.entities_modified, "relationships_created": self.relationships_created, + "relationships_modified": self.relationships_modified, + "write_ops": self.write_ops(), + "run_started_at": self.run_started_at.isoformat() if self.run_started_at else None, + "archived_at": self.archived_at.isoformat() if self.archived_at else None, "instance_count": len(self.target_instances), "file_count": len(self.target_files), } diff --git a/src/api/extraction/infrastructure/agentic_ci_extraction_job_runner.py b/src/api/extraction/infrastructure/agentic_ci_extraction_job_runner.py index 1c69ec77d..96e255104 100644 --- a/src/api/extraction/infrastructure/agentic_ci_extraction_job_runner.py +++ b/src/api/extraction/infrastructure/agentic_ci_extraction_job_runner.py @@ -23,7 +23,7 @@ format_activity_log_line, format_claude_code_stream_line, ) -from 
extraction.infrastructure.extraction_job_metrics import metrics_from_otel_log +from extraction.infrastructure.extraction_job_metrics import merge_extraction_job_metrics from extraction.infrastructure.extraction_job_prompt import ( EXTRACTION_JOB_INVOKE_PROMPT, build_extraction_job_prompt, @@ -98,13 +98,7 @@ async def run(self, job: ExtractionJobRecord, *, tenant_id: str) -> dict[str, An ) _patch_job_context_api_base(workdir, self._settings.agentic_ci_api_base_url) prompt = build_extraction_job_prompt(job=job) - return await self._run_in_container( - job=job, - workdir=workdir, - prompt=prompt, - tenant_id=tenant_id, - workload_token=credentials.token, - ) + return await self._run_in_container(job=job, workdir=workdir, prompt=prompt) async def _run_in_container( self, @@ -112,27 +106,16 @@ async def _run_in_container( job: ExtractionJobRecord, workdir: Path, prompt: str, - tenant_id: str, - workload_token: str, ) -> dict[str, Any]: import asyncio - return await asyncio.to_thread( - self._run_in_container_sync, - job, - workdir, - prompt, - tenant_id, - workload_token, - ) + return await asyncio.to_thread(self._run_in_container_sync, job, workdir, prompt) def _run_in_container_sync( self, job: ExtractionJobRecord, workdir: Path, prompt: str, - tenant_id: str, - workload_token: str, ) -> dict[str, Any]: runtime = create_container_runtime(self._settings.container_engine) binary = getattr(runtime, "_binary", "podman") @@ -145,12 +128,7 @@ def _run_in_container_sync( try: otel_proc, otel_port, otel_log_path, _otel_rate = otel.start_collector(run_dir) otel_log = Path(otel_log_path) - env = self._build_container_env( - otel_port=otel_port, - job=job, - tenant_id=tenant_id, - workload_token=workload_token, - ) + env = self._build_container_env(otel_port=otel_port) binds = self._build_binds(workdir=workdir) write_extraction_prompt_file(workdir=workdir, prompt=prompt) command = _strip_harness_binary( @@ -172,7 +150,12 @@ def _run_in_container_sync( if otel_proc is not None: 
otel.stop_collector(otel_proc) otel_proc = None - metrics = metrics_from_otel_log(otel_log) if otel_log is not None else {} + log_path = activity_log_path(workdir) + metrics = merge_extraction_job_metrics( + otel_log=otel_log, + workdir=workdir, + activity_log=log_path, + ) if rc != 0: raise RuntimeError( f"agentic-ci container exited with code {rc} for job {job.job_id}" @@ -186,10 +169,7 @@ def _run_in_container_sync( "via workload API." ), ) - metrics = { - **metrics, - "operations_applied": verdict.operations_applied, - } + metrics["operations_applied"] = verdict.operations_applied return metrics finally: if otel_proc is not None: @@ -210,28 +190,13 @@ def _resolve_model(self) -> str: return from_env return self._harness.default_model() - def _build_container_env( - self, - *, - otel_port: int, - job: ExtractionJobRecord | None = None, - tenant_id: str = "", - workload_token: str = "", - ) -> dict[str, str]: + def _build_container_env(self, *, otel_port: int) -> dict[str, str]: model = self._resolve_model() env: dict[str, str] = { "DISABLE_AUTOUPDATER": "1", "AGENT_MODEL": model, self._harness.model_env_var(): model, } - if workload_token.strip(): - env["KARTOGRAPH_WORKLOAD_TOKEN"] = workload_token.strip() - env["KARTOGRAPH_API_BASE_URL"] = self._settings.agentic_ci_api_base_url.rstrip("/") - if job is not None: - env["KARTOGRAPH_KNOWLEDGE_GRAPH_ID"] = job.knowledge_graph_id - if tenant_id.strip(): - env["KARTOGRAPH_TENANT_ID"] = tenant_id.strip() - env["KARTOGRAPH_WORKSPACE"] = "/workspace" if self._harness.auth_mode == "api-key": api_key = os.environ.get("ANTHROPIC_API_KEY", "").strip() if api_key: @@ -353,9 +318,13 @@ def _run_foreground_streaming( bufsize=1, ) captured_tail: list[str] = [] + stream_log_path = activity_log_path.parent / "agent_stream.jsonl" try: assert proc.stdout is not None - with activity_log_path.open("a", encoding="utf-8") as log_handle: + with activity_log_path.open("a", encoding="utf-8") as log_handle, stream_log_path.open( + "a", + 
encoding="utf-8", + ) as stream_handle: for line in proc.stdout: if time.monotonic() - started > timeout_seconds: proc.kill() @@ -370,6 +339,9 @@ def _run_foreground_streaming( cleaned = line.rstrip("\n") if not cleaned: continue + if cleaned.startswith("{"): + stream_handle.write(cleaned + "\n") + stream_handle.flush() parsed = format_claude_code_stream_line(cleaned) if parsed: ts = datetime.now(UTC).isoformat() diff --git a/src/api/extraction/infrastructure/extraction_job_activity.py b/src/api/extraction/infrastructure/extraction_job_activity.py index f422e39c3..50d9b5463 100644 --- a/src/api/extraction/infrastructure/extraction_job_activity.py +++ b/src/api/extraction/infrastructure/extraction_job_activity.py @@ -233,7 +233,8 @@ def serialize_recent_job( "entitiesCreated": job.entities_created, "entitiesModified": job.entities_modified, "relationshipsCreated": job.relationships_created, - "writeOps": job.entities_created + job.entities_modified + job.relationships_created, + "relationshipsModified": job.relationships_modified, + "writeOps": job.write_ops(), "instanceCount": len(job.target_instances), "fileCount": len(job.target_files), "assistantPreview": preview, diff --git a/src/api/extraction/infrastructure/extraction_job_metrics.py b/src/api/extraction/infrastructure/extraction_job_metrics.py index db31b7a39..47514acbf 100644 --- a/src/api/extraction/infrastructure/extraction_job_metrics.py +++ b/src/api/extraction/infrastructure/extraction_job_metrics.py @@ -1,4 +1,4 @@ -"""Parse agentic-ci OTEL logs into extraction job metrics.""" +"""Parse agentic-ci OTEL logs and Claude stream output into extraction job metrics.""" from __future__ import annotations @@ -6,6 +6,78 @@ from pathlib import Path from typing import Any +from extraction.infrastructure.extraction_job_mutation_metrics import ( + applied_mutation_jsonl_from_workdir, + metrics_from_mutation_workdir, +) + + +def merge_extraction_job_metrics( + *, + otel_log: Path | None, + workdir: Path, + 
activity_log: Path | None = None, +) -> dict[str, Any]: + """Combine OTEL token metrics, Claude stream fallback, and applied JSONL graph writes.""" + metrics = metrics_from_otel_log(otel_log) if otel_log is not None else _empty_metrics() + if _token_total(metrics) == 0: + stream_log = workdir / "agent_stream.jsonl" + stream_metrics = metrics_from_claude_stream_log(stream_log) + if not stream_metrics and activity_log is not None: + stream_metrics = metrics_from_claude_stream_log(activity_log) + for key, value in stream_metrics.items(): + if key.startswith(("input_", "output_", "cache_", "cost_")) and value: + metrics[key] = value + + mutation_metrics = metrics_from_mutation_workdir(workdir) + metrics.update(mutation_metrics) + applied_jsonl = applied_mutation_jsonl_from_workdir(workdir) + if applied_jsonl: + metrics["applied_mutations_jsonl"] = applied_jsonl + return metrics + + +def metrics_from_claude_stream_log(activity_log: Path) -> dict[str, Any]: + """Extract token usage from claude-code JSONL result events in the activity log.""" + if not activity_log.is_file(): + return {} + usage: dict[str, Any] = {} + cost_usd = 0.0 + for line in activity_log.read_text(encoding="utf-8").splitlines(): + body = line.strip() + if " " in body and body[0].isdigit() and "T" in body.split(" ", 1)[0]: + _, _, body = body.partition(" ") + body = body.strip() + if not body.startswith("{"): + continue + try: + event = json.loads(body) + except json.JSONDecodeError: + continue + if str(event.get("type") or "") != "result": + continue + raw_usage = event.get("usage") + if isinstance(raw_usage, dict): + usage = raw_usage + total_cost = event.get("total_cost_usd") + if total_cost is not None: + cost_usd = float(total_cost) + + if not usage and cost_usd == 0.0: + return {} + + return { + "input_tokens": int(usage.get("input_tokens") or 0), + "output_tokens": int(usage.get("output_tokens") or 0), + "cache_read_tokens": int(usage.get("cache_read_input_tokens") or 0), + 
"cache_creation_tokens": int(usage.get("cache_creation_input_tokens") or 0), + "cost_usd": cost_usd, + } + + +def _token_total(metrics: dict[str, Any]) -> int: + return int(metrics.get("input_tokens") or 0) + int(metrics.get("output_tokens") or 0) + def metrics_from_otel_log(otel_log: Path) -> dict[str, Any]: """Extract token and cost metrics from an agentic-ci OTEL JSONL log.""" @@ -56,6 +128,8 @@ def metrics_from_otel_log(otel_log: Path) -> dict[str, Any]: "entities_created": 0, "entities_modified": 0, "relationships_created": 0, + "relationships_modified": 0, + "write_ops": 0, } @@ -69,4 +143,6 @@ def _empty_metrics() -> dict[str, Any]: "entities_created": 0, "entities_modified": 0, "relationships_created": 0, + "relationships_modified": 0, + "write_ops": 0, } diff --git a/src/api/extraction/infrastructure/extraction_job_mutation_metrics.py b/src/api/extraction/infrastructure/extraction_job_mutation_metrics.py new file mode 100644 index 000000000..149244d22 --- /dev/null +++ b/src/api/extraction/infrastructure/extraction_job_mutation_metrics.py @@ -0,0 +1,104 @@ +"""Count graph instance write operations from applied extraction job JSONL.""" + +from __future__ import annotations + +import json +from pathlib import Path +from typing import Any + +from graph.domain.value_objects import EntityType, MutationOperationType + + +def metrics_from_mutation_jsonl(jsonl_content: str) -> dict[str, int]: + """Count instance CREATE/UPDATE operations; ignore schema DEFINE operations.""" + entities_created = 0 + entities_modified = 0 + relationships_created = 0 + relationships_modified = 0 + + for raw_line in jsonl_content.splitlines(): + line = raw_line.strip() + if not line: + continue + try: + row = json.loads(line) + except json.JSONDecodeError: + continue + if not isinstance(row, dict): + continue + + op = str(row.get("op") or "").upper() + entity_type = str(row.get("type") or "").lower() + if op == MutationOperationType.DEFINE.value: + continue + if op not in { + 
MutationOperationType.CREATE.value, + MutationOperationType.UPDATE.value, + }: + continue + + if entity_type == EntityType.NODE.value: + if op == MutationOperationType.CREATE.value: + entities_created += 1 + else: + entities_modified += 1 + elif entity_type == EntityType.EDGE.value: + if op == MutationOperationType.CREATE.value: + relationships_created += 1 + else: + relationships_modified += 1 + + write_ops = ( + entities_created + + entities_modified + + relationships_created + + relationships_modified + ) + return { + "entities_created": entities_created, + "entities_modified": entities_modified, + "relationships_created": relationships_created, + "relationships_modified": relationships_modified, + "write_ops": write_ops, + } + + +def metrics_from_mutation_workdir(job_root: Path) -> dict[str, int]: + """Load graph write metrics from mutations/*.jsonl in a job workspace.""" + mutations_dir = job_root / "mutations" + if not mutations_dir.is_dir(): + return _empty_metrics() + + jsonl_files = sorted( + path for path in mutations_dir.glob("*.jsonl") if path.is_file() + ) + if not jsonl_files: + return _empty_metrics() + + combined = "\n".join( + path.read_text(encoding="utf-8") for path in jsonl_files + ) + return metrics_from_mutation_jsonl(combined) + + +def applied_mutation_jsonl_from_workdir(job_root: Path) -> str | None: + """Return concatenated applied JSONL content for archival.""" + mutations_dir = job_root / "mutations" + if not mutations_dir.is_dir(): + return None + jsonl_files = sorted(path for path in mutations_dir.glob("*.jsonl") if path.is_file()) + if not jsonl_files: + return None + parts = [path.read_text(encoding="utf-8") for path in jsonl_files] + content = "\n".join(part.rstrip("\n") for part in parts if part.strip()) + return content or None + + +def _empty_metrics() -> dict[str, int]: + return { + "entities_created": 0, + "entities_modified": 0, + "relationships_created": 0, + "relationships_modified": 0, + "write_ops": 0, + } diff --git 
a/src/api/extraction/infrastructure/extraction_job_prompt.py b/src/api/extraction/infrastructure/extraction_job_prompt.py index 63a03c2c0..33e3dae59 100644 --- a/src/api/extraction/infrastructure/extraction_job_prompt.py +++ b/src/api/extraction/infrastructure/extraction_job_prompt.py @@ -81,10 +81,8 @@ def build_extraction_job_prompt(*, job: ExtractionJobRecord) -> str: "This container has no Kartograph MCP tools. Use the bundled helper script:", f"- Validate: `bash {MUTATIONS_HELPER} validate mutations/<batch>.jsonl`", f"- Apply: `bash {MUTATIONS_HELPER} apply mutations/<batch>.jsonl`", - "The helper reads api_base_url and workload_token from job-context.json (also exported", - "as KARTOGRAPH_WORKLOAD_TOKEN, KARTOGRAPH_API_BASE_URL, KARTOGRAPH_KNOWLEDGE_GRAPH_ID,", - "and KARTOGRAPH_TENANT_ID in the container environment), calls the workload API, and", - "writes mutations/result.json (the CI verdict artifact).", + "The helper reads api_base_url and workload_token from job-context.json, calls the", + "workload API, and writes mutations/result.json (the CI verdict artifact).", "Always validate before apply. 
Do not finish until apply succeeds.", "", "Manual curl (only if helper fails): base `{api_base_url}/extraction/workloads`,", diff --git a/src/api/extraction/infrastructure/models/extraction_job.py b/src/api/extraction/infrastructure/models/extraction_job.py index 7bf38a5fe..c34fb7fa9 100644 --- a/src/api/extraction/infrastructure/models/extraction_job.py +++ b/src/api/extraction/infrastructure/models/extraction_job.py @@ -39,6 +39,10 @@ class ExtractionJobModel(Base, TimestampMixin): entities_created: Mapped[int] = mapped_column(sa.Integer(), nullable=False, default=0) entities_modified: Mapped[int] = mapped_column(sa.Integer(), nullable=False, default=0) relationships_created: Mapped[int] = mapped_column(sa.Integer(), nullable=False, default=0) + relationships_modified: Mapped[int] = mapped_column(sa.Integer(), nullable=False, default=0) + run_started_at: Mapped[datetime | None] = mapped_column(sa.DateTime(timezone=True), nullable=True) + archived_at: Mapped[datetime | None] = mapped_column(sa.DateTime(timezone=True), nullable=True) + applied_mutations_jsonl: Mapped[str | None] = mapped_column(sa.Text(), nullable=True) class ExtractionRunModel(Base, TimestampMixin): diff --git a/src/api/extraction/infrastructure/repositories/extraction_job_repository.py b/src/api/extraction/infrastructure/repositories/extraction_job_repository.py index 87a51bc99..54cb222e1 100644 --- a/src/api/extraction/infrastructure/repositories/extraction_job_repository.py +++ b/src/api/extraction/infrastructure/repositories/extraction_job_repository.py @@ -49,6 +49,10 @@ def _job_model_to_record(model: ExtractionJobModel) -> ExtractionJobRecord: entities_created=model.entities_created, entities_modified=model.entities_modified, relationships_created=model.relationships_created, + relationships_modified=model.relationships_modified, + run_started_at=model.run_started_at, + archived_at=model.archived_at, + applied_mutations_jsonl=model.applied_mutations_jsonl, ) @@ -371,9 +375,12 @@ async def 
claim_next_pending_job( model = result.scalar_one_or_none() if model is None: return None + run = await self.get_run(knowledge_graph_id=knowledge_graph_id) model.status = ExtractionJobStatus.IN_PROGRESS.value model.worker_id = worker_id model.started_at = datetime.now(UTC) + if run is not None and run.started_at is not None: + model.run_started_at = run.started_at model.attempt = int(model.attempt) + 1 await self._session.flush() return _job_model_to_record(model) @@ -386,24 +393,50 @@ async def mark_job_completed( metrics: dict[str, Any] | None = None, ) -> None: payload = metrics or {} + entities_created = int(payload.get("entities_created", 0)) + entities_modified = int(payload.get("entities_modified", 0)) + relationships_created = int(payload.get("relationships_created", 0)) + relationships_modified = int(payload.get("relationships_modified", 0)) + write_ops = int( + payload.get("write_ops") + or ( + entities_created + + entities_modified + + relationships_created + + relationships_modified + ) + ) + now = datetime.now(UTC) + status = ( + ExtractionJobStatus.ARCHIVED.value + if write_ops > 0 + else ExtractionJobStatus.COMPLETED.value + ) + values: dict[str, Any] = { + "status": status, + "completed_at": now, + "input_tokens": int(payload.get("input_tokens", 0)), + "output_tokens": int(payload.get("output_tokens", 0)), + "cache_read_tokens": int(payload.get("cache_read_tokens", 0)), + "cache_creation_tokens": int(payload.get("cache_creation_tokens", 0)), + "cost_usd": float(payload.get("cost_usd", 0.0)), + "entities_created": entities_created, + "entities_modified": entities_modified, + "relationships_created": relationships_created, + "relationships_modified": relationships_modified, + } + if write_ops > 0: + values["archived_at"] = now + applied_jsonl = payload.get("applied_mutations_jsonl") + if isinstance(applied_jsonl, str) and applied_jsonl.strip(): + values["applied_mutations_jsonl"] = applied_jsonl await self._session.execute( update(ExtractionJobModel) 
.where( ExtractionJobModel.knowledge_graph_id == knowledge_graph_id, ExtractionJobModel.job_id == job_id, ) - .values( - status=ExtractionJobStatus.COMPLETED.value, - completed_at=datetime.now(UTC), - input_tokens=int(payload.get("input_tokens", 0)), - output_tokens=int(payload.get("output_tokens", 0)), - cache_read_tokens=int(payload.get("cache_read_tokens", 0)), - cache_creation_tokens=int(payload.get("cache_creation_tokens", 0)), - cost_usd=float(payload.get("cost_usd", 0.0)), - entities_created=int(payload.get("entities_created", 0)), - entities_modified=int(payload.get("entities_modified", 0)), - relationships_created=int(payload.get("relationships_created", 0)), - ) + .values(**values) ) async def mark_job_failed( @@ -486,6 +519,29 @@ async def reset_all_non_pending( ) return total + async def list_archived_jobs( + self, + *, + knowledge_graph_id: str, + limit: int = 500, + ) -> list[ExtractionJobRecord]: + stmt = ( + select(ExtractionJobModel) + .where( + ExtractionJobModel.knowledge_graph_id == knowledge_graph_id, + ExtractionJobModel.status == ExtractionJobStatus.ARCHIVED.value, + ) + .order_by( + ExtractionJobModel.run_started_at.desc().nullslast(), + ExtractionJobModel.archived_at.desc().nullslast(), + ExtractionJobModel.job_set_name.asc(), + ExtractionJobModel.order_index.asc(), + ) + .limit(limit) + ) + result = await self._session.execute(stmt) + return [_job_model_to_record(model) for model in result.scalars().all()] + async def aggregate_token_metrics(self, *, knowledge_graph_id: str) -> dict[str, float | int]: stmt = select( func.coalesce(func.sum(ExtractionJobModel.input_tokens), 0), diff --git a/src/api/infrastructure/extraction_workload/extraction_jobs_service.py b/src/api/infrastructure/extraction_workload/extraction_jobs_service.py index 988cf19fd..56262edfd 100644 --- a/src/api/infrastructure/extraction_workload/extraction_jobs_service.py +++ b/src/api/infrastructure/extraction_workload/extraction_jobs_service.py @@ -137,9 +137,13 @@ async 
def save_document( ) ontology = await self._knowledge_graph_repository.get_ontology(knowledge_graph_id) edge_types = edge_type_dicts_from_ontology(ontology) + from management.domain.extraction_relationship_authoring import node_type_dicts_from_ontology + + node_types = node_type_dicts_from_ontology(ontology) errors = document.validation_errors( entity_instance_counts=counts, edge_types=edge_types, + node_types=node_types, ) if errors: raise ValueError("; ".join(errors)) diff --git a/src/api/infrastructure/management/extraction_jobs_service.py b/src/api/infrastructure/management/extraction_jobs_service.py index b7fa82f90..9f37304e4 100644 --- a/src/api/infrastructure/management/extraction_jobs_service.py +++ b/src/api/infrastructure/management/extraction_jobs_service.py @@ -41,6 +41,8 @@ ) from management.domain.extraction_relationship_authoring import ( edge_type_dicts_from_ontology, + entity_type_authoring_context, + node_type_dicts_from_ontology, relationship_authoring_by_entity_type, ) from management.infrastructure.repositories.knowledge_graph_repository import ( @@ -114,10 +116,20 @@ async def get_extraction_jobs_document( ) ontology = await self._knowledge_graph_repository.get_ontology(kg_id) edge_types = edge_type_dicts_from_ontology(ontology) + node_types = node_type_dicts_from_ontology(ontology) entity_types = [ {"name": name, "instance_count": count} for name, count in sorted(counts.items(), key=lambda item: item[0]) ] + authoring_context = { + entity_type: entity_type_authoring_context( + entity_type, + node_types=node_types, + edge_types=edge_types, + entity_instance_counts=counts, + ) + for entity_type in counts + } return { **document.to_dict(), "entity_types": entity_types, @@ -125,6 +137,7 @@ async def get_extraction_jobs_document( entity_instance_counts=counts, edge_types=edge_types, ), + "entity_type_authoring_context": authoring_context, } async def save_extraction_jobs_document( @@ -152,9 +165,11 @@ async def save_extraction_jobs_document( ) 
ontology = await self._knowledge_graph_repository.get_ontology(kg_id) edge_types = edge_type_dicts_from_ontology(ontology) + node_types = node_type_dicts_from_ontology(ontology) errors = document.validation_errors( entity_instance_counts=counts, edge_types=edge_types, + node_types=node_types, ) if errors: raise ValueError("; ".join(errors)) @@ -339,6 +354,7 @@ async def get_database_status( "pending": counts.get("pending", 0), "in_progress": counts.get("in_progress", 0), "completed": counts.get("completed", 0), + "archived": counts.get("archived", 0), "failed": counts.get("failed", 0), }, "jobsBySet": jobs_by_set, @@ -549,6 +565,53 @@ async def reset_stale_jobs(self, *, user_id: str, kg_id: str) -> dict[str, Any]: ), } + async def get_archived_extraction_history( + self, + *, + user_id: str, + kg_id: str, + ) -> dict[str, Any] | None: + from extraction.application.archived_extraction_history import ( + group_archived_jobs_by_run_and_set, + ) + + kg = await self._knowledge_graph_service.get(user_id=user_id, kg_id=kg_id) + if kg is None: + return None + jobs = await self._extraction_job_repository.list_archived_jobs( + knowledge_graph_id=kg_id, + ) + runs = group_archived_jobs_by_run_and_set(jobs) + return { + "archivedJobCount": len(jobs), + "runs": runs, + } + + async def get_archived_job_mutations( + self, + *, + user_id: str, + kg_id: str, + job_id: str, + ) -> dict[str, Any] | None: + kg = await self._knowledge_graph_service.get(user_id=user_id, kg_id=kg_id) + if kg is None: + return None + job = await self._extraction_job_repository.get_by_job_id( + knowledge_graph_id=kg_id, + job_id=job_id, + ) + if job is None or job.status != ExtractionJobStatus.ARCHIVED: + return None + return { + "jobId": job.job_id, + "jobSet": job.job_set_name, + "runStartedAt": job.run_started_at.isoformat() if job.run_started_at else None, + "archivedAt": job.archived_at.isoformat() if job.archived_at else None, + "jsonl": job.applied_mutations_jsonl or "", + "writeOps": 
job.write_ops(), + } + async def reset_completed_jobs(self, *, user_id: str, kg_id: str) -> dict[str, Any]: _ = await self._knowledge_graph_service.get(user_id=user_id, kg_id=kg_id) reset = await self._extraction_job_repository.reset_jobs_by_status( diff --git a/src/api/infrastructure/migrations/versions/j3k4l5m6n7o8_add_extraction_job_archive_fields.py b/src/api/infrastructure/migrations/versions/j3k4l5m6n7o8_add_extraction_job_archive_fields.py new file mode 100644 index 000000000..f82b836f5 --- /dev/null +++ b/src/api/infrastructure/migrations/versions/j3k4l5m6n7o8_add_extraction_job_archive_fields.py @@ -0,0 +1,42 @@ +"""Add archive and graph-write fields to extraction jobs. + +Revision ID: j3k4l5m6n7o8 +Revises: i2j3k4l5m6n7 +Create Date: 2026-06-12 +""" + +from typing import Sequence, Union + +import sqlalchemy as sa +from alembic import op + +revision: str = "j3k4l5m6n7o8" +down_revision: Union[str, Sequence[str], None] = "i2j3k4l5m6n7" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + op.add_column( + "extraction_jobs", + sa.Column("relationships_modified", sa.Integer(), nullable=False, server_default="0"), + ) + op.add_column( + "extraction_jobs", + sa.Column("run_started_at", sa.DateTime(timezone=True), nullable=True), + ) + op.add_column( + "extraction_jobs", + sa.Column("archived_at", sa.DateTime(timezone=True), nullable=True), + ) + op.add_column( + "extraction_jobs", + sa.Column("applied_mutations_jsonl", sa.Text(), nullable=True), + ) + + +def downgrade() -> None: + op.drop_column("extraction_jobs", "applied_mutations_jsonl") + op.drop_column("extraction_jobs", "archived_at") + op.drop_column("extraction_jobs", "run_started_at") + op.drop_column("extraction_jobs", "relationships_modified") diff --git a/src/api/ingestion/infrastructure/adapters/github.py b/src/api/ingestion/infrastructure/adapters/github.py index 8374e1e16..f4632223c 100644 --- 
a/src/api/ingestion/infrastructure/adapters/github.py +++ b/src/api/ingestion/infrastructure/adapters/github.py @@ -4,15 +4,16 @@ producing raw content and changeset entries for packaging into a JobPackage. Supports: -- Full refresh: fetches all blobs from the repository tree. +- Full refresh: downloads a repository tarball (one archive fetch). - Incremental sync: uses the GitHub Compare API to find only files that changed since the previous checkpoint commit SHA. API endpoints used: - GET /repos/{owner}/{repo}/branches/{branch} — resolve branch to commit SHA -- GET /repos/{owner}/{repo}/git/trees/{sha}?recursive=1 — full tree (blobs) +- GET /repos/{owner}/{repo}/tarball/{ref} — full refresh archive download +- GET /repos/{owner}/{repo}/git/trees/{sha}?recursive=1 — branch file counts - GET /repos/{owner}/{repo}/compare/{base}...{head} — changed files -- GET /repos/{owner}/{repo}/git/blobs/{sha} — raw file content (base64) +- GET /repos/{owner}/{repo}/git/blobs/{sha} — incremental blob content (base64) dlt integration note: this adapter class provides the extraction contract (IDatasourceAdapter). The Ingestion service (a future task) wraps this adapter @@ -25,7 +26,10 @@ import asyncio import base64 +import io +import json import mimetypes +import tarfile from typing import Any import httpx @@ -41,6 +45,7 @@ # GitHub REST API base URL _GITHUB_API_BASE = "https://api.github.com" +_USER_AGENT = "Kartograph-GitHub-Ingestion/1.0" # Version of the checkpoint schema this adapter understands. 
# Bump on backwards-incompatible checkpoint changes; callers should @@ -129,6 +134,17 @@ def _parse_connection_config( "connection_config must include either 'repo_url' or 'owner'+'repo' keys" ) + @staticmethod + def _github_headers(token: str) -> dict[str, str]: + headers = { + "Accept": "application/vnd.github+json", + "X-GitHub-Api-Version": "2022-11-28", + "User-Agent": _USER_AGENT, + } + if token: + headers["Authorization"] = f"Bearer {token}" + return headers + async def extract( self, connection_config: dict[str, str], @@ -163,10 +179,6 @@ async def extract( """ owner, repo, branch = self._parse_connection_config(connection_config) token = credentials.get("token") or credentials.get("access_token", "") - if not token: - raise ValueError( - "GitHub credentials must include 'token' or 'access_token'" - ) use_full_refresh = ( sync_mode == SyncMode.FULL_REFRESH @@ -174,27 +186,27 @@ async def extract( or _COMMIT_SHA_KEY not in checkpoint.data ) - client = self._http_client or httpx.AsyncClient() - headers = { - "Authorization": f"Bearer {token}", - "Accept": "application/vnd.github+json", - "X-GitHub-Api-Version": "2022-11-28", - } + client = self._http_client or httpx.AsyncClient(follow_redirects=True) + headers = self._github_headers(token) try: - # Step 1: Resolve branch to current HEAD commit SHA head_sha = await self._get_branch_head_sha( client, headers, owner, repo, branch ) - # Step 2: Determine which files to fetch if use_full_refresh: - files_to_fetch = await self._get_all_tree_blobs( - client, headers, owner, repo, head_sha + changeset_entries, content_blobs, branch_file_count = ( + await self._extract_full_refresh_via_tarball( + client, + headers, + owner, + repo, + branch, + head_sha, + ) ) - branch_file_count = len(files_to_fetch) else: - assert checkpoint is not None # narrowed above + assert checkpoint is not None base_sha = checkpoint.data[_COMMIT_SHA_KEY] files_to_fetch = await self._get_changed_files( client, headers, owner, repo, base_sha, 
head_sha @@ -202,11 +214,9 @@ async def extract( branch_file_count = await self._count_tree_blobs( client, headers, owner, repo, head_sha ) - - # Step 3: Fetch content for each file - changeset_entries, content_blobs = await self._fetch_file_contents( - client, headers, owner, repo, files_to_fetch - ) + changeset_entries, content_blobs = await self._fetch_file_contents( + client, headers, owner, repo, files_to_fetch + ) finally: # Only close the client if we created it ourselves @@ -253,9 +263,7 @@ async def _get_branch_head_sha( httpx.HTTPStatusError: If the GitHub API returns a non-2xx status. """ url = f"{_GITHUB_API_BASE}/repos/{owner}/{repo}/branches/{branch}" - response = await client.get(url, headers=headers) - response.raise_for_status() - data: dict[str, Any] = response.json() + data = await self._get_json_with_auth_fallback(client, url, headers=headers) return str(data["commit"]["sha"]) async def _get_all_tree_blobs( @@ -284,9 +292,7 @@ async def _get_all_tree_blobs( url = ( f"{_GITHUB_API_BASE}/repos/{owner}/{repo}/git/trees/{tree_sha}?recursive=1" ) - response = await client.get(url, headers=headers) - response.raise_for_status() - tree_data: dict[str, Any] = response.json() + tree_data = await self._get_json_with_auth_fallback(client, url, headers=headers) result: list[dict[str, Any]] = [] for item in tree_data.get("tree", []): @@ -314,9 +320,7 @@ async def _count_tree_blobs( url = ( f"{_GITHUB_API_BASE}/repos/{owner}/{repo}/git/trees/{tree_sha}?recursive=1" ) - response = await client.get(url, headers=headers) - response.raise_for_status() - tree_data: dict[str, Any] = response.json() + tree_data = await self._get_json_with_auth_fallback(client, url, headers=headers) return sum( 1 for item in tree_data.get("tree", []) if item.get("type") == "blob" ) @@ -349,9 +353,7 @@ async def _get_changed_files( ``previous_path`` keys. 
""" url = f"{_GITHUB_API_BASE}/repos/{owner}/{repo}/compare/{base_sha}...{head_sha}" - response = await client.get(url, headers=headers) - response.raise_for_status() - compare_data: dict[str, Any] = response.json() + compare_data = await self._get_json_with_auth_fallback(client, url, headers=headers) result: list[dict[str, Any]] = [] for file_info in compare_data.get("files", []): @@ -378,6 +380,138 @@ async def _get_changed_files( ) return result + async def _extract_full_refresh_via_tarball( + self, + client: httpx.AsyncClient, + headers: dict[str, str], + owner: str, + repo: str, + branch: str, + head_sha: str, + ) -> tuple[list[ChangesetEntry], dict[str, bytes], int]: + """Download repository tarball and build ADD changeset entries.""" + url = f"{_GITHUB_API_BASE}/repos/{owner}/{repo}/tarball/{branch}" + archive_bytes = await self._get_bytes_with_auth_fallback( + client, + url, + headers=headers, + ) + try: + branch_file_count = await self._count_tree_blobs( + client, headers, owner, repo, head_sha + ) + except httpx.HTTPStatusError: + # Tarball extraction already succeeded; tree count is metadata only. 
+ branch_file_count = 0 + return self._changeset_from_tarball(archive_bytes, branch_file_count=branch_file_count) + + @staticmethod + def _changeset_from_tarball( + archive_bytes: bytes, + *, + branch_file_count: int, + ) -> tuple[list[ChangesetEntry], dict[str, bytes], int]: + changeset_entries: list[ChangesetEntry] = [] + content_blobs: dict[str, bytes] = {} + file_count = 0 + + with tarfile.open(fileobj=io.BytesIO(archive_bytes), mode="r:gz") as archive: + members = [member for member in archive.getmembers() if member.isfile()] + if not members: + return [], {}, branch_file_count + + root_prefix = members[0].name.split("/", 1)[0] + "/" + for member in members: + if not member.name.startswith(root_prefix): + continue + relative_path = member.name[len(root_prefix) :] + if not relative_path or relative_path.endswith("/"): + continue + extracted = archive.extractfile(member) + if extracted is None: + continue + raw_bytes = extracted.read() + file_count += 1 + content_ref = ContentRef.from_bytes(raw_bytes) + content_type, _ = mimetypes.guess_type(relative_path) + if content_type is None: + content_type = "application/octet-stream" + changeset_entries.append( + ChangesetEntry( + operation=ChangeOperation.ADD, + id=content_ref.hex_digest, + type=_ENTRY_TYPE_FILE, + path=relative_path, + content_ref=content_ref, + content_type=content_type, + metadata={}, + ) + ) + content_blobs[content_ref.hex_digest] = raw_bytes + + return changeset_entries, content_blobs, branch_file_count or file_count + + @staticmethod + def _unauthenticated_headers(headers: dict[str, str]) -> dict[str, str]: + return { + key: value + for key, value in headers.items() + if key.lower() != "authorization" + } + + async def _get_with_auth_fallback( + self, + client: httpx.AsyncClient, + url: str, + *, + headers: dict[str, str], + ) -> httpx.Response: + response = await client.get(url, headers=headers) + if response.status_code == 403 and headers.get("Authorization"): + response = await client.get( + 
url, + headers=self._unauthenticated_headers(headers), + ) + if response.status_code >= 400: + raise httpx.HTTPStatusError( + self._github_error_detail(response), + request=response.request, + response=response, + ) + return response + + async def _get_json_with_auth_fallback( + self, + client: httpx.AsyncClient, + url: str, + *, + headers: dict[str, str], + ) -> dict[str, Any]: + response = await self._get_with_auth_fallback(client, url, headers=headers) + return response.json() + + async def _get_bytes_with_auth_fallback( + self, + client: httpx.AsyncClient, + url: str, + *, + headers: dict[str, str], + ) -> bytes: + response = await self._get_with_auth_fallback(client, url, headers=headers) + return response.content + + @staticmethod + def _github_error_detail(response: httpx.Response) -> str: + try: + payload = response.json() + except json.JSONDecodeError: + return response.text.strip() or f"HTTP {response.status_code}" + message = str(payload.get("message") or "").strip() + documentation = str(payload.get("documentation_url") or "").strip() + if message and documentation: + return f"{message} ({documentation})" + return message or f"HTTP {response.status_code}" + async def _fetch_file_contents( self, client: httpx.AsyncClient, @@ -480,9 +614,7 @@ async def _fetch_blob( ValueError: If the blob encoding is not ``base64``. 
""" url = f"{_GITHUB_API_BASE}/repos/{owner}/{repo}/git/blobs/{blob_sha}" - response = await client.get(url, headers=headers) - response.raise_for_status() - blob_data: dict[str, Any] = response.json() + blob_data = await self._get_json_with_auth_fallback(client, url, headers=headers) encoding: str = blob_data.get("encoding", "base64") if encoding != "base64": diff --git a/src/api/main.py b/src/api/main.py index cf91c92be..3b08db27f 100644 --- a/src/api/main.py +++ b/src/api/main.py @@ -179,6 +179,7 @@ async def _resolve_github_tracked_head_commit( headers = { "Accept": "application/vnd.github+json", "X-GitHub-Api-Version": "2022-11-28", + "User-Agent": "Kartograph-GitHub-Ingestion/1.0", } token = credentials.get("token") or credentials.get("access_token") if token: diff --git a/src/api/management/domain/extraction_job_config.py b/src/api/management/domain/extraction_job_config.py index 397a005cf..3f4fceca1 100644 --- a/src/api/management/domain/extraction_job_config.py +++ b/src/api/management/domain/extraction_job_config.py @@ -8,7 +8,9 @@ from management.domain.extraction_relationship_authoring import ( + per_instance_description_property_errors, per_instance_description_relationship_errors, + per_instance_description_unknown_relationship_errors, ) class ExtractionJobSetStrategy(StrEnum): """Batching strategy for an extraction job set.""" @@ -39,6 +41,7 @@ def validation_errors( *, entity_instance_counts: dict[str, int], edge_types: list[dict[str, Any]] | None = None, + node_types: list[dict[str, Any]] | None = None, ) -> tuple[str, ...]: """Return human-readable validation errors for this job set.""" if not self.enabled: @@ -70,6 +73,23 @@ def validation_errors( entity_instance_counts=entity_instance_counts, ) ) + errors.extend( + f"{self.name}: {err}" + for err in per_instance_description_unknown_relationship_errors( + self.description, + self.entity_type, + edge_types=edge_types, + ) + ) + if self.entity_type and node_types: + errors.extend( + f"{self.name}: 
{err}" + for err in per_instance_description_property_errors( + self.description, + self.entity_type, + node_types=node_types, + ) + ) elif self.strategy == ExtractionJobSetStrategy.BY_FILES: if not self.file_patterns: errors.append(f"{self.name}: at least one file pattern is required for by_files.") @@ -136,6 +156,7 @@ def validation_errors( *, entity_instance_counts: dict[str, int], edge_types: list[dict[str, Any]] | None = None, + node_types: list[dict[str, Any]] | None = None, ) -> tuple[str, ...]: errors: list[str] = [] seen_names: set[str] = set() @@ -147,6 +168,7 @@ def validation_errors( job_set.validation_errors( entity_instance_counts=entity_instance_counts, edge_types=edge_types, + node_types=node_types, ) ) return tuple(errors) diff --git a/src/api/management/domain/extraction_relationship_authoring.py b/src/api/management/domain/extraction_relationship_authoring.py index f388eb672..6cd89634b 100644 --- a/src/api/management/domain/extraction_relationship_authoring.py +++ b/src/api/management/domain/extraction_relationship_authoring.py @@ -2,6 +2,7 @@ from __future__ import annotations +import re from dataclasses import dataclass from typing import Any @@ -62,6 +63,170 @@ def edge_type_dicts_from_ontology(ontology: Any | None) -> list[dict[str, Any]]: return rows +def node_type_dicts_from_ontology(ontology: Any | None) -> list[dict[str, Any]]: + """Normalize ontology node types for property authoring helpers.""" + if ontology is None: + return [] + node_types = getattr(ontology, "node_types", None) or [] + rows: list[dict[str, Any]] = [] + for node in node_types: + rows.append( + { + "label": str(getattr(node, "label", "") or "").strip(), + "description": str(getattr(node, "description", "") or "").strip(), + "required_properties": list(getattr(node, "required_properties", None) or ()), + "optional_properties": list(getattr(node, "optional_properties", None) or ()), + } + ) + return rows + + +def properties_for_entity_type( + entity_type: str, + *, + 
node_types: list[dict[str, Any]], +) -> tuple[str, ...]: + """Return all schema property names declared on one entity type.""" + for node in node_types: + if str(node.get("label") or "").strip() != entity_type: + continue + required = tuple(str(name).strip() for name in node.get("required_properties") or () if str(name).strip()) + optional = tuple(str(name).strip() for name in node.get("optional_properties") or () if str(name).strip()) + return required + optional + return () + + +def entity_type_authoring_context( + entity_type: str, + *, + node_types: list[dict[str, Any]], + edge_types: list[dict[str, Any]], + entity_instance_counts: dict[str, int], +) -> dict[str, Any]: + """Schema-backed context for drafting one by_instances job set description.""" + properties = properties_for_entity_type(entity_type, node_types=node_types) + relationship_payload = relationship_authoring_payload_for_entity_type( + entity_type, + edge_types=edge_types, + entity_instance_counts=entity_instance_counts, + ) + return { + "entity_type": entity_type, + "properties": list(properties), + "relationship_authoring": relationship_payload, + } + + +_RELATIONSHIP_LINE_RE = re.compile( + r"^(?:IGNORE\s+)?(?P<entity>[^>]+?)\s*->\s*(?P<label>[^>]+?)\s*->\s*(?P<counterpart>[^:]+?)\s*:", + re.IGNORECASE, +) + + +def _parse_relationship_lines(description: str) -> list[tuple[bool, str, str, str]]: + """Return (is_ignore, entity_type, label, counterpart) tuples from description lines.""" + parsed: list[tuple[bool, str, str, str]] = [] + for raw_line in description.splitlines(): + stripped = raw_line.strip() + if "->" not in stripped or ":" not in stripped: + continue + is_ignore = stripped.upper().startswith("IGNORE ") + match = _RELATIONSHIP_LINE_RE.match(stripped) + if match is None: + continue + parsed.append( + ( + is_ignore, + match.group("entity").strip(), + match.group("label").strip(), + match.group("counterpart").strip(), + ) + ) + return parsed + + +def _valid_relationship_keys_for_entity( 
+ entity_type: str, + *, + edge_types: list[dict[str, Any]], +) -> set[tuple[str, str, str]]: + keys: set[tuple[str, str, str]] = set() + for line in _relationship_lines_involving_entity_type(entity_type, edge_types=edge_types): + keys.add((line.entity_type, line.relationship_label, line.counterpart_type)) + return keys + + +def _property_names_from_description(description: str) -> set[str]: + names: set[str] = set() + in_properties = False + for raw_line in description.splitlines(): + stripped = raw_line.strip() + if stripped.lower().startswith("properties:"): + in_properties = True + continue + if in_properties and "->" in stripped and ":" in stripped: + in_properties = False + if not in_properties: + continue + if stripped.startswith("- "): + body = stripped[2:].strip() + if ":" in body: + names.add(body.split(":", 1)[0].strip()) + return names + + +def per_instance_description_property_errors( + description: str, + entity_type: str, + *, + node_types: list[dict[str, Any]], +) -> tuple[str, ...]: + """Validate Properties section names against ontology node type definitions.""" + if not node_types: + return () + known = set(properties_for_entity_type(entity_type, node_types=node_types)) + if not known: + return (f"{entity_type}: entity type not found in ontology.",) + + listed = _property_names_from_description(description) + errors: list[str] = [] + unknown = sorted(name for name in listed if name not in known) + for name in unknown: + errors.append( + f"{entity_type}: property '{name}' is not defined on this entity type in the ontology." + ) + missing = sorted(name for name in known if name not in listed) + for name in missing: + errors.append( + f"{entity_type}: missing property line '- {name}:' under Properties (required by schema)." 
+ ) + return tuple(errors) + + +def per_instance_description_unknown_relationship_errors( + description: str, + entity_type: str, + *, + edge_types: list[dict[str, Any]], +) -> tuple[str, ...]: + """Reject relationship lines that do not exist in the ontology for this entity type.""" + if not edge_types: + return () + valid = _valid_relationship_keys_for_entity(entity_type, edge_types=edge_types) + errors: list[str] = [] + for is_ignore, line_entity, label, counterpart in _parse_relationship_lines(description): + if line_entity != entity_type: + continue + key = (line_entity, label, counterpart) + if key not in valid: + action = "IGNORE line" if is_ignore else "relationship line" + errors.append( + f"{entity_type}: {action} '{line_entity} -> {label} -> {counterpart}' " + "is not a relationship type in the ontology for this entity type." + ) + return tuple(errors) + + def _relationship_lines_involving_entity_type( entity_type: str, *, diff --git a/src/api/management/infrastructure/repositories/data_source_sync_run_repository.py b/src/api/management/infrastructure/repositories/data_source_sync_run_repository.py index aa234411f..cd50a56e5 100644 --- a/src/api/management/infrastructure/repositories/data_source_sync_run_repository.py +++ b/src/api/management/infrastructure/repositories/data_source_sync_run_repository.py @@ -93,8 +93,10 @@ async def get_by_id(self, sync_run_id: str) -> DataSourceSyncRun | None: return self._to_domain(model) async def find_by_data_source(self, data_source_id: str) -> list[DataSourceSyncRun]: - stmt = select(DataSourceSyncRunModel).where( - DataSourceSyncRunModel.data_source_id == data_source_id + stmt = ( + select(DataSourceSyncRunModel) + .where(DataSourceSyncRunModel.data_source_id == data_source_id) + .order_by(desc(DataSourceSyncRunModel.created_at)) ) result = await self._session.execute(stmt) models = result.scalars().all() diff --git a/src/api/management/presentation/knowledge_graphs/extraction_jobs_routes.py 
b/src/api/management/presentation/knowledge_graphs/extraction_jobs_routes.py index ab05bf497..087391ffd 100644 --- a/src/api/management/presentation/knowledge_graphs/extraction_jobs_routes.py +++ b/src/api/management/presentation/knowledge_graphs/extraction_jobs_routes.py @@ -200,6 +200,44 @@ async def get_extraction_job_activity( return payload +@router.get("/knowledge-graphs/{kg_id}/extraction-jobs/archived-history") +async def get_archived_extraction_history( + kg_id: str, + service: Annotated[ExtractionJobsService, Depends(get_extraction_jobs_service)], + current_user: Annotated[CurrentUser, Depends(get_current_user)], +) -> dict[str, Any]: + try: + payload = await service.get_archived_extraction_history( + user_id=current_user.user_id.value, + kg_id=kg_id, + ) + except UnauthorizedError: + raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Forbidden") + if payload is None: + raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Knowledge graph not found") + return payload + + +@router.get("/knowledge-graphs/{kg_id}/extraction-jobs/jobs/{job_id}/archived-mutations") +async def get_archived_job_mutations( + kg_id: str, + job_id: str, + service: Annotated[ExtractionJobsService, Depends(get_extraction_jobs_service)], + current_user: Annotated[CurrentUser, Depends(get_current_user)], +) -> dict[str, Any]: + try: + payload = await service.get_archived_job_mutations( + user_id=current_user.user_id.value, + kg_id=kg_id, + job_id=job_id, + ) + except UnauthorizedError: + raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Forbidden") + if payload is None: + raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Archived job not found") + return payload + + @router.get("/knowledge-graphs/{kg_id}/extraction-jobs/database-status") async def get_extraction_database_status( kg_id: str, diff --git a/src/api/tests/integration/management/test_data_source_sync_run_repository.py 
b/src/api/tests/integration/management/test_data_source_sync_run_repository.py index c441b3b7b..8bfabc4f9 100644 --- a/src/api/tests/integration/management/test_data_source_sync_run_repository.py +++ b/src/api/tests/integration/management/test_data_source_sync_run_repository.py @@ -396,6 +396,66 @@ async def test_finds_sync_runs_by_data_source( for result in results: assert result.data_source_id == ds1.id.value + @pytest.mark.asyncio + async def test_find_by_data_source_orders_newest_first( + self, + data_source_sync_run_repository: DataSourceSyncRunRepository, + data_source_repository: DataSourceRepository, + knowledge_graph_repository: KnowledgeGraphRepository, + async_session, + test_tenant: str, + test_workspace: str, + clean_management_data, + ): + """Should return sync runs newest-first so UI can treat index 0 as latest.""" + kg = KnowledgeGraph.create( + tenant_id=test_tenant, + workspace_id=test_workspace, + name="Test KG", + description="For sync run ordering tests", + ) + async with async_session.begin(): + await knowledge_graph_repository.save(kg) + + ds = DataSource.create( + knowledge_graph_id=kg.id.value, + tenant_id=test_tenant, + name="Ordering DS", + adapter_type=DataSourceAdapterType.GITHUB, + connection_config={"repo": "org/repo"}, + ) + async with async_session.begin(): + await data_source_repository.save(ds) + + oldest_id = str(ULID()) + middle_id = str(ULID()) + newest_id = str(ULID()) + timestamps = [ + datetime(2026, 1, 1, tzinfo=UTC), + datetime(2026, 1, 2, tzinfo=UTC), + datetime(2026, 1, 3, tzinfo=UTC), + ] + for run_id, created_at in zip( + [oldest_id, middle_id, newest_id], + timestamps, + strict=True, + ): + sync_run = DataSourceSyncRun( + id=run_id, + data_source_id=ds.id.value, + status="failed" if run_id == oldest_id else "ingested", + started_at=created_at, + completed_at=created_at, + error="old failure" if run_id == oldest_id else None, + created_at=created_at, + ) + async with async_session.begin(): + await 
data_source_sync_run_repository.save(sync_run) + + results = await data_source_sync_run_repository.find_by_data_source(ds.id.value) + + assert [run.id for run in results] == [newest_id, middle_id, oldest_id] + @pytest.mark.asyncio async def test_returns_empty_for_data_source_with_no_runs( self, diff --git a/src/api/tests/unit/extraction/application/test_archived_extraction_history.py b/src/api/tests/unit/extraction/application/test_archived_extraction_history.py new file mode 100644 index 000000000..a00684ea8 --- /dev/null +++ b/src/api/tests/unit/extraction/application/test_archived_extraction_history.py @@ -0,0 +1,47 @@ +"""Unit tests for archived extraction history grouping.""" + +from __future__ import annotations + +from datetime import UTC, datetime + +from extraction.application.archived_extraction_history import group_archived_jobs_by_run_and_set +from extraction.domain.extraction_job import ExtractionJobRecord, ExtractionJobStatus + + +def _job( + *, + job_id: str, + job_set: str, + run_started_at: datetime, +) -> ExtractionJobRecord: + return ExtractionJobRecord( + id="01JOB", + knowledge_graph_id="01KG", + job_id=job_id, + job_set_name=job_set, + strategy="by_instances", + status=ExtractionJobStatus.ARCHIVED, + order_index=0, + description="", + entities_modified=2, + run_started_at=run_started_at, + archived_at=run_started_at, + applied_mutations_jsonl='{"op":"UPDATE","type":"node"}', + ) + + +def test_group_archived_jobs_by_run_and_set() -> None: + run_a = datetime(2026, 6, 12, 18, 0, tzinfo=UTC) + run_b = datetime(2026, 6, 11, 12, 0, tzinfo=UTC) + grouped = group_archived_jobs_by_run_and_set( + [ + _job(job_id="job-a1", job_set="Adapter Deep Extraction", run_started_at=run_a), + _job(job_id="job-a2", job_set="Adapter Deep Extraction", run_started_at=run_a), + _job(job_id="job-b1", job_set="Resource Extraction", run_started_at=run_b), + ] + ) + + assert len(grouped) == 2 + assert grouped[0]["jobCount"] == 2 + assert grouped[0]["jobSets"][0]["jobSet"] 
== "Adapter Deep Extraction" + assert grouped[1]["jobCount"] == 1 diff --git a/src/api/tests/unit/extraction/application/test_skill_resolution_service.py b/src/api/tests/unit/extraction/application/test_skill_resolution_service.py index 168a3166c..5f6bb40bb 100644 --- a/src/api/tests/unit/extraction/application/test_skill_resolution_service.py +++ b/src/api/tests/unit/extraction/application/test_skill_resolution_service.py @@ -65,7 +65,8 @@ async def test_extraction_mode_uses_extraction_defaults(self): assert "job_setup" in resolved.skills assert "per_instance_description_authoring" in resolved.skills assert "EntityType} ->" in resolved.skills["per_instance_description_authoring"] - assert "MORE live instances" in resolved.skills["per_instance_description_authoring"] + assert "entity_type_authoring_context" in resolved.skills["per_instance_description_authoring"] + assert "never invent relationship labels" in resolved.skills["per_instance_description_authoring"] assert "Ignore these relationships" in resolved.skills["per_instance_description_authoring"] assert "IGNORE" in resolved.skills["per_instance_description_authoring"] assert "Implementation Analysis" in resolved.skills["per_instance_description_authoring"] diff --git a/src/api/tests/unit/extraction/infrastructure/test_extraction_job_mutation_metrics.py b/src/api/tests/unit/extraction/infrastructure/test_extraction_job_mutation_metrics.py new file mode 100644 index 000000000..f8f7f32fc --- /dev/null +++ b/src/api/tests/unit/extraction/infrastructure/test_extraction_job_mutation_metrics.py @@ -0,0 +1,83 @@ +"""Unit tests for graph write metrics parsed from applied mutation JSONL.""" + +from __future__ import annotations + +import json +from pathlib import Path + +from extraction.infrastructure.extraction_job_mutation_metrics import ( + metrics_from_mutation_jsonl, + metrics_from_mutation_workdir, +) + + +def test_metrics_from_mutation_jsonl_counts_instance_operations() -> None: + jsonl = "\n".join( + [ + 
json.dumps( + { + "op": "CREATE", + "type": "node", + "id": "adapter:abc", + "label": "Adapter", + "set_properties": {"slug": "a", "data_source_id": "ds"}, + } + ), + json.dumps( + { + "op": "UPDATE", + "type": "node", + "id": "adapter:def", + "label": "Adapter", + "set_properties": {"description": "updated"}, + } + ), + json.dumps( + { + "op": "CREATE", + "type": "edge", + "label": "deploys", + "source_id": "adapter:abc", + "target_id": "cluster:xyz", + } + ), + json.dumps( + { + "op": "DEFINE", + "type": "node", + "label": "Adapter", + } + ), + ] + ) + + metrics = metrics_from_mutation_jsonl(jsonl) + + assert metrics["entities_created"] == 1 + assert metrics["entities_modified"] == 1 + assert metrics["relationships_created"] == 1 + assert metrics["relationships_modified"] == 0 + assert metrics["write_ops"] == 3 + + +def test_metrics_from_mutation_workdir_reads_latest_jsonl(tmp_path: Path) -> None: + mutations = tmp_path / "mutations" + mutations.mkdir() + (mutations / "batch.jsonl").write_text( + json.dumps( + { + "op": "UPDATE", + "type": "edge", + "id": "edge:1", + "label": "connects", + "set_properties": {"weight": 2}, + } + ) + + "\n", + encoding="utf-8", + ) + + metrics = metrics_from_mutation_workdir(tmp_path) + + assert metrics["relationships_modified"] == 1 + assert metrics["write_ops"] == 1 diff --git a/src/api/tests/unit/ingestion/infrastructure/adapters/test_github_adapter.py b/src/api/tests/unit/ingestion/infrastructure/adapters/test_github_adapter.py index 4efa93f85..f5ea51f08 100644 --- a/src/api/tests/unit/ingestion/infrastructure/adapters/test_github_adapter.py +++ b/src/api/tests/unit/ingestion/infrastructure/adapters/test_github_adapter.py @@ -24,7 +24,9 @@ import asyncio import base64 +import io import json +import tarfile import httpx import pytest @@ -115,6 +117,21 @@ def _blob_response(content: bytes) -> dict: } +def _tarball_bytes( + files: dict[str, bytes], + *, + root: str = "myorg-myrepo-abc123", +) -> bytes: + buffer = io.BytesIO() + 
with tarfile.open(fileobj=buffer, mode="w:gz") as archive: + for path, content in files.items(): + payload = io.BytesIO(content) + info = tarfile.TarInfo(name=f"{root}/{path}") + info.size = len(content) + archive.addfile(info, payload) + return buffer.getvalue() + + # --------------------------------------------------------------------------- # Fake transport # --------------------------------------------------------------------------- @@ -138,6 +155,14 @@ def __init__(self, responses: dict[str, dict]) -> None: async def handle_async_request(self, request: httpx.Request) -> httpx.Response: url_path = request.url.path for path_suffix, response_data in self._responses.items(): + if path_suffix == "__tarball__": + if "/tarball/" in url_path: + payload = response_data if isinstance(response_data, bytes) else response_data["bytes"] + return httpx.Response( + status_code=200, + content=payload, + headers={"content-type": "application/x-gzip"}, + ) if url_path.endswith(path_suffix) or path_suffix in url_path: return httpx.Response( status_code=200, @@ -170,13 +195,14 @@ def full_refresh_transport() -> FakeGitHubTransport: """Transport configured for a full refresh extraction.""" return FakeGitHubTransport( { - # Branch tip "/branches/main": _branch_response(HEAD_SHA), - # Full tree f"/git/trees/{HEAD_SHA}": _tree_response(), - # Blobs - f"/git/blobs/{BLOB_SHA_README}": _blob_response(README_CONTENT), - f"/git/blobs/{BLOB_SHA_MAIN}": _blob_response(MAIN_PY_CONTENT), + "__tarball__": _tarball_bytes( + { + "README.md": README_CONTENT, + "src/main.py": MAIN_PY_CONTENT, + } + ), } ) @@ -307,29 +333,20 @@ async def test_full_refresh_fetches_content_for_each_file( async def test_full_refresh_skips_tree_entries_that_are_not_blobs( self, connection_config, credentials ): - """Directory (tree-type) entries in the tree are skipped.""" + """Tarball extraction ignores directory entries and keeps file paths.""" transport = FakeGitHubTransport( { "/branches/main": 
_branch_response(HEAD_SHA), f"/git/trees/{HEAD_SHA}": _tree_response( [ - { - "path": "src", - "type": "tree", # directory — must be skipped - "sha": "dir-sha", - "size": 0, - "mode": "040000", - }, { "path": "src/main.py", "type": "blob", "sha": BLOB_SHA_MAIN, - "size": len(MAIN_PY_CONTENT), - "mode": "100644", }, ] ), - f"/git/blobs/{BLOB_SHA_MAIN}": _blob_response(MAIN_PY_CONTENT), + "__tarball__": _tarball_bytes({"src/main.py": MAIN_PY_CONTENT}), } ) client = httpx.AsyncClient(transport=transport) @@ -658,6 +675,7 @@ async def test_authorization_header_sent_with_token( { "/branches/main": _branch_response(HEAD_SHA), f"/git/trees/{HEAD_SHA}": _tree_response([]), + "__tarball__": _tarball_bytes({}), } ) calls: list[str] = [] @@ -700,6 +718,12 @@ async def handle_async_request( data: dict = _branch_response(HEAD_SHA) elif "git/trees" in url_path: data = _tree_response([]) + elif "/tarball/" in url_path: + return httpx.Response( + 200, + content=_tarball_bytes({}), + headers={"content-type": "application/x-gzip"}, + ) else: raise RuntimeError(f"Unexpected: {url_path}") return httpx.Response( @@ -884,47 +908,59 @@ async def test_changeset_entry_type_is_file( assert entry.type == "io.kartograph.change.file" @pytest.mark.asyncio - async def test_full_refresh_fetches_blobs_with_parallelism( + async def test_full_refresh_downloads_repository_tarball( self, connection_config, credentials ): - """Blob fetches should run concurrently for better throughput.""" - max_in_flight = 0 - in_flight = 0 - - files = [ - { - "path": f"src/file_{i}.py", - "type": "blob", - "sha": f"blob{i:02d}" * 5, - } - for i in range(4) - ] + """Full refresh should download one archive instead of per-blob API calls.""" + tarball_requests = 0 + blob_requests = 0 - class ConcurrentBlobTransport(httpx.AsyncBaseTransport): + class TarballTransport(httpx.AsyncBaseTransport): async def handle_async_request( self, request: httpx.Request ) -> httpx.Response: - nonlocal max_in_flight, in_flight + nonlocal 
tarball_requests, blob_requests url_path = request.url.path if url_path.endswith("/branches/main"): data: dict = _branch_response(HEAD_SHA) - elif f"/git/trees/{HEAD_SHA}" in url_path: - data = _tree_response(files) - elif "/git/blobs/" in url_path: - in_flight += 1 - max_in_flight = max(max_in_flight, in_flight) - await asyncio.sleep(0.03) - in_flight -= 1 + return httpx.Response( + 200, + content=json.dumps(data).encode(), + headers={"content-type": "application/json"}, + ) + if f"/git/trees/{HEAD_SHA}" in url_path: + data = _tree_response( + [ + {"path": f"src/file_{i}.py", "type": "blob", "sha": f"sha{i}"} + for i in range(4) + ] + ) + return httpx.Response( + 200, + content=json.dumps(data).encode(), + headers={"content-type": "application/json"}, + ) + if "/tarball/" in url_path: + tarball_requests += 1 + payload = _tarball_bytes( + {f"src/file_{i}.py": b"print('hi')\n" for i in range(4)} + ) + return httpx.Response( + 200, + content=payload, + headers={"content-type": "application/x-gzip"}, + ) + if "/git/blobs/" in url_path: + blob_requests += 1 data = _blob_response(b"print('hi')\n") - else: - raise RuntimeError(f"Unexpected URL: {url_path}") - return httpx.Response( - 200, - content=json.dumps(data).encode(), - headers={"content-type": "application/json"}, - ) + return httpx.Response( + 200, + content=json.dumps(data).encode(), + headers={"content-type": "application/json"}, + ) + raise RuntimeError(f"Unexpected URL: {url_path}") - client = httpx.AsyncClient(transport=ConcurrentBlobTransport()) + client = httpx.AsyncClient(transport=TarballTransport()) adapter = GitHubAdapter(http_client=client) result = await adapter.extract( @@ -935,4 +971,70 @@ async def handle_async_request( ) assert len(result.changeset_entries) == 4 - assert max_in_flight >= 2 + assert tarball_requests == 1 + assert blob_requests == 0 + + @pytest.mark.asyncio + async def test_full_refresh_retries_without_auth_when_token_returns_403( + self, connection_config, credentials + ): + 
"""Restricted PATs on public repos should fall back to unauthenticated access.""" + auth_attempts = 0 + unauth_attempts = 0 + + class ForbiddenWithTokenTransport(httpx.AsyncBaseTransport): + async def handle_async_request( + self, request: httpx.Request + ) -> httpx.Response: + nonlocal auth_attempts, unauth_attempts + url_path = request.url.path + has_auth = bool(request.headers.get("authorization")) + + if url_path.endswith("/branches/main"): + if has_auth: + auth_attempts += 1 + return httpx.Response(403, content=b'{"message":"Forbidden"}') + unauth_attempts += 1 + data = _branch_response(HEAD_SHA) + return httpx.Response( + 200, + content=json.dumps(data).encode(), + headers={"content-type": "application/json"}, + ) + if "/tarball/" in url_path: + if has_auth: + auth_attempts += 1 + return httpx.Response(403, content=b'{"message":"Forbidden"}') + unauth_attempts += 1 + payload = _tarball_bytes({"README.md": b"# hello\n"}) + return httpx.Response( + 200, + content=payload, + headers={"content-type": "application/x-gzip"}, + ) + if f"/git/trees/{HEAD_SHA}" in url_path: + if has_auth: + auth_attempts += 1 + return httpx.Response(403, content=b'{"message":"Forbidden"}') + unauth_attempts += 1 + data = _tree_response([{"path": "README.md", "type": "blob", "sha": "abc"}]) + return httpx.Response( + 200, + content=json.dumps(data).encode(), + headers={"content-type": "application/json"}, + ) + raise RuntimeError(f"Unexpected URL: {url_path}") + + client = httpx.AsyncClient(transport=ForbiddenWithTokenTransport()) + adapter = GitHubAdapter(http_client=client) + + result = await adapter.extract( + connection_config=connection_config, + credentials=credentials, + checkpoint=None, + sync_mode=SyncMode.FULL_REFRESH, + ) + + assert len(result.changeset_entries) == 1 + assert auth_attempts >= 1 + assert unauth_attempts >= 1 diff --git a/src/api/tests/unit/management/domain/test_extraction_relationship_authoring.py 
b/src/api/tests/unit/management/domain/test_extraction_relationship_authoring.py index e1bcf6f22..bda1c7266 100644 --- a/src/api/tests/unit/management/domain/test_extraction_relationship_authoring.py +++ b/src/api/tests/unit/management/domain/test_extraction_relationship_authoring.py @@ -83,6 +83,56 @@ def test_includes_inbound_relationship_when_target_side_has_more_instances() -> assert lines[0].counterpart_type == "Route" +def test_rejects_hallucinated_relationship_label() -> None: + edges = [ + {"label": "deploys", "source_type": "Adapter", "target_type": "Cluster"}, + ] + description = """ +Properties: +- name: from source + +Adapter -> operates_on -> Resource: invented edge +""" + from management.domain.extraction_relationship_authoring import ( + per_instance_description_unknown_relationship_errors, + ) + + errors = per_instance_description_unknown_relationship_errors( + description, + "Adapter", + edge_types=edges, + ) + + assert any("operates_on" in err and "not a relationship type" in err for err in errors) + + +def test_rejects_unknown_property_name() -> None: + node_types = [ + { + "label": "Adapter", + "required_properties": ["name", "slug"], + "optional_properties": ["description"], + } + ] + description = """ +Properties: +- name: from source +- operates_on: invented property +""" + from management.domain.extraction_relationship_authoring import ( + per_instance_description_property_errors, + ) + + errors = per_instance_description_property_errors( + description, + "Adapter", + node_types=node_types, + ) + + assert any("operates_on" in err and "not defined" in err for err in errors) + assert any("slug" in err and "missing property" in err for err in errors) + + def test_rejects_active_line_for_ignored_relationship() -> None: counts = {"Adapter": 19, "Resource": 9, "ComponentTest": 1264} edges = [ diff --git a/src/dev-ui/app/components/graph-management/GraphExtractionArchivedHistory.vue 
b/src/dev-ui/app/components/graph-management/GraphExtractionArchivedHistory.vue new file mode 100644 index 000000000..42a278b05 --- /dev/null +++ b/src/dev-ui/app/components/graph-management/GraphExtractionArchivedHistory.vue @@ -0,0 +1,264 @@ +<script setup lang="ts"> +import { computed, ref, watch } from 'vue' +import { Loader2, Archive, ChevronRight } from 'lucide-vue-next' +import { Badge } from '@/components/ui/badge' +import { Button } from '@/components/ui/button' +import { Card, CardContent, CardDescription, CardHeader, CardTitle } from '@/components/ui/card' +import { Separator } from '@/components/ui/separator' + +const props = defineProps<{ + kgId: string +}>() + +const { apiFetch } = useApiClient() + +interface ArchivedJob { + jobId: string + jobSet: string + status: string + workerId: string | null + startedAt: string | null + completedAt: string | null + archivedAt: string | null + runStartedAt: string | null + inputTokens: number + outputTokens: number + costUsd: number + entitiesCreated: number + entitiesModified: number + relationshipsCreated: number + relationshipsModified: number + writeOps: number + instanceCount: number + hasMutations: boolean +} + +interface ArchivedJobSetGroup { + jobSet: string + jobCount: number + writeOps: number + jobs: ArchivedJob[] +} + +interface ArchivedRunGroup { + runStartedAt: string | null + jobCount: number + writeOps: number + inputTokens: number + outputTokens: number + costUsd: number + jobSets: ArchivedJobSetGroup[] +} + +interface ArchivedHistoryPayload { + archivedJobCount: number + runs: ArchivedRunGroup[] +} + +const loading = ref(false) +const error = ref<string | null>(null) +const payload = ref<ArchivedHistoryPayload | null>(null) +const selectedRunIndex = ref(0) +const selectedJobSetIndex = ref(0) +const selectedJobId = ref<string | null>(null) +const mutationJsonl = ref<string | null>(null) +const mutationLoading = ref(false) + +const selectedRun = computed(() => 
payload.value?.runs[selectedRunIndex.value] ?? null) +const selectedJobSet = computed(() => selectedRun.value?.jobSets[selectedJobSetIndex.value] ?? null) +const selectedJob = computed( + () => selectedJobSet.value?.jobs.find((job) => job.jobId === selectedJobId.value) ?? null, +) + +async function loadHistory() { + loading.value = true + error.value = null + try { + payload.value = await apiFetch<ArchivedHistoryPayload>( + `/management/knowledge-graphs/${encodeURIComponent(props.kgId)}/extraction-jobs/archived-history`, + ) + selectedRunIndex.value = 0 + selectedJobSetIndex.value = 0 + selectedJobId.value = payload.value?.runs[0]?.jobSets[0]?.jobs[0]?.jobId ?? null + await loadSelectedMutations() + } catch (e: unknown) { + error.value = e instanceof Error ? e.message : 'Failed to load archived extraction history' + payload.value = null + } finally { + loading.value = false + } +} + +async function loadSelectedMutations() { + mutationJsonl.value = null + if (!selectedJobId.value) return + mutationLoading.value = true + try { + const detail = await apiFetch<{ jsonl: string }>( + `/management/knowledge-graphs/${encodeURIComponent(props.kgId)}/extraction-jobs/jobs/${encodeURIComponent(selectedJobId.value)}/archived-mutations`, + ) + mutationJsonl.value = detail.jsonl || '' + } catch { + mutationJsonl.value = null + } finally { + mutationLoading.value = false + } +} + +function selectRun(index: number) { + selectedRunIndex.value = index + selectedJobSetIndex.value = 0 + selectedJobId.value = payload.value?.runs[index]?.jobSets[0]?.jobs[0]?.jobId ?? null + void loadSelectedMutations() +} + +function selectJobSet(index: number) { + selectedJobSetIndex.value = index + selectedJobId.value = selectedRun.value?.jobSets[index]?.jobs[0]?.jobId ?? 
null + void loadSelectedMutations() +} + +function selectJob(jobId: string) { + selectedJobId.value = jobId + void loadSelectedMutations() +} + +function formatWhen(value: string | null | undefined): string { + if (!value) return 'Unknown run' + return new Date(value).toLocaleString() +} + +function formatCompactNumber(value: number): string { + return new Intl.NumberFormat(undefined, { notation: 'compact', maximumFractionDigits: 1 }).format(value) +} + +watch( + () => props.kgId, + () => { void loadHistory() }, + { immediate: true }, +) +</script> + +<template> + <Card> + <CardHeader> + <CardTitle class="flex items-center gap-2 text-base"> + <Archive class="size-4" /> + Extraction archive + </CardTitle> + <CardDescription> + Permanent history of extraction jobs that applied graph writes, grouped by run and job set. + </CardDescription> + </CardHeader> + <CardContent class="grid gap-4 xl:grid-cols-[260px_220px_minmax(0,1fr)]"> + <div v-if="loading" class="col-span-full flex items-center gap-2 text-sm text-muted-foreground"> + <Loader2 class="size-4 animate-spin" /> + Loading archived extraction jobs... + </div> + <div v-else-if="error" class="col-span-full text-sm text-destructive"> + {{ error }} + <Button class="mt-2" size="sm" variant="outline" @click="loadHistory">Retry</Button> + </div> + <div v-else-if="!payload?.runs.length" class="col-span-full text-sm text-muted-foreground"> + No archived extraction jobs yet. Jobs that apply graph writes are archived automatically. 
+ </div> + <template v-else> + <div class="rounded border"> + <div class="flex items-center justify-between border-b px-3 py-2"> + <p class="text-xs font-medium text-muted-foreground">Runs ({{ payload.archivedJobCount }})</p> + <Button size="sm" variant="ghost" class="h-6 px-2 text-[10px]" @click="loadHistory">Refresh</Button> + </div> + <div class="max-h-80 space-y-1 overflow-auto p-2"> + <button + v-for="(run, index) in payload.runs" + :key="`${run.runStartedAt}-${index}`" + type="button" + class="w-full rounded-md border px-2 py-2 text-left text-xs transition hover:bg-muted/40" + :class="index === selectedRunIndex ? 'border-primary bg-primary/5' : 'border-transparent'" + @click="selectRun(index)" + > + <p class="font-medium">{{ formatWhen(run.runStartedAt) }}</p> + <p class="text-[10px] text-muted-foreground"> + {{ run.jobCount }} jobs · {{ run.writeOps }} writes · ${{ run.costUsd.toFixed(4) }} + </p> + </button> + </div> + </div> + + <div class="rounded border"> + <div class="border-b px-3 py-2"> + <p class="text-xs font-medium text-muted-foreground">Job sets</p> + </div> + <div class="max-h-80 space-y-1 overflow-auto p-2"> + <button + v-for="(set, index) in selectedRun?.jobSets || []" + :key="set.jobSet" + type="button" + class="flex w-full items-center justify-between rounded-md border px-2 py-2 text-left text-xs transition hover:bg-muted/40" + :class="index === selectedJobSetIndex ? 
'border-primary bg-primary/5' : 'border-transparent'" + @click="selectJobSet(index)" + > + <span class="font-medium">{{ set.jobSet }}</span> + <ChevronRight class="size-3 text-muted-foreground" /> + </button> + </div> + </div> + + <div class="space-y-3"> + <div class="rounded border"> + <div class="border-b px-3 py-2"> + <p class="text-xs font-medium text-muted-foreground">Jobs in {{ selectedJobSet?.jobSet }}</p> + </div> + <div class="max-h-48 space-y-1 overflow-auto p-2"> + <button + v-for="job in selectedJobSet?.jobs || []" + :key="job.jobId" + type="button" + class="w-full rounded-md border px-2 py-2 text-left text-[11px] transition hover:bg-muted/40" + :class="job.jobId === selectedJobId ? 'border-primary bg-primary/5' : 'border-transparent'" + @click="selectJob(job.jobId)" + > + <p class="font-mono">{{ job.jobId }}</p> + <p class="text-[10px] text-muted-foreground"> + {{ job.writeOps }} writes · {{ formatCompactNumber(job.inputTokens) }}/{{ formatCompactNumber(job.outputTokens) }} tokens + </p> + </button> + </div> + </div> + + <div v-if="selectedJob" class="rounded border p-3 text-xs"> + <div class="flex flex-wrap items-center gap-2"> + <Badge variant="outline">{{ selectedJob.status }}</Badge> + <span v-if="selectedJob.workerId" class="font-mono text-muted-foreground">{{ selectedJob.workerId }}</span> + </div> + <Separator class="my-2" /> + <div class="grid gap-1 text-muted-foreground sm:grid-cols-2"> + <p>{{ selectedJob.entitiesCreated }} entities created</p> + <p>{{ selectedJob.entitiesModified }} entities modified</p> + <p>{{ selectedJob.relationshipsCreated }} relationships created</p> + <p>{{ selectedJob.relationshipsModified }} relationships modified</p> + <p class="font-medium text-foreground sm:col-span-2">{{ selectedJob.writeOps }} total write ops</p> + </div> + </div> + + <div class="rounded border"> + <div class="border-b px-3 py-2"> + <p class="text-xs font-medium text-muted-foreground">Applied mutations (JSONL)</p> + </div> + <div 
v-if="mutationLoading" class="flex items-center gap-2 px-3 py-4 text-xs text-muted-foreground"> + <Loader2 class="size-3.5 animate-spin" /> + Loading mutations... + </div> + <pre + v-else-if="mutationJsonl" + class="max-h-64 overflow-auto p-3 font-mono text-[10px] leading-relaxed whitespace-pre-wrap break-all" + >{{ mutationJsonl }}</pre> + <p v-else class="px-3 py-4 text-xs text-muted-foreground"> + No stored mutation JSONL for this job. + </p> + </div> + </div> + </template> + </CardContent> + </Card> +</template> diff --git a/src/dev-ui/app/components/graph-management/GraphExtractionJobWatchDialog.vue b/src/dev-ui/app/components/graph-management/GraphExtractionJobWatchDialog.vue index 3699a43d2..c59bb7598 100644 --- a/src/dev-ui/app/components/graph-management/GraphExtractionJobWatchDialog.vue +++ b/src/dev-ui/app/components/graph-management/GraphExtractionJobWatchDialog.vue @@ -45,6 +45,7 @@ interface JobActivityPayload { entitiesCreated: number entitiesModified: number relationshipsCreated: number + relationshipsModified: number writeOps: number instanceCount: number fileCount: number @@ -230,6 +231,9 @@ function messageClass(kind: string): string { <p>{{ payload.detail.entitiesCreated }} entities created</p> <p>{{ payload.detail.entitiesModified }} entities modified</p> <p>{{ payload.detail.relationshipsCreated }} relationships created</p> + <p v-if="payload.detail.relationshipsModified"> + {{ payload.detail.relationshipsModified }} relationships modified + </p> <p class="font-medium text-foreground">{{ payload.detail.writeOps }} total write ops</p> </div> diff --git a/src/dev-ui/app/components/graph-management/GraphExtractionJobsWorkspace.vue b/src/dev-ui/app/components/graph-management/GraphExtractionJobsWorkspace.vue index 6e832ddae..6ec6d6f79 100644 --- a/src/dev-ui/app/components/graph-management/GraphExtractionJobsWorkspace.vue +++ b/src/dev-ui/app/components/graph-management/GraphExtractionJobsWorkspace.vue @@ -203,10 +203,11 @@ const workerCount = 
computed(() => Math.max(1, Math.floor(Number(workers.value) const pendingJobsCount = computed(() => Number(dbStatus.value?.jobsByStatus?.pending || 0)) const inProgressJobsCount = computed(() => Number(dbStatus.value?.jobsByStatus?.in_progress || 0)) const completedJobsCount = computed(() => Number(dbStatus.value?.jobsByStatus?.completed || 0)) +const archivedJobsCount = computed(() => Number(dbStatus.value?.jobsByStatus?.archived || 0)) const failedJobsCount = computed(() => Number(dbStatus.value?.jobsByStatus?.failed || 0)) const remainingJobsCount = computed(() => pendingJobsCount.value + inProgressJobsCount.value) const materializedJobsTotal = computed( - () => pendingJobsCount.value + inProgressJobsCount.value + failedJobsCount.value + completedJobsCount.value, + () => pendingJobsCount.value + inProgressJobsCount.value + failedJobsCount.value + completedJobsCount.value + archivedJobsCount.value, ) const extractionRunLive = computed(() => { if (optimisticLiveUntilMs.value && nowMs.value < optimisticLiveUntilMs.value) return true @@ -216,7 +217,7 @@ const hasRunningJobs = computed(() => inProgressJobsCount.value > 0) const extractionProgressPercent = computed(() => { const total = materializedJobsTotal.value if (total <= 0) return 0 - return Math.round(((completedJobsCount.value + failedJobsCount.value) / total) * 100) + return Math.round(((completedJobsCount.value + archivedJobsCount.value + failedJobsCount.value) / total) * 100) }) const plannedKnownTotalJobs = computed(() => { const sets = planSummary.value?.job_sets || [] @@ -730,10 +731,8 @@ onUnmounted(() => { <p class="text-xl font-semibold">{{ failedJobsCount }}</p> </div> <div class="rounded-lg border p-3 text-center"> - <p class="text-xs text-muted-foreground">Stale candidates</p> - <p class="text-xl font-semibold"> - {{ extractionRunLive ? 
0 : inProgressJobsCount }} - </p> + <p class="text-xs text-muted-foreground">Archived</p> + <p class="text-xl font-semibold">{{ archivedJobsCount }}</p> </div> </div> diff --git a/src/dev-ui/app/pages/data-sources/index.vue b/src/dev-ui/app/pages/data-sources/index.vue index 6aae7709c..ad878e693 100644 --- a/src/dev-ui/app/pages/data-sources/index.vue +++ b/src/dev-ui/app/pages/data-sources/index.vue @@ -48,6 +48,7 @@ import { SelectValue, } from '@/components/ui/select' import SyncPhaseIndicator from '@/components/graph/SyncPhaseIndicator.vue' +import { latestSyncRun, sortSyncRunsByRecent } from '@/utils/kgDataSourcesSync' import { Card, CardContent, CardHeader, CardTitle, CardDescription } from '@/components/ui/card' import { CopyableText } from '@/components/ui/copyable-text' import { @@ -727,7 +728,7 @@ async function loadDataSources() { */ const hasActiveSyncs = computed(() => dataSources.value.some((ds) => { - const latestStatus = ds.sync_runs?.[0]?.status + const latestStatus = latestSyncRun(ds.sync_runs)?.status return latestStatus !== undefined && ACTIVE_STATUSES.includes(latestStatus) }), ) @@ -1210,8 +1211,8 @@ async function handleDeleteDs() { </div> <div class="flex items-center gap-2"> <SyncPhaseIndicator - v-if="ds.sync_runs?.[0]" - :status="ds.sync_runs[0].status" + v-if="latestSyncRun(ds.sync_runs)" + :status="latestSyncRun(ds.sync_runs)!.status" /> <Badge v-else variant="secondary" class="text-[10px]">Idle</Badge> <!-- Edit Ontology button (FAIL 2) --> @@ -1343,7 +1344,7 @@ async function handleDeleteDs() { <div v-if="ds.sync_runs && ds.sync_runs.length > 0" class="border-t px-4 py-3"> <p class="text-[11px] font-semibold uppercase tracking-wider text-muted-foreground mb-2">Sync History</p> <div class="space-y-1"> - <div v-for="run in ds.sync_runs" :key="run.id" class="flex items-center gap-2 text-xs text-muted-foreground"> + <div v-for="run in sortSyncRunsByRecent(ds.sync_runs)" :key="run.id" class="flex items-center gap-2 text-xs 
text-muted-foreground"> <SyncPhaseIndicator :status="run.status" /> <span>{{ new Date(run.started_at).toLocaleString() }}</span> <span v-if="run.completed_at"> diff --git a/src/dev-ui/app/pages/knowledge-graphs/[kgId]/data-sources/index.vue b/src/dev-ui/app/pages/knowledge-graphs/[kgId]/data-sources/index.vue index 162f0afb5..55b4028bf 100644 --- a/src/dev-ui/app/pages/knowledge-graphs/[kgId]/data-sources/index.vue +++ b/src/dev-ui/app/pages/knowledge-graphs/[kgId]/data-sources/index.vue @@ -27,6 +27,7 @@ import { isActiveSyncStatus, isSyncTerminal, latestSyncRun, + sortSyncRunsByRecent, type SyncRunStatus, } from '@/utils/kgDataSourcesSync' import { @@ -397,7 +398,7 @@ async function prepareAllDataSources() { preparingAll.value = true try { - await Promise.allSettled( + const results = await Promise.allSettled( queue.map((ds) => apiFetch(`/management/data-sources/${ds.id}/sync`, { method: 'POST', @@ -405,9 +406,25 @@ async function prepareAllDataSources() { }), ), ) - toast.success(`Preparing ${queue.length} data source${queue.length === 1 ? '' : 's'}`) + const failures = results.filter((result) => result.status === 'rejected') await loadDataSources() if (hasAnyActiveSync(dataSources.value)) startPolling() + + if (failures.length === queue.length) { + const reason = failures[0]?.status === 'rejected' + ? (failures[0].reason instanceof Error ? failures[0].reason.message : 'Request failed') + : 'Request failed' + toast.error('Failed to start preparation', { description: reason }) + return + } + if (failures.length > 0) { + toast.warning( + `Started ${queue.length - failures.length} of ${queue.length} preparations`, + { description: 'Some sources could not be queued. Check permissions and retry.' }, + ) + return + } + toast.success(`Preparing ${queue.length} data source${queue.length === 1 ? 
'' : 's'}`) } catch { toast.error('Failed to start preparation') } finally { @@ -415,6 +432,10 @@ async function prepareAllDataSources() { } } +function recentSyncRuns(ds: DataSourceItem): SyncRun[] { + return sortSyncRunsByRecent(ds.sync_runs).slice(0, 2) +} + // Edit config sheet const editConfigOpen = ref(false) const editConfigDs = ref<DataSourceItem | null>(null) @@ -853,7 +874,7 @@ watch(tenantVersion, async () => { <div v-if="ds.sync_runs?.length" class="mt-2 space-y-1"> <div - v-for="run in ds.sync_runs.slice(0, 2)" + v-for="run in recentSyncRuns(ds)" :key="run.id" class="flex items-center gap-2 text-[10px] text-muted-foreground" > diff --git a/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue b/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue index 07c01b480..e5988b0ec 100644 --- a/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue +++ b/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue @@ -47,6 +47,7 @@ import SharedConversationPanel from '@/components/extraction/SharedConversationP import GraphDesignEntitiesPanel from '@/components/graph-management/GraphDesignEntitiesPanel.vue' import GraphDesignRelationshipsPanel from '@/components/graph-management/GraphDesignRelationshipsPanel.vue' import GraphExtractionJobsWorkspace from '@/components/graph-management/GraphExtractionJobsWorkspace.vue' +import GraphExtractionArchivedHistory from '@/components/graph-management/GraphExtractionArchivedHistory.vue' import { GRAPH_MANAGEMENT_INPUT_PLACEHOLDERS, GRAPH_MANAGEMENT_MODE_LABELS, @@ -1675,192 +1676,7 @@ watch( </section> <section v-else-if="activeStep === 'mutation-logs'" class="space-y-4"> - <div - v-if="mutationLogsSectionState.phase === 'forbidden'" - class="rounded-lg border border-destructive/40 bg-destructive/5 p-4 text-sm" - role="alert" - > - <p class="font-medium text-destructive">{{ mutationLogsSectionState.title }}</p> - <p class="mt-1 text-muted-foreground">{{ mutationLogsSectionState.message }}</p> - </div> - <div - 
v-else-if="mutationLogsSectionState.phase === 'error'" - class="rounded-lg border border-dashed p-4 text-sm" - role="alert" - > - <p class="font-medium">{{ mutationLogsSectionState.title }}</p> - <p class="mt-1 text-muted-foreground">{{ mutationLogsSectionState.message }}</p> - <Button class="mt-3" size="sm" variant="outline" @click="loadMutationLogRuns"> - Retry mutation log load - </Button> - </div> - <Card v-else> - <CardHeader> - <CardTitle class="text-base">MutationLogs</CardTitle> - <CardDescription> - Knowledge-graph scoped mutation runs with per-entry operation previews and run metrics. - </CardDescription> - </CardHeader> - <CardContent class="grid gap-3 xl:grid-cols-[280px_1fr]"> - <div class="rounded border"> - <div class="flex items-center justify-between border-b px-3 py-2"> - <p class="text-xs font-medium text-muted-foreground">Runs</p> - <Button size="sm" variant="ghost" class="h-6 px-2 text-[10px]" @click="loadMutationLogRuns"> - Refresh - </Button> - </div> - <div v-if="mutationLogLoading" class="flex items-center gap-2 px-3 py-4 text-xs text-muted-foreground" role="status"> - <Loader2 class="size-3.5 animate-spin" /> - {{ mutationLogsSectionState.message }} - </div> - <div - v-else-if="mutationLogRuns.length === 0" - class="space-y-2 px-3 py-4 text-xs text-muted-foreground" - > - <p>{{ mutationLogsSectionState.message }}</p> - <Button size="sm" variant="outline" @click="loadMutationLogRuns"> - {{ mutationLogsSectionState.actionLabel ?? 'Refresh runs' }} - </Button> - </div> - <div v-else class="max-h-64 overflow-auto p-2 space-y-1.5"> - <button - v-for="run in mutationLogRuns" - :key="run.id" - type="button" - tabindex="0" - class="w-full rounded border px-2 py-1.5 text-left text-xs transition-colors focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring" - :class="selectedMutationLogRunId === run.id ? 
'border-primary bg-primary/5' : 'hover:bg-muted/40'" - @click="selectMutationLogRun(run.id)" - @keydown="onMutationRunKeydown($event, run.id)" - > - <p class="font-medium truncate">{{ run.data_source_name }}</p> - <p class="text-muted-foreground truncate">{{ new Date(run.started_at).toLocaleString() }}</p> - <div class="mt-1 flex items-center justify-between"> - <Badge variant="outline" class="text-[10px]">{{ run.status }}</Badge> - <span class="font-mono text-[10px] text-muted-foreground">{{ run.mutation_log_id }}</span> - </div> - </button> - </div> - </div> - - <div v-if="selectedMutationLogRun" class="space-y-3 rounded border p-3"> - <p class="text-xs font-medium text-muted-foreground">Run summary</p> - <div class="flex flex-wrap items-center gap-2"> - <Badge>{{ selectedMutationLogRun.status }}</Badge> - <p class="text-xs text-muted-foreground"> - Data source: - <span class="font-medium text-foreground">{{ selectedMutationLogRun.data_source_name }}</span> - </p> - </div> - <div class="grid gap-2 sm:grid-cols-2"> - <div class="rounded border px-3 py-2 text-xs"> - <p class="text-muted-foreground">MutationLog</p> - <p class="mt-1 font-mono break-all">{{ selectedMutationLogRun.mutation_log_id }}</p> - </div> - <div class="rounded border px-3 py-2 text-xs"> - <p class="text-muted-foreground">Session</p> - <p class="mt-1 font-mono break-all">{{ selectedMutationLogRun.session_id ?? 'None' }}</p> - </div> - <div class="rounded border px-3 py-2 text-xs"> - <p class="text-muted-foreground">Started</p> - <p class="mt-1">{{ new Date(selectedMutationLogRun.started_at).toLocaleString() }}</p> - </div> - <div class="rounded border px-3 py-2 text-xs"> - <p class="text-muted-foreground">Completed</p> - <p class="mt-1"> - {{ selectedMutationLogRun.completed_at ? 
new Date(selectedMutationLogRun.completed_at).toLocaleString() : 'In progress' }} - </p> - </div> - </div> - <div class="grid gap-2 sm:grid-cols-2"> - <div class="rounded border px-3 py-2 text-xs"> - <p class="text-muted-foreground flex items-center gap-1.5"> - <Coins class="size-3.5" /> - Token usage - </p> - <p class="mt-1 font-medium">{{ (selectedMutationLogRun.token_usage_total ?? 0).toLocaleString() }}</p> - </div> - <div class="rounded border px-3 py-2 text-xs"> - <p class="text-muted-foreground flex items-center gap-1.5"> - <DollarSign class="size-3.5" /> - Cost (USD) - </p> - <p class="mt-1 font-medium">${{ (selectedMutationLogRun.cost_total_usd ?? 0).toFixed(2) }}</p> - </div> - </div> - <div class="rounded border p-3"> - <p class="mb-2 text-xs font-medium text-muted-foreground">Operation class counts</p> - <div v-if="Object.keys(selectedMutationLogRun.operation_counts).length === 0" class="text-xs text-muted-foreground"> - No operation class counts recorded for this run. - </div> - <div v-else class="space-y-1.5"> - <div - v-for="([opClass, count]) in Object.entries(selectedMutationLogRun.operation_counts)" - :key="opClass" - class="flex items-center justify-between rounded border px-2 py-1.5 text-xs" - > - <span class="font-mono">{{ opClass }}</span> - <Badge variant="secondary">{{ count }}</Badge> - </div> - </div> - </div> - <div class="rounded border p-3"> - <div class="mb-2 flex items-center justify-between gap-2"> - <p class="text-xs font-medium text-muted-foreground">Per-entry operation previews</p> - <div - v-if="hasMutationLogEntryPreviewPage(mutationLogEntryPreviewPage)" - class="flex items-center gap-1" - > - <Button - size="sm" - variant="ghost" - class="h-6 px-2 text-[10px]" - :disabled="mutationLogEntryPreviewLoading || mutationLogEntryPreviewOffset === 0" - @click="loadMutationLogEntryPreviews(mutationLogEntryPreviewOffset - MUTATION_LOG_ENTRY_PREVIEW_PAGE_SIZE)" - > - Previous - </Button> - <Button - size="sm" - variant="ghost" - 
class="h-6 px-2 text-[10px]" - :disabled="mutationLogEntryPreviewLoading || (mutationLogEntryPreviewPage?.offset ?? 0) + (mutationLogEntryPreviewPage?.entries.length ?? 0) >= (mutationLogEntryPreviewPage?.total ?? 0)" - @click="loadMutationLogEntryPreviews(mutationLogEntryPreviewOffset + MUTATION_LOG_ENTRY_PREVIEW_PAGE_SIZE)" - > - Next - </Button> - </div> - </div> - <div v-if="mutationLogEntryPreviewLoading" class="flex items-center gap-2 text-xs text-muted-foreground"> - <Loader2 class="size-3.5 animate-spin" /> - Loading entry previews... - </div> - <div - v-else-if="!hasMutationLogEntryPreviewPage(mutationLogEntryPreviewPage)" - class="rounded border border-dashed px-3 py-4 text-xs text-muted-foreground" - > - {{ MUTATION_LOG_NO_PREVIEW_MESSAGE }} - </div> - <div v-else class="space-y-1.5"> - <div - v-for="entry in mutationLogEntryPreviewPage?.entries ?? []" - :key="`${entry.line_number}-${entry.operation_class}`" - class="rounded border px-2 py-1.5 text-xs" - > - <div class="flex items-center justify-between gap-2"> - <span class="font-mono">{{ entry.operation_class }}</span> - <span class="text-[10px] text-muted-foreground">Line {{ entry.line_number }}</span> - </div> - <p class="mt-1 text-muted-foreground">{{ entry.summary }}</p> - </div> - </div> - </div> - </div> - <div v-else class="rounded border border-dashed p-6 text-sm text-muted-foreground"> - Select a mutation run to view summary and per-entry previews. 
- </div> - </CardContent> - </Card> + <GraphExtractionArchivedHistory :kg-id="kgId" /> </section> <section v-else-if="activeStep === 'graph-management'" class="space-y-4"> diff --git a/src/dev-ui/app/tests/kg-data-sources-phase1.test.ts b/src/dev-ui/app/tests/kg-data-sources-phase1.test.ts index 1254800df..cef5230da 100644 --- a/src/dev-ui/app/tests/kg-data-sources-phase1.test.ts +++ b/src/dev-ui/app/tests/kg-data-sources-phase1.test.ts @@ -12,6 +12,7 @@ import { shortCommitHash, unpulledCommitStatusLabel, } from '@/utils/kgDataSourcesCommits' +import { latestSyncRun, sortSyncRunsByRecent } from '@/utils/kgDataSourcesSync' const phase1Vue = readFileSync( resolve(__dirname, '../pages/knowledge-graphs/[kgId]/data-sources/index.vue'), @@ -110,3 +111,28 @@ describe('kgDataSourcesCommits helpers', () => { expect(isIngestionPreparedAtHead({ tracked_branch_head_commit: 'abc', clone_head_commit: 'abc' })).toBe(true) }) }) + +describe('sync run ordering helpers', () => { + it('treats the most recent started_at run as latest', () => { + const runs = [ + { + id: 'old-failed', + status: 'failed' as const, + error: '403', + started_at: '2026-06-12T19:02:50.000Z', + }, + { + id: 'new-ingested', + status: 'ingested' as const, + error: null, + started_at: '2026-06-12T19:10:49.000Z', + }, + ] + + expect(latestSyncRun(runs)?.id).toBe('new-ingested') + expect(sortSyncRunsByRecent(runs).map((run) => run.id)).toEqual([ + 'new-ingested', + 'old-failed', + ]) + }) +}) diff --git a/src/dev-ui/app/utils/kgDataSourcesSync.ts b/src/dev-ui/app/utils/kgDataSourcesSync.ts index b56757788..c5b5d91ae 100644 --- a/src/dev-ui/app/utils/kgDataSourcesSync.ts +++ b/src/dev-ui/app/utils/kgDataSourcesSync.ts @@ -27,12 +27,24 @@ export interface SyncRunSummary { id: string status: SyncRunStatus error: string | null + started_at?: string token_usage_total?: number | null cost_total_usd?: number | null } +export function sortSyncRunsByRecent<T extends SyncRunSummary>( + runs: T[] | undefined, +): T[] { + 
if (!runs?.length) return [] + return [...runs].sort((left, right) => { + const leftTime = left.started_at ? Date.parse(left.started_at) : 0 + const rightTime = right.started_at ? Date.parse(right.started_at) : 0 + return rightTime - leftTime + }) +} + export function latestSyncRun<T extends SyncRunSummary>(runs: T[] | undefined): T | undefined { - return runs?.[0] + return sortSyncRunsByRecent(runs)[0] } export function hasAnyActiveSync<T extends { sync_runs?: SyncRunSummary[] }>( diff --git a/src/dev-ui/app/utils/kgManageState.ts b/src/dev-ui/app/utils/kgManageState.ts index 2d567463d..c27d7fd12 100644 --- a/src/dev-ui/app/utils/kgManageState.ts +++ b/src/dev-ui/app/utils/kgManageState.ts @@ -31,10 +31,10 @@ export const SECTION_STATE_MESSAGES: Record< forbidden: 'You do not have permission to manage this knowledge graph.', }, 'mutation-logs': { - loading: 'Loading mutation log runs for this knowledge graph…', - empty: 'No mutation log runs recorded for this knowledge graph yet.', - error: 'Could not load mutation log runs for this knowledge graph.', - forbidden: 'You do not have permission to view mutation logs for this graph.', + loading: 'Loading archived extraction history…', + empty: 'No archived extraction jobs with graph writes yet.', + error: 'Could not load archived extraction history.', + forbidden: 'You do not have permission to view extraction archive for this graph.', }, 'data-sources': { loading: 'Loading data source readiness for this knowledge graph…', diff --git a/src/dev-ui/app/utils/kgManageWorkspace.ts b/src/dev-ui/app/utils/kgManageWorkspace.ts index 6e182d51a..4964bba9e 100644 --- a/src/dev-ui/app/utils/kgManageWorkspace.ts +++ b/src/dev-ui/app/utils/kgManageWorkspace.ts @@ -16,7 +16,7 @@ export type StepActionLabel = 'Open' | 'Revisit' | 'Run' export const WORKSPACE_STEP_TITLES: Record<WorkspaceStepId, string> = { 'data-sources': 'Data Sources', 'graph-management': 'Graph Management', - 'mutation-logs': 'MutationLogs', + 'mutation-logs': 
'Extraction Archive', maintain: 'Maintain', } From 710fc5511d4779a68dec305441088dc23350bb90 Mon Sep 17 00:00:00 2001 From: aredenba-rh <aredenba@redhat.com> Date: Sat, 13 Jun 2026 00:48:36 -0400 Subject: [PATCH 128/153] feat(dev): add backup/restore and harden GMA prepopulation against graph failures Add one-command dev DB backup and restore, auto-repair corrupt tenant AGE graphs, return HTTP 503 for graph storage errors, and update GMA instructions to smoke-test prepopulation and stop on infrastructure failures. Co-authored-by: Cursor <cursoragent@cursor.com> --- .gitignore | 1 + AGENTS.md | 23 +- Makefile | 13 + scripts/dev-data-backup.sh | 344 ++++++++++++++++++ .../kartograph_agent_runtime/agent_prompt.py | 9 + src/agent-runtime/tests/test_agent_prompt.py | 3 + .../application/schema_authoring_guide.py | 39 +- .../application/skill_resolution_service.py | 27 +- .../PREPOPULATION_WORKFLOW.md | 10 + .../presentation/workload_routes.py | 106 ++++-- .../infrastructure/tenant_graph_handler.py | 77 +++- .../graph_mutation_writer.py | 2 + .../extraction_workload/graph_reader.py | 70 ++-- .../extraction_workload/workload_errors.py | 36 ++ .../test_schema_authoring_guide.py | 4 + .../test_skill_resolution_service.py | 3 + .../presentation/test_workload_routes.py | 39 ++ .../test_tenant_graph_handler.py | 31 +- .../test_workload_errors.py | 34 ++ 19 files changed, 758 insertions(+), 113 deletions(-) create mode 100755 scripts/dev-data-backup.sh create mode 100644 src/api/infrastructure/extraction_workload/workload_errors.py create mode 100644 src/api/tests/unit/infrastructure/extraction_workload/test_workload_errors.py diff --git a/.gitignore b/.gitignore index 629979d53..f3bc136db 100644 --- a/.gitignore +++ b/.gitignore @@ -173,6 +173,7 @@ src/dev-ui/.output/ certs/ .instances/ +.kartograph/backups/ # Demo web interface (not for production) demo-web/ diff --git a/AGENTS.md b/AGENTS.md index 38e3da92d..1a72bfd7e 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -127,9 +127,30 
@@ must be triggered explicitly. **Standard (single developer):** ```bash make dev # starts all services: Postgres, SpiceDB, Keycloak, API, Dev UI -make down # tears everything down +make down # stops containers; Postgres data volume is preserved ``` +**Dev data backup / restore** (knowledge graphs, ontology, graph data, IAM, SpiceDB): + +```bash +make dev-backup # snapshot DBs to .kartograph/backups/<timestamp>/ +make dev-backup-list # list available snapshots +make dev-restore # restore the latest snapshot (prompts for confirmation) +make dev-restore BACKUP=2026-06-12T20-10-33Z +make dev-repair-age-graphs # fix corrupt AGE graphs without full DB restore +``` + +For isolated instances, set the compose project name: + +```bash +COMPOSE_PROJECT=kg-my-feature ./scripts/dev-data-backup.sh backup +COMPOSE_PROJECT=kg-my-feature ./scripts/dev-data-backup.sh restore latest --yes +``` + +Avoid `docker compose down -v` unless you intend to wipe volumes. After a +restore, if the dev UI shows an empty tenant, delete `~/.kartograph/token.json` +and sign in again. 
+ **Isolated instance (agents / worktrees):** When working in a worktree or running multiple instances in parallel, diff --git a/Makefile b/Makefile index 02fe7f065..915a581b0 100755 --- a/Makefile +++ b/Makefile @@ -41,6 +41,19 @@ down: -@docker ps -aq --filter name=kartograph-worker- | xargs -r docker rm -f -@docker ps -aq --filter name=kartograph-extract- | xargs -r docker rm -f +.PHONY: dev-backup dev-restore dev-backup-list dev-repair-age-graphs +dev-backup: + @./scripts/dev-data-backup.sh backup + +dev-restore: + @./scripts/dev-data-backup.sh restore $(or $(BACKUP),latest) + +dev-backup-list: + @./scripts/dev-data-backup.sh list + +dev-repair-age-graphs: + @./scripts/dev-data-backup.sh repair + .PHONY: run run: diff --git a/scripts/dev-data-backup.sh b/scripts/dev-data-backup.sh new file mode 100755 index 000000000..cb66d8b4e --- /dev/null +++ b/scripts/dev-data-backup.sh @@ -0,0 +1,345 @@ +#!/usr/bin/env bash +# +# Backup and restore Kartograph development databases (PostgreSQL). +# +# Captures the kartograph application database (metadata, ontology, AGE graph, +# IAM, outbox, etc.) and the spicedb authorization database. Optionally archives +# prepared JobPackage files from the host work dir. +# +# Usage: +# ./scripts/dev-data-backup.sh backup [--project <compose-project>] +# ./scripts/dev-data-backup.sh restore [--project <compose-project>] [backup-id|latest] [--yes] +# ./scripts/dev-data-backup.sh repair [--project <compose-project>] +# ./scripts/dev-data-backup.sh list +# +# Makefile shortcuts: make dev-backup, make dev-restore, make dev-backup-list, make dev-repair-age-graphs +# +# Default compose project is "kartograph" (standard `make dev`). Isolated instances +# use project names like "kg-my-feature" from dev-instance.sh: +# COMPOSE_PROJECT=kg-my-feature ./scripts/dev-data-backup.sh backup + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +REPO_ROOT="$(cd "$SCRIPT_DIR/.." 
&& pwd)" +BACKUP_ROOT="$REPO_ROOT/.kartograph/backups" + +COMPOSE_PROJECT="${COMPOSE_PROJECT:-kartograph}" +COMPOSE_FILES=(-f "$REPO_ROOT/compose.yaml" -f "$REPO_ROOT/compose.dev.yaml") +AUTO_CONFIRM=false +BACKUP_ID="" + +# shellcheck disable=SC1091 +source "$REPO_ROOT/env/postgres.env" + +POSTGRES_USER="${POSTGRES_USER:-kartograph}" +POSTGRES_PASSWORD="${POSTGRES_PASSWORD:-kartograph_dev_password}" +APP_DATABASE="${POSTGRES_DB:-kartograph}" +AUTH_DATABASE="spicedb" + +usage() { + cat <<'EOF' +Usage: + dev-data-backup.sh backup [--project <compose-project>] + dev-data-backup.sh restore [--project <compose-project>] [backup-id|latest] [--yes] + dev-data-backup.sh repair [--project <compose-project>] + dev-data-backup.sh list + +Environment: + COMPOSE_PROJECT Docker Compose project name (default: kartograph) + +Examples: + make dev-backup + make dev-restore + ./scripts/dev-data-backup.sh restore 2026-06-12T19-30-00Z + COMPOSE_PROJECT=kg-kartograph ./scripts/dev-data-backup.sh backup +EOF +} + +log() { + printf '%s\n' "$*" +} + +die() { + printf 'ERROR: %s\n' "$*" >&2 + exit 1 +} + +compose() { + docker compose -p "$COMPOSE_PROJECT" "${COMPOSE_FILES[@]}" "$@" +} + +postgres_container_id() { + local container_id + container_id="$(compose ps -q postgres 2>/dev/null | head -n 1 || true)" + if [[ -z "$container_id" ]]; then + die "Postgres container not found for compose project '$COMPOSE_PROJECT'. Is 'make dev' running?" + fi + printf '%s' "$container_id" +} + +postgres_exec() { + local container_id + container_id="$(postgres_container_id)" + docker exec -e PGPASSWORD="$POSTGRES_PASSWORD" "$container_id" "$@" +} + +git_commit_short() { + if git -C "$REPO_ROOT" rev-parse --is-inside-work-tree >/dev/null 2>&1; then + git -C "$REPO_ROOT" rev-parse --short HEAD 2>/dev/null || echo "unknown" + else + echo "unknown" + fi +} + +timestamp_utc() { + date -u +"%Y-%m-%dT%H-%M-%SZ" +} + +stop_dependent_services() { + log "Stopping API and SpiceDB to release database connections..." 
+ compose stop api spicedb >/dev/null 2>&1 || true +} + +start_dependent_services() { + log "Starting API and SpiceDB..." + compose start spicedb api >/dev/null 2>&1 || compose up -d spicedb api +} + +terminate_db_connections() { + local database="$1" + postgres_exec psql -U "$POSTGRES_USER" -d postgres -v ON_ERROR_STOP=1 -c \ + "SELECT pg_terminate_backend(pid) FROM pg_stat_activity WHERE datname = '${database}' AND pid <> pg_backend_pid();" \ + >/dev/null +} + +dump_database() { + local database="$1" + local output_path="$2" + log " Dumping database '${database}'..." + postgres_exec pg_dump -U "$POSTGRES_USER" -d "$database" -Fc --no-owner --no-acl \ + >"$output_path" +} + +restore_database() { + local database="$1" + local dump_path="$2" + local container_id + container_id="$(postgres_container_id)" + log " Restoring database '${database}'..." + terminate_db_connections "$database" + # Stream the custom-format dump into the container (shell redirect via function breaks stdin). + docker exec -i -e PGPASSWORD="$POSTGRES_PASSWORD" "$container_id" \ + pg_restore -U "$POSTGRES_USER" -d "$database" --clean --if-exists --no-owner --no-acl \ + <"$dump_path" +} + +maybe_backup_job_packages() { + local backup_dir="$1" + local source_dir="/tmp/kartograph/job_packages" + if [[ -d "$source_dir" ]] && [[ -n "$(ls -A "$source_dir" 2>/dev/null || true)" ]]; then + log " Archiving prepared JobPackages from ${source_dir}..." + tar -C "$(dirname "$source_dir")" -czf "$backup_dir/job_packages.tar.gz" "$(basename "$source_dir")" + fi +} + +maybe_restore_job_packages() { + local backup_dir="$1" + local archive="$backup_dir/job_packages.tar.gz" + if [[ -f "$archive" ]]; then + log " Restoring prepared JobPackages to /tmp/kartograph/..." 
+ mkdir -p /tmp/kartograph + tar -C /tmp/kartograph -xzf "$archive" # archive members are rooted at job_packages/ + fi +} + +write_manifest() { + local backup_dir="$1" + local created_at + created_at="$(date -u +"%Y-%m-%dT%H:%M:%SZ")" + cat >"$backup_dir/manifest.json" <<EOF +{ + "created_at": "${created_at}", + "compose_project": "${COMPOSE_PROJECT}", + "git_commit": "$(git_commit_short)", + "databases": ["${APP_DATABASE}", "${AUTH_DATABASE}"], + "files": { + "kartograph_dump": "kartograph.dump", + "spicedb_dump": "spicedb.dump", + "job_packages_archive": "job_packages.tar.gz" + } +} +EOF +} + +cmd_backup() { + local backup_id backup_dir + backup_id="$(timestamp_utc)" + backup_dir="$BACKUP_ROOT/$backup_id" + mkdir -p "$backup_dir" + + log "Creating dev backup '${backup_id}' (project: ${COMPOSE_PROJECT})..." + dump_database "$APP_DATABASE" "$backup_dir/kartograph.dump" + dump_database "$AUTH_DATABASE" "$backup_dir/spicedb.dump" + maybe_backup_job_packages "$backup_dir" + write_manifest "$backup_dir" + + ln -sfn "$backup_id" "$BACKUP_ROOT/latest" + log "Backup complete: ${backup_dir}" + log "Latest symlink: ${BACKUP_ROOT}/latest" +} + +resolve_backup_dir() { + local requested="${1:-latest}" + local backup_dir="" + if [[ "$requested" == "latest" ]]; then + if [[ -L "$BACKUP_ROOT/latest" ]]; then + backup_dir="$(readlink -f "$BACKUP_ROOT/latest")" + elif [[ -d "$BACKUP_ROOT/latest" ]]; then + backup_dir="$BACKUP_ROOT/latest" + else + backup_dir="$(find "$BACKUP_ROOT" -mindepth 1 -maxdepth 1 -type d 2>/dev/null | sort | tail -n 1)" + fi + elif [[ -d "$BACKUP_ROOT/$requested" ]]; then + backup_dir="$BACKUP_ROOT/$requested" + elif [[ -d "$requested" ]]; then + backup_dir="$requested" + fi + + if [[ -z "$backup_dir" || ! -d "$backup_dir" ]]; then + die "Backup not found: ${requested}. Run 'make dev-backup-list' to see available backups." + fi + if [[ ! -f "$backup_dir/kartograph.dump" || ! -f "$backup_dir/spicedb.dump" ]]; then + die "Backup '${backup_dir}' is missing required dump files." 
+ fi + printf '%s' "$backup_dir" +} + +graph_is_queryable() { + local graph_name="$1" + postgres_exec psql -U "$POSTGRES_USER" -d "$APP_DATABASE" -v ON_ERROR_STOP=1 -c \ + "LOAD 'age'; SET search_path = ag_catalog, \"\$user\", public; SELECT * FROM cypher('${graph_name}', \$\$ RETURN 1 \$\$) as (x agtype);" \ + >/dev/null 2>&1 +} + +repair_tenant_age_graph() { + local graph_name="$1" + if graph_is_queryable "$graph_name"; then + return 0 + fi + log " Repairing AGE graph ${graph_name}..." + postgres_exec psql -U "$POSTGRES_USER" -d "$APP_DATABASE" -c \ + "LOAD 'age'; SET search_path = ag_catalog, \"\$user\", public; SELECT ag_catalog.drop_graph('${graph_name}', true);" \ + >/dev/null 2>&1 || true + postgres_exec psql -U "$POSTGRES_USER" -d "$APP_DATABASE" -c \ + "LOAD 'age'; SET search_path = ag_catalog, \"\$user\", public; SELECT ag_catalog.create_graph('${graph_name}');" +} + +repair_all_tenant_age_graphs() { + log "Ensuring tenant AGE graphs are queryable after restore..." + local graph_names="" + graph_names="$(postgres_exec psql -U "$POSTGRES_USER" -d "$APP_DATABASE" -Atc \ + "SELECT DISTINCT graph_name FROM ( + SELECT name AS graph_name FROM ag_catalog.ag_graph WHERE name LIKE 'tenant_%' + UNION + SELECT 'tenant_' || id AS graph_name FROM tenants + ) AS tenant_graphs ORDER BY graph_name")" + while IFS= read -r graph_name; do + [[ -z "$graph_name" ]] && continue + repair_tenant_age_graph "$graph_name" + done <<< "$graph_names" +} + +cmd_restore() { + local backup_dir + backup_dir="$(resolve_backup_dir "$BACKUP_ID")" + + if [[ "$AUTO_CONFIRM" != "true" ]]; then + log "This will REPLACE all data in databases '${APP_DATABASE}' and '${AUTH_DATABASE}'" + log "for compose project '${COMPOSE_PROJECT}' from:" + log " ${backup_dir}" + read -r -p "Continue? [y/N] " reply + case "$reply" in + y|Y|yes|YES) ;; + *) log "Aborted."; exit 1 ;; + esac + fi + + stop_dependent_services + log "Restoring dev backup from '${backup_dir}'..." 
+ restore_database "$APP_DATABASE" "$backup_dir/kartograph.dump" + restore_database "$AUTH_DATABASE" "$backup_dir/spicedb.dump" + repair_all_tenant_age_graphs + maybe_restore_job_packages "$backup_dir" + start_dependent_services + + log "Restore complete." + log "If the dev UI shows an empty tenant, clear ~/.kartograph/token.json and sign in again." + log "Note: repaired AGE graphs start empty — re-run instance prepopulation if needed." +} + +cmd_list() { + if [[ ! -d "$BACKUP_ROOT" ]]; then + log "No backups yet. Run 'make dev-backup' first." + exit 0 + fi + log "Available backups in ${BACKUP_ROOT}:" + find "$BACKUP_ROOT" -mindepth 1 -maxdepth 1 -type d ! -name 'latest' -printf '%f\n' 2>/dev/null \ + | sort -r \ + || true + if [[ -L "$BACKUP_ROOT/latest" ]]; then + log "" + log "latest -> $(readlink "$BACKUP_ROOT/latest")" + fi +} + +ACTION="${1:-}" +shift || true + +while [[ $# -gt 0 ]]; do + case "$1" in + --project) + COMPOSE_PROJECT="$2" + shift 2 + ;; + --yes|-y) + AUTO_CONFIRM=true + shift + ;; + -h|--help) + usage + exit 0 + ;; + *) + if [[ -z "$BACKUP_ID" ]]; then + BACKUP_ID="$1" + shift + else + die "Unknown argument: $1" + fi + ;; + esac +done + +case "$ACTION" in + backup) + cmd_backup + ;; + restore) + BACKUP_ID="${BACKUP_ID:-latest}" + cmd_restore + ;; + repair) + repair_all_tenant_age_graphs + log "AGE graph repair complete." + ;; + list) + cmd_list + ;; + ""|-h|--help|help) + usage + ;; + *) + die "Unknown command: ${ACTION}. Run with --help for usage." + ;; +esac diff --git a/src/agent-runtime/kartograph_agent_runtime/agent_prompt.py b/src/agent-runtime/kartograph_agent_runtime/agent_prompt.py index 9c0ce3db0..fee8ebe9f 100644 --- a/src/agent-runtime/kartograph_agent_runtime/agent_prompt.py +++ b/src/agent-runtime/kartograph_agent_runtime/agent_prompt.py @@ -55,6 +55,15 @@ 7. Create relationship edges after entity IDs are known 8. 
Verify with `kartograph_list_instances_by_type` and `kartograph_get_workspace_readiness` +### Failure modes (stop prepopulation on infra errors) + +- **422** — fix ontology or JSONL; retry is appropriate. +- **500/503 on readiness or apply after validate passed** — platform/graph storage issue; **stop**, report to the operator, do not advance to the next prepopulated label. Suggest `make dev-repair-age-graphs` in local dev. +- **`approved_at: null`** — optional; does **not** block prepopulation. +- **Validate pass + apply 500/503** — backend bug; report both outcomes; do not skip to the next entity type. + +Start prepopulation only when schema save succeeded **and** readiness returns 200 with gaps. + Writes persist to the platform database for the active knowledge graph. """.strip() diff --git a/src/agent-runtime/tests/test_agent_prompt.py b/src/agent-runtime/tests/test_agent_prompt.py index fbb157042..a54b07664 100644 --- a/src/agent-runtime/tests/test_agent_prompt.py +++ b/src/agent-runtime/tests/test_agent_prompt.py @@ -31,6 +31,7 @@ def test_build_agent_system_prompt_includes_skills_tools_and_session_scope() -> assert "**schema_modeling**" in prompt assert "kartograph_get_schema_ontology" in prompt assert "Quick workflow" in prompt + assert "Failure modes" in prompt assert "Bash" in prompt assert "instance_generators" in prompt assert "kg-123" in prompt @@ -77,6 +78,8 @@ def test_build_agent_system_prompt_includes_workspace_readiness() -> None: assert "kartograph_get_workspace_readiness" in prompt assert "Read" in prompt assert "Glob" in prompt + assert "Failure modes" in prompt + assert "dev-repair-age-graphs" in prompt def test_build_agent_system_prompt_omits_tools_without_workload_token() -> None: diff --git a/src/api/extraction/application/schema_authoring_guide.py b/src/api/extraction/application/schema_authoring_guide.py index c9f1c8c36..8a1e448ad 100644 --- a/src/api/extraction/application/schema_authoring_guide.py +++ 
b/src/api/extraction/application/schema_authoring_guide.py @@ -35,7 +35,21 @@ ## Prepopulation execution -When `kartograph_get_workspace_readiness` shows gaps after ontology save, **execute immediately**. +Start prepopulation immediately **only when all are true**: + +1. `kartograph_save_schema_ontology` succeeded. +2. `kartograph_get_workspace_readiness` returns **200** (not 500/503). +3. Readiness shows prepopulated gaps (`next_action` / `prepopulation_tasks` name a label). +4. No systemic server errors on schema tools in this session. + +If readiness is unavailable after a successful schema save, **stop and report** — do not advance to +the next prepopulated label. + +When readiness shows gaps and the checks above pass, **execute immediately** — do not ask permission. + +**First prepopulated entity type only:** smoke-test the pipeline with 1–2 instances before the full +batch (`preview_instances.py --limit 2`, validate, apply, verify with +`kartograph_list_instances_by_type`). Then run the full scanner output. **Entities** (all entity gaps before any relationship gap): @@ -142,4 +156,27 @@ - Prepopulated relationships may only reference prepopulated entity types. Call `kartograph_get_workspace_readiness` for gaps, `next_action`, `prepopulation_tasks`, and `blocking_reasons`. 
+ +## Failure modes (schema tools) + +Classify outcomes before continuing prepopulation: + +| Outcome | Meaning | Action | +|---------|---------|--------| +| 422 + validation errors | Ontology or JSONL issue | Fix payload; retry | +| 422 on save | Authoring issue | Fix ontology draft | +| **500 or 503 on readiness/apply after validate passed** | **Platform / graph storage** | **Stop; report; do not continue to next label** | +| 500 on multiple schema endpoints | Systemic infra | Stop; suggest dev repair or env restart | + +**Validation vs apply:** If `kartograph_validate_graph_mutations_from_file` passes and +`kartograph_apply_graph_mutations_from_file` returns 500/503, that is a **backend bug** — not bad +JSONL. Report validation success and apply failure together. Do not retry in a loop or skip to the +next entity type. + +**`approved_at`:** Optional metadata on save. `null` is valid and does **not** block prepopulation. +Only pass `approved_at` when the user explicitly approved a timestamp. + +**Do not** conflate schema design, prepopulation planning, and implementation in one turn when the +user listed multiple deliverables — but **do** stop all implementation when graph tools return +systemic server errors. """.strip() diff --git a/src/api/extraction/application/skill_resolution_service.py b/src/api/extraction/application/skill_resolution_service.py index a3b0c28eb..e3a36cd91 100644 --- a/src/api/extraction/application/skill_resolution_service.py +++ b/src/api/extraction/application/skill_resolution_service.py @@ -72,10 +72,22 @@ class ResolvedExtractionSkillPack: ), ( "When readiness shows prepopulated gaps after schema save, execute immediately — do not ask " - "permission. One label per turn unless the user requested a single type only (then finish fully). " + "permission — but only after kartograph_get_workspace_readiness returns 200 with gaps. 
" + "If readiness or apply returns 500/503 while validation passed, stop prepopulation, report " + "a platform/infrastructure issue, and do not advance to the next label. " + "One label per turn unless the user requested a single type only (then finish fully). " "Use readiness next_action and prepopulation_tasks for the suggested scanner path. " "Only ask when discovery strategy is ambiguous or strict CREATE reports duplicates." ), + ( + "approved_at on saved ontology is optional metadata; null does not block prepopulation. " + "Do not treat missing approved_at as schema activation failure." + ), + ( + "If kartograph_validate_graph_mutations_from_file passes and apply-from-file returns " + "500/503, report both outcomes as a backend bug — do not retry blindly or continue to " + "the next prepopulated type." + ), ), }, ExtractionSessionMode.EXTRACTION_OPERATIONS: { @@ -132,13 +144,22 @@ class ResolvedExtractionSkillPack: "Follow instance_generators/PREPOPULATION_WORKFLOW.md. Per gap: {Label}.py (case-sensitive filename) " "→ out/{Label}_instances.json → preview_instances.py (optional) → entities_to_jsonl.py or " "relationships_to_jsonl.py → validate/apply-from-file. Use scanner_common.generate_slug() and " - "dedupe_instances(). Entities before relationships. Primary relationship direction only." + "dedupe_instances(). Entities before relationships. Primary relationship direction only. " + "For the first prepopulated entity type after schema save, smoke-test with 1–2 instances " + "before the full batch. Stop and report if readiness/apply return 500/503 after validate passed." ), "readiness_reporting": ( "After schema or prepopulation work, call kartograph_get_workspace_readiness and cite " "next_action, prepopulation_tasks, blocking_reasons, and transition_eligible. When gaps remain " "after schema save, state which single prepopulation task you are executing next — do not poll " - "the user for permission to start unless the user asked for multiple types at once." 
+ "the user for permission to start unless the user asked for multiple types at once. " + "If readiness returns 500/503, stop prepopulation and report infrastructure failure — do not " + "interpret approved_at=null as the cause." + ), + "failure_modes": ( + "422 = fix ontology or JSONL. 500/503 on readiness or apply after validate passed = platform " + "graph storage issue — stop, report, suggest dev repair; do not continue to next label. " + "approved_at null is allowed. Validation success means apply should work unless the server fails." ), }, ExtractionSessionMode.EXTRACTION_OPERATIONS: { diff --git a/src/api/extraction/infrastructure/instance_generator_templates/PREPOPULATION_WORKFLOW.md b/src/api/extraction/infrastructure/instance_generator_templates/PREPOPULATION_WORKFLOW.md index d57cdc876..e068c344a 100644 --- a/src/api/extraction/infrastructure/instance_generator_templates/PREPOPULATION_WORKFLOW.md +++ b/src/api/extraction/infrastructure/instance_generator_templates/PREPOPULATION_WORKFLOW.md @@ -2,6 +2,16 @@ Use this checklist for every `prepopulated: true` type after the ontology is saved. +## First entity type — smoke test (recommended) + +After the ontology is saved, for the **first** prepopulated entity label only: + +1. Run the scanner but limit output (e.g. `preview_instances.py {Label} --limit 2` or a tiny hand-trimmed JSON). +2. Convert to JSONL, `kartograph_validate_graph_mutations_from_file`, then `kartograph_apply_graph_mutations_from_file`. +3. Confirm with `kartograph_list_instances_by_type` and `kartograph_get_workspace_readiness` (must return **200**). +4. If validate passes but apply or readiness returns **500/503**, **stop** — report a platform issue; do not run the full batch or advance to the next label. +5. When the smoke test succeeds, run the full scanner → JSONL → apply pipeline for that label. 
+ ## Entity prepopulation (six steps) ### Step 1 — Create scanner diff --git a/src/api/extraction/presentation/workload_routes.py b/src/api/extraction/presentation/workload_routes.py index a769bc8aa..18d226ad9 100644 --- a/src/api/extraction/presentation/workload_routes.py +++ b/src/api/extraction/presentation/workload_routes.py @@ -19,6 +19,7 @@ get_workload_graph_reader, get_workload_schema_service, ) +from infrastructure.extraction_workload.workload_errors import raise_graph_storage_http_error from management.domain.ontology_prepopulation import PrepopulationValidationError from management.domain.relationship_pairing import ontology_config_from_authoring_payload from management.domain.value_objects import OntologyConfig @@ -27,6 +28,14 @@ router = APIRouter(prefix="/workloads", tags=["extraction-workloads"]) +async def _await_graph_operation(awaitable): + """Run a graph-backed coroutine and map storage failures to HTTP 503.""" + try: + return await awaitable + except Exception as exc: + raise_graph_storage_http_error(exc) + + def _require_chat_scope(auth: WorkloadAuthContext) -> None: if "workload:chat" not in auth.credentials.scopes: raise HTTPException( @@ -220,11 +229,14 @@ async def workload_validate_mutations( schema_service: Annotated[IWorkloadSchemaService, Depends(get_workload_schema_service)] = ..., ) -> WorkloadMutationValidateResponse: _require_chat_scope(auth) - result = await schema_service.validate_mutation_jsonl( - tenant_id=auth.tenant_id, - knowledge_graph_id=auth.knowledge_graph_id, - jsonl=request.jsonl, - ) + try: + result = await schema_service.validate_mutation_jsonl( + tenant_id=auth.tenant_id, + knowledge_graph_id=auth.knowledge_graph_id, + jsonl=request.jsonl, + ) + except Exception as exc: + raise_graph_storage_http_error(exc) return WorkloadMutationValidateResponse( valid=bool(result.get("valid")), errors=[str(item) for item in result.get("errors", [])], @@ -242,11 +254,14 @@ async def workload_apply_mutations( schema_service: 
Annotated[IWorkloadSchemaService, Depends(get_workload_schema_service)] = ..., ) -> WorkloadMutationApplyResponse: _require_chat_scope(auth) - result = await schema_service.apply_mutation_jsonl( - tenant_id=auth.tenant_id, - knowledge_graph_id=auth.knowledge_graph_id, - jsonl=request.jsonl, - ) + try: + result = await schema_service.apply_mutation_jsonl( + tenant_id=auth.tenant_id, + knowledge_graph_id=auth.knowledge_graph_id, + jsonl=request.jsonl, + ) + except Exception as exc: + raise_graph_storage_http_error(exc) return WorkloadMutationApplyResponse( applied=bool(result.get("applied")), errors=[str(item) for item in result.get("errors", [])], @@ -282,11 +297,13 @@ async def workload_check_slugs( normalized = tuple( sorted({str(slug).strip() for slug in request.slugs if str(slug).strip()}) ) - existing, missing = await reader.partition_slugs_by_existence( - tenant_id=auth.tenant_id, - knowledge_graph_id=auth.knowledge_graph_id, - entity_type=request.entity_type.strip(), - slugs=normalized, + existing, missing = await _await_graph_operation( + reader.partition_slugs_by_existence( + tenant_id=auth.tenant_id, + knowledge_graph_id=auth.knowledge_graph_id, + entity_type=request.entity_type.strip(), + slugs=normalized, + ) ) return WorkloadCheckSlugsResponse( entity_type=request.entity_type.strip(), @@ -307,11 +324,13 @@ async def workload_search_graph_by_slug( ) -> WorkloadGraphSearchResponse: _require_chat_scope(auth) - nodes = await reader.search_by_slug( - tenant_id=auth.tenant_id, - knowledge_graph_id=auth.knowledge_graph_id, - slug=slug, - entity_type=entity_type, + nodes = await _await_graph_operation( + reader.search_by_slug( + tenant_id=auth.tenant_id, + knowledge_graph_id=auth.knowledge_graph_id, + slug=slug, + entity_type=entity_type, + ) ) serialized = [ { @@ -338,12 +357,14 @@ async def workload_list_instances_by_type( ) -> WorkloadInstanceListResponse: _require_chat_scope(auth) - nodes, total = await reader.list_instances_by_type( - 
tenant_id=auth.tenant_id, - knowledge_graph_id=auth.knowledge_graph_id, - entity_type=entity_type, - limit=limit, - offset=offset, + nodes, total = await _await_graph_operation( + reader.list_instances_by_type( + tenant_id=auth.tenant_id, + knowledge_graph_id=auth.knowledge_graph_id, + entity_type=entity_type, + limit=limit, + offset=offset, + ) ) serialized = [ { @@ -379,14 +400,16 @@ async def workload_list_relationship_instances( ) -> WorkloadRelationshipListResponse: _require_chat_scope(auth) - relationships, total = await reader.list_relationship_instances( - tenant_id=auth.tenant_id, - knowledge_graph_id=auth.knowledge_graph_id, - relationship_type=relationship_type, - source_entity_type=source_entity_type, - target_entity_type=target_entity_type, - limit=limit, - offset=offset, + relationships, total = await _await_graph_operation( + reader.list_relationship_instances( + tenant_id=auth.tenant_id, + knowledge_graph_id=auth.knowledge_graph_id, + relationship_type=relationship_type, + source_entity_type=source_entity_type, + target_entity_type=target_entity_type, + limit=limit, + offset=offset, + ) ) serialized = [ { @@ -429,12 +452,15 @@ async def workload_get_workspace_readiness( ) ontology = await schema_service.get_ontology(knowledge_graph_id=auth.knowledge_graph_id) - snapshot = await build_workload_readiness_snapshot( - ontology=ontology, - knowledge_graph_id=auth.knowledge_graph_id, - tenant_id=auth.tenant_id, - graph_reader=reader, - ) + try: + snapshot = await build_workload_readiness_snapshot( + ontology=ontology, + knowledge_graph_id=auth.knowledge_graph_id, + tenant_id=auth.tenant_id, + graph_reader=reader, + ) + except Exception as exc: + raise_graph_storage_http_error(exc) return WorkloadReadinessResponse(**snapshot) diff --git a/src/api/graph/infrastructure/tenant_graph_handler.py b/src/api/graph/infrastructure/tenant_graph_handler.py index 766daf786..54cb848e3 100644 --- a/src/api/graph/infrastructure/tenant_graph_handler.py +++ 
b/src/api/graph/infrastructure/tenant_graph_handler.py @@ -43,9 +43,13 @@ class AGEGraphProvisioner: """Creates AGE graphs using a psycopg2 connection. Uses create-if-not-exists semantics by checking ag_catalog.ag_graph - before calling create_graph. This is idempotent and safe for replay. + before calling create_graph. When catalog metadata exists but the graph + is not queryable (common after a partial Postgres restore), the graph is + dropped and recreated. This is idempotent and safe for replay. """ + _GRAPH_OID_MISSING = "does not exist" + def __init__(self, connection_factory: "ConnectionFactory") -> None: """Initialize the provisioner with a connection factory. @@ -54,6 +58,49 @@ def __init__(self, connection_factory: "ConnectionFactory") -> None: """ self._connection_factory = connection_factory + @staticmethod + def _prepare_age_session(cursor) -> None: + cursor.execute("LOAD 'age'") + cursor.execute('SET search_path = ag_catalog, "$user", public') + + def _graph_is_queryable(self, cursor, graph_name: str) -> bool: + """Return True when cypher queries can run against the named graph. + + The probe runs inside a savepoint: a failed statement would otherwise + abort the enclosing transaction and make the caller's follow-up + drop/create statements on the same cursor fail. + """ + self._prepare_age_session(cursor) + cursor.execute("SAVEPOINT age_graph_probe") + try: + cursor.execute( + "SELECT * FROM cypher(%s, $$ RETURN 1 $$) AS (result agtype)", + (graph_name,), + ) + cursor.fetchone() + except Exception as exc: + cursor.execute("ROLLBACK TO SAVEPOINT age_graph_probe") + message = str(exc).lower() + if self._GRAPH_OID_MISSING in message or "graph" in message: + return False + raise + cursor.execute("RELEASE SAVEPOINT age_graph_probe") + return True + + @staticmethod + def _drop_graph(cursor, graph_name: str) -> None: + cursor.execute( + "SELECT ag_catalog.drop_graph(%s, true)", + (graph_name,), + ) + + @staticmethod + def _create_graph(cursor, graph_name: str) -> None: + cursor.execute( + "SELECT ag_catalog.create_graph(%s)", + (graph_name,), + ) + def ensure_graph_exists(self, graph_name: str) -> None: """Create the AGE graph if it does not already exist. 
@@ -61,6 +100,10 @@ def ensure_graph_exists(self, graph_name: str) -> None: and graph creation atomic, preventing race conditions under concurrent duplicate event deliveries (e.g. outbox replay). + When catalog metadata exists but the graph OID is missing or corrupt + (``graph with oid N does not exist``), the stale graph is dropped and + recreated so workload reads/writes can proceed. + The connection is always committed or rolled back on every code path, including the no-op/exists path, to avoid leaking open transactions back to the connection pool. @@ -75,32 +118,26 @@ def ensure_graph_exists(self, graph_name: str) -> None: conn = self._connection_factory.get_connection() try: with conn.cursor() as cursor: - # Acquire a transaction-level advisory lock keyed by graph name. - # This makes the check + create atomic: concurrent callers block - # here until the first caller's transaction commits or rolls back. - # The lock is released automatically when the transaction ends. cursor.execute( "SELECT pg_advisory_xact_lock(hashtext(%s)::bigint)", (graph_name,), ) - # Check if graph exists in the AGE catalog cursor.execute( "SELECT 1 FROM ag_catalog.ag_graph WHERE name = %s", (graph_name,), ) - if cursor.fetchone() is not None: - # Graph already exists — idempotent no-op. - # Rollback to release advisory lock and cleanly end the - # transaction; avoids leaking an open transaction to the pool. 
+ catalog_exists = cursor.fetchone() is not None + + if catalog_exists and self._graph_is_queryable(cursor, graph_name): conn.rollback() return - # Attempt to create the graph - cursor.execute( - "SELECT ag_catalog.create_graph(%s)", - (graph_name,), - ) + if catalog_exists: + self._prepare_age_session(cursor) + self._drop_graph(cursor, graph_name) + + self._create_graph(cursor, graph_name) conn.commit() @@ -112,6 +149,16 @@ def ensure_graph_exists(self, graph_name: str) -> None: self._connection_factory.return_connection(conn) +def ensure_tenant_graph_operational( + connection_factory: "ConnectionFactory", + tenant_id: str, +) -> str: + """Ensure ``tenant_{tenant_id}`` exists and accepts Cypher queries.""" + graph_name = f"tenant_{tenant_id}" + AGEGraphProvisioner(connection_factory).ensure_graph_exists(graph_name) + return graph_name + + class TenantAGEGraphHandler: """Outbox event handler that provisions an AGE graph per tenant. diff --git a/src/api/infrastructure/extraction_workload/graph_mutation_writer.py b/src/api/infrastructure/extraction_workload/graph_mutation_writer.py index 619e0b9e9..956e9135b 100644 --- a/src/api/infrastructure/extraction_workload/graph_mutation_writer.py +++ b/src/api/infrastructure/extraction_workload/graph_mutation_writer.py @@ -13,6 +13,7 @@ from graph.domain.value_objects import MutationOperation, MutationOperationType from graph.infrastructure.age_bulk_loading import AgeBulkLoadingStrategy from graph.infrastructure.age_client import AgeGraphClient +from graph.infrastructure.tenant_graph_handler import ensure_tenant_graph_operational from graph.infrastructure.mutation_applier import MutationApplier from graph.infrastructure.postgres_kg_type_definition_store import ( PostgresKnowledgeGraphTypeDefinitionStore, @@ -115,6 +116,7 @@ def _apply_sync( ) -> dict[str, Any]: graph_name = f"tenant_{tenant_id}" factory = ConnectionFactory(self._settings, pool=self._pool) + ensure_tenant_graph_operational(factory, tenant_id) client = 
AgeGraphClient( self._settings, connection_factory=factory, diff --git a/src/api/infrastructure/extraction_workload/graph_reader.py b/src/api/infrastructure/extraction_workload/graph_reader.py index 2f15c81eb..acda8b195 100644 --- a/src/api/infrastructure/extraction_workload/graph_reader.py +++ b/src/api/infrastructure/extraction_workload/graph_reader.py @@ -8,6 +8,7 @@ from graph.application.services import GraphQueryService from graph.infrastructure.age_client import AgeGraphClient from graph.infrastructure.graph_repository import GraphExtractionReadOnlyRepository +from graph.infrastructure.tenant_graph_handler import ensure_tenant_graph_operational from infrastructure.database.connection import ConnectionFactory from infrastructure.database.connection_pool import ConnectionPool from infrastructure.settings import DatabaseSettings @@ -27,6 +28,13 @@ def __init__( self._pool = pool self._settings = settings + def _connect_for_tenant(self, tenant_id: str) -> AgeGraphClient: + factory = ConnectionFactory(self._settings, pool=self._pool) + graph_name = ensure_tenant_graph_operational(factory, tenant_id) + client = AgeGraphClient(self._settings, connection_factory=factory, graph_name=graph_name) + client.connect() + return client + async def search_by_slug( self, *, @@ -35,14 +43,11 @@ async def search_by_slug( slug: str, entity_type: str | None = None, ) -> list[WorkloadGraphNode]: - graph_name = f"tenant_{tenant_id}" - factory = ConnectionFactory(self._settings, pool=self._pool) - client = AgeGraphClient(self._settings, connection_factory=factory, graph_name=graph_name) - client.connect() + client = await asyncio.to_thread(self._connect_for_tenant, tenant_id) try: repository = GraphExtractionReadOnlyRepository( client=client, - graph_id=graph_name, + graph_id=client.graph_name, ) service = GraphQueryService(repository=repository, probe=DefaultGraphServiceProbe()) nodes = service.search_by_slug( @@ -71,14 +76,11 @@ async def list_instances_by_type( limit: int = 100, 
offset: int = 0, ) -> tuple[list[WorkloadGraphNode], int]: - graph_name = f"tenant_{tenant_id}" - factory = ConnectionFactory(self._settings, pool=self._pool) - client = AgeGraphClient(self._settings, connection_factory=factory, graph_name=graph_name) - client.connect() + client = await asyncio.to_thread(self._connect_for_tenant, tenant_id) try: repository = GraphExtractionReadOnlyRepository( client=client, - graph_id=graph_name, + graph_id=client.graph_name, ) service = GraphQueryService(repository=repository, probe=DefaultGraphServiceProbe()) bounded_limit = max(1, min(limit, 500)) @@ -113,14 +115,11 @@ async def count_entity_instances_by_type( knowledge_graph_id: str, entity_type: str, ) -> int: - graph_name = f"tenant_{tenant_id}" - factory = ConnectionFactory(self._settings, pool=self._pool) - client = AgeGraphClient(self._settings, connection_factory=factory, graph_name=graph_name) - client.connect() + client = await asyncio.to_thread(self._connect_for_tenant, tenant_id) try: repository = GraphExtractionReadOnlyRepository( client=client, - graph_id=graph_name, + graph_id=client.graph_name, ) service = GraphQueryService(repository=repository, probe=DefaultGraphServiceProbe()) return service.count_by_label( @@ -146,14 +145,11 @@ async def list_relationship_instances( limit: int = 100, offset: int = 0, ) -> tuple[list[WorkloadGraphRelationship], int]: - graph_name = f"tenant_{tenant_id}" - factory = ConnectionFactory(self._settings, pool=self._pool) - client = AgeGraphClient(self._settings, connection_factory=factory, graph_name=graph_name) - client.connect() + client = await asyncio.to_thread(self._connect_for_tenant, tenant_id) try: repository = GraphExtractionReadOnlyRepository( client=client, - graph_id=graph_name, + graph_id=client.graph_name, ) bounded_limit = max(1, min(limit, 500)) bounded_offset = max(0, offset) @@ -198,14 +194,11 @@ async def count_relationship_instances( source_entity_type: str | None = None, target_entity_type: str | None = None, ) 
-> int: - graph_name = f"tenant_{tenant_id}" - factory = ConnectionFactory(self._settings, pool=self._pool) - client = AgeGraphClient(self._settings, connection_factory=factory, graph_name=graph_name) - client.connect() + client = await asyncio.to_thread(self._connect_for_tenant, tenant_id) try: repository = GraphExtractionReadOnlyRepository( client=client, - graph_id=graph_name, + graph_id=client.graph_name, ) return repository.count_relationship_instances( relationship_type, @@ -225,18 +218,13 @@ async def find_existing_node_ids( ) -> frozenset[str]: if not node_ids: return frozenset() - graph_name = f"tenant_{tenant_id}" def _query() -> set[str]: - factory = ConnectionFactory(self._settings, pool=self._pool) - client = AgeGraphClient( - self._settings, connection_factory=factory, graph_name=graph_name - ) - client.connect() + client = self._connect_for_tenant(tenant_id) try: repository = GraphExtractionReadOnlyRepository( client=client, - graph_id=graph_name, + graph_id=client.graph_name, ) return repository.find_existing_node_ids( list(node_ids), @@ -256,18 +244,13 @@ async def find_existing_edge_ids( ) -> frozenset[str]: if not edge_ids: return frozenset() - graph_name = f"tenant_{tenant_id}" def _query() -> set[str]: - factory = ConnectionFactory(self._settings, pool=self._pool) - client = AgeGraphClient( - self._settings, connection_factory=factory, graph_name=graph_name - ) - client.connect() + client = self._connect_for_tenant(tenant_id) try: repository = GraphExtractionReadOnlyRepository( client=client, - graph_id=graph_name, + graph_id=client.graph_name, ) return repository.find_existing_edge_ids( list(edge_ids), @@ -288,18 +271,13 @@ async def find_existing_slugs_for_entity_type( ) -> frozenset[str]: if not slugs: return frozenset() - graph_name = f"tenant_{tenant_id}" def _query() -> set[str]: - factory = ConnectionFactory(self._settings, pool=self._pool) - client = AgeGraphClient( - self._settings, connection_factory=factory, graph_name=graph_name - ) 
- client.connect() + client = self._connect_for_tenant(tenant_id) try: repository = GraphExtractionReadOnlyRepository( client=client, - graph_id=graph_name, + graph_id=client.graph_name, ) return repository.find_existing_slugs_for_entity_type( entity_type, diff --git a/src/api/infrastructure/extraction_workload/workload_errors.py b/src/api/infrastructure/extraction_workload/workload_errors.py new file mode 100644 index 000000000..d3b527920 --- /dev/null +++ b/src/api/infrastructure/extraction_workload/workload_errors.py @@ -0,0 +1,36 @@ +"""Map graph storage failures to workload HTTP responses for GMA tools.""" + +from __future__ import annotations + +from fastapi import HTTPException, status + +from infrastructure.database.exceptions import GraphQueryError + +_GRAPH_STORAGE_MARKERS = ( + "graph with oid", + "does not exist", + "ag_catalog", +) + + +def is_graph_storage_error(exc: BaseException) -> bool: + """Return True when the failure indicates tenant AGE graph storage is unavailable.""" + if isinstance(exc, GraphQueryError): + return True + message = str(exc).lower() + return any(marker in message for marker in _GRAPH_STORAGE_MARKERS) + + +def raise_graph_storage_http_error(exc: BaseException) -> None: + """Raise HTTP 503 with operator-facing guidance for graph storage failures.""" + if not is_graph_storage_error(exc): + raise exc + raise HTTPException( + status_code=status.HTTP_503_SERVICE_UNAVAILABLE, + detail=( + "Tenant graph storage is unavailable (corrupt or missing AGE graph). " + "This is a platform/infrastructure issue, not invalid JSONL or ontology. " + "In local dev, run `make dev-repair-age-graphs` or restore from " + "`make dev-backup`, then retry prepopulation from the current label." 
+ ), + ) from exc diff --git a/src/api/tests/unit/extraction/application/test_schema_authoring_guide.py b/src/api/tests/unit/extraction/application/test_schema_authoring_guide.py index 997a3a184..257127402 100644 --- a/src/api/tests/unit/extraction/application/test_schema_authoring_guide.py +++ b/src/api/tests/unit/extraction/application/test_schema_authoring_guide.py @@ -14,3 +14,7 @@ def test_authoring_guide_documents_bootstrap_and_modeling_guidance() -> None: assert "prepopulated" in SCHEMA_AUTHORING_GUIDE assert "/tmp" in SCHEMA_AUTHORING_GUIDE assert "data_source.py" not in SCHEMA_AUTHORING_GUIDE + assert "## Failure modes" in SCHEMA_AUTHORING_GUIDE + assert "approved_at" in SCHEMA_AUTHORING_GUIDE + assert "500/503" in SCHEMA_AUTHORING_GUIDE + assert "smoke-test" in SCHEMA_AUTHORING_GUIDE.lower() or "smoke test" in SCHEMA_AUTHORING_GUIDE.lower() diff --git a/src/api/tests/unit/extraction/application/test_skill_resolution_service.py b/src/api/tests/unit/extraction/application/test_skill_resolution_service.py index 5f6bb40bb..19d4a7f9b 100644 --- a/src/api/tests/unit/extraction/application/test_skill_resolution_service.py +++ b/src/api/tests/unit/extraction/application/test_skill_resolution_service.py @@ -41,6 +41,7 @@ async def test_bootstrap_mode_uses_bootstrap_defaults(self): "schema_workflow", "prepopulation", "readiness_reporting", + "failure_modes", } assert "entities_to_jsonl.py" in resolved.skills["prepopulation"] assert "_instances.json" in resolved.skills["prepopulation"] @@ -49,6 +50,8 @@ async def test_bootstrap_mode_uses_bootstrap_defaults(self): assert "entities_to_jsonl.py" in guardrails_text assert "never /tmp" in guardrails_text or "Never /tmp" in guardrails_text assert "do not ask" in guardrails_text + assert "500/503" in guardrails_text + assert "approved_at" in guardrails_text assert "kartograph_save_schema_ontology" in guardrails_text assert len(resolved.prompt_hierarchy) > 0 diff --git 
a/src/api/tests/unit/extraction/presentation/test_workload_routes.py b/src/api/tests/unit/extraction/presentation/test_workload_routes.py index 6cd4d2276..6f8646379 100644 --- a/src/api/tests/unit/extraction/presentation/test_workload_routes.py +++ b/src/api/tests/unit/extraction/presentation/test_workload_routes.py @@ -15,6 +15,7 @@ get_workload_graph_reader, get_workload_schema_service, ) +from infrastructure.database.exceptions import GraphQueryError from management.domain.value_objects import EdgeTypeDefinition, NodeTypeDefinition, OntologyConfig @@ -118,6 +119,11 @@ async def partition_slugs_by_existence(self, **kwargs): return existing, missing +class _BrokenGraphReader(_FakeGraphReader): + async def count_entity_instances_by_type(self, **kwargs): + raise GraphQueryError("graph with oid 17491 does not exist", query="MATCH (n) RETURN n") + + class _FakeExtractionJobsService: def __init__(self) -> None: self.saved_payload: dict[str, object] | None = None @@ -215,6 +221,7 @@ def test_workload_get_schema_authoring_guide(workload_client: tuple[TestClient, assert "kartograph_get_schema_ontology" in response.json()["guide"] assert "PREPOPULATION_WORKFLOW.md" in response.json()["guide"] assert "case-sensitive" in response.json()["guide"] + assert "Failure modes" in response.json()["guide"] def test_workload_get_workspace_readiness(workload_client: tuple[TestClient, _FakeSchemaService, str]) -> None: @@ -231,6 +238,38 @@ def test_workload_get_workspace_readiness(workload_client: tuple[TestClient, _Fa assert payload["prepopulated_entity_types"][0]["label"] == "service" +def test_workload_get_workspace_readiness_returns_503_for_graph_storage_errors() -> None: + fake = _FakeSchemaService() + fake.saved = OntologyConfig( + node_types=( + NodeTypeDefinition(label="service", prepopulated=True, prepopulated_instance_count=0), + ), + edge_types=(), + ) + issuer = ScopedWorkloadCredentialIssuer(default_ttl=__import__("datetime").timedelta(minutes=10)) + credentials = 
issuer.issue_for_sticky_session( + tenant_id="tenant-1", + knowledge_graph_id="kg-1", + ) + app = FastAPI() + app.include_router(workload_routes.router, prefix="/extraction") + app.dependency_overrides[get_workload_schema_service] = lambda: fake + app.dependency_overrides[get_workload_graph_reader] = lambda: _BrokenGraphReader() + app.dependency_overrides[get_workload_extraction_jobs_service] = lambda: _FakeExtractionJobsService() + app.dependency_overrides[get_workload_auth_context] = lambda: WorkloadAuthContext( + credentials=credentials, + tenant_id="tenant-1", + knowledge_graph_id="kg-1", + ) + client = TestClient(app) + response = client.get( + "/extraction/workloads/schema/readiness", + headers={"X-Workload-Token": credentials.token}, + ) + assert response.status_code == 503 + assert "dev-repair-age-graphs" in response.json()["detail"] + + def test_workload_list_instances_by_type(workload_client: tuple[TestClient, _FakeSchemaService, str]) -> None: client, _fake, token = workload_client response = client.get( diff --git a/src/api/tests/unit/graph/infrastructure/test_tenant_graph_handler.py b/src/api/tests/unit/graph/infrastructure/test_tenant_graph_handler.py index 0a78635b7..8d93c9660 100644 --- a/src/api/tests/unit/graph/infrastructure/test_tenant_graph_handler.py +++ b/src/api/tests/unit/graph/infrastructure/test_tenant_graph_handler.py @@ -229,23 +229,41 @@ def test_creates_graph_when_not_exists(self) -> None: # Verify connection was committed mock_conn.commit.assert_called() - def test_skips_create_when_graph_already_exists(self) -> None: - """Provisioner does NOT call create_graph if graph already exists.""" + def test_skips_create_when_graph_already_exists_and_is_queryable(self) -> None: + """Provisioner does NOT call create_graph if graph already exists and works.""" provisioner, mock_connection_factory, mock_conn, mock_cursor = ( self._make_provisioner() ) - # Simulate: graph ALREADY EXISTS (SELECT returns a row) - mock_cursor.fetchone.return_value = 
(1,) + mock_cursor.fetchone.side_effect = [(1,), (1,)] provisioner.ensure_graph_exists("tenant_abc123") - # Verify no create_graph call was made create_calls = [ c for c in mock_cursor.execute.call_args_list if "create_graph" in str(c) ] assert len(create_calls) == 0 + def test_recreates_graph_when_catalog_exists_but_graph_is_corrupt(self) -> None: + """Provisioner drops and recreates graphs that fail a probe query.""" + provisioner, mock_connection_factory, mock_conn, mock_cursor = ( + self._make_provisioner() + ) + + def _execute(sql, params=None): + if "cypher" in str(sql): + raise RuntimeError("graph with oid 17491 does not exist") + + mock_cursor.fetchone.return_value = (1,) + mock_cursor.execute.side_effect = _execute + + provisioner.ensure_graph_exists("tenant_abc123") + + executed = " ".join(str(call) for call in mock_cursor.execute.call_args_list) + assert "drop_graph" in executed + assert "create_graph" in executed + mock_conn.commit.assert_called() + def test_returns_connection_to_factory_on_success(self) -> None: """Connection is always returned to factory after provisioning.""" provisioner, mock_connection_factory, mock_conn, mock_cursor = ( @@ -285,8 +303,7 @@ def test_rollback_or_commit_called_on_no_op_path(self) -> None: self._make_provisioner() ) - # Simulate: graph ALREADY EXISTS — no-op path - mock_cursor.fetchone.return_value = (1,) + mock_cursor.fetchone.side_effect = [(1,), (1,)] provisioner.ensure_graph_exists("tenant_abc123") diff --git a/src/api/tests/unit/infrastructure/extraction_workload/test_workload_errors.py b/src/api/tests/unit/infrastructure/extraction_workload/test_workload_errors.py new file mode 100644 index 000000000..def6f955a --- /dev/null +++ b/src/api/tests/unit/infrastructure/extraction_workload/test_workload_errors.py @@ -0,0 +1,34 @@ +"""Unit tests for workload graph storage error mapping.""" + +from __future__ import annotations + +import pytest +from fastapi import HTTPException + +from 
infrastructure.database.exceptions import GraphQueryError +from infrastructure.extraction_workload.workload_errors import ( + is_graph_storage_error, + raise_graph_storage_http_error, +) + + +def test_is_graph_storage_error_detects_graph_query_error() -> None: + assert is_graph_storage_error(GraphQueryError("graph with oid 1 does not exist", query="MATCH (n) RETURN n")) + + +def test_is_graph_storage_error_detects_message_markers() -> None: + assert is_graph_storage_error(RuntimeError("graph with oid 17491 does not exist")) + + +def test_raise_graph_storage_http_error_maps_to_503() -> None: + with pytest.raises(HTTPException) as exc_info: + raise_graph_storage_http_error( + GraphQueryError("graph with oid 17491 does not exist", query="MATCH (n) RETURN n") + ) + assert exc_info.value.status_code == 503 + assert "dev-repair-age-graphs" in str(exc_info.value.detail) + + +def test_raise_graph_storage_http_error_reraises_unrelated_errors() -> None: + with pytest.raises(ValueError, match="unrelated"): + raise_graph_storage_http_error(ValueError("unrelated")) From ad08a82411de05accb52fbe2892591b737239a8d Mon Sep 17 00:00:00 2001 From: aredenba-rh <aredenba@redhat.com> Date: Sat, 13 Jun 2026 21:22:25 -0400 Subject: [PATCH 129/153] feat(extraction): streamline GMA prepopulation with run_scanner and apply chaining Add run_scanner.py to combine scan-to-JSONL in one step, enrich readiness tasks with order/run_command and underscore relationship paths, and return next_action from apply so agents can chain labels without polling readiness every batch. 
Co-authored-by: Cursor <cursoragent@cursor.com> --- .../kartograph_agent_runtime/agent_prompt.py | 6 +- src/agent-runtime/tests/test_agent_prompt.py | 1 + .../application/schema_authoring_guide.py | 33 ++- .../application/skill_resolution_service.py | 22 +- .../PREPOPULATION_WORKFLOW.md | 33 ++- .../instance_generator_templates/README.md | 15 +- .../instance_generator_templates/__init__.py | 1 + .../run_scanner.py | 235 ++++++++++++++++++ .../scanner_common.py | 18 ++ .../presentation/workload_routes.py | 34 +++ .../workspace_readiness.py | 64 ++++- .../test_schema_authoring_guide.py | 4 +- .../test_skill_resolution_service.py | 6 +- .../infrastructure/test_run_scanner.py | 159 ++++++++++++ .../infrastructure/test_scanner_common.py | 18 ++ ...est_sticky_session_workdir_materializer.py | 1 + .../presentation/test_workload_routes.py | 12 +- .../test_workspace_readiness.py | 5 + 18 files changed, 619 insertions(+), 48 deletions(-) create mode 100644 src/api/extraction/infrastructure/instance_generator_templates/run_scanner.py create mode 100644 src/api/tests/unit/extraction/infrastructure/test_run_scanner.py diff --git a/src/agent-runtime/kartograph_agent_runtime/agent_prompt.py b/src/agent-runtime/kartograph_agent_runtime/agent_prompt.py index fee8ebe9f..f4c9a6c11 100644 --- a/src/agent-runtime/kartograph_agent_runtime/agent_prompt.py +++ b/src/agent-runtime/kartograph_agent_runtime/agent_prompt.py @@ -49,11 +49,11 @@ 1. `kartograph_get_schema_authoring_guide` 2. `kartograph_get_workspace_readiness` 3. `kartograph_get_schema_ontology` -4. Prepopulation: `{Label}.py` (case-sensitive) → `out/{Label}_instances.json` → `preview_instances.py` → `entities_to_jsonl.py` → apply-from-file +4. Prepopulation: `run_scanner.py {Label} --entity` → apply-from-file (or manual JSONL pipeline) 5. Model types → `kartograph_save_schema_ontology` -6. Apply CREATE mutations → `kartograph_apply_graph_mutations` (small fixes inline; bulk via generator output) +6. 
Apply CREATE mutations → `kartograph_apply_graph_mutations_from_file` (apply returns `next_action`) 7. Create relationship edges after entity IDs are known -8. Verify with `kartograph_list_instances_by_type` and `kartograph_get_workspace_readiness` +8. Verify with `kartograph_list_instances_by_type` and readiness when needed ### Failure modes (stop prepopulation on infra errors) diff --git a/src/agent-runtime/tests/test_agent_prompt.py b/src/agent-runtime/tests/test_agent_prompt.py index a54b07664..df850e71a 100644 --- a/src/agent-runtime/tests/test_agent_prompt.py +++ b/src/agent-runtime/tests/test_agent_prompt.py @@ -32,6 +32,7 @@ def test_build_agent_system_prompt_includes_skills_tools_and_session_scope() -> assert "kartograph_get_schema_ontology" in prompt assert "Quick workflow" in prompt assert "Failure modes" in prompt + assert "run_scanner.py" in prompt assert "Bash" in prompt assert "instance_generators" in prompt assert "kg-123" in prompt diff --git a/src/api/extraction/application/schema_authoring_guide.py b/src/api/extraction/application/schema_authoring_guide.py index 8a1e448ad..b0a8de84f 100644 --- a/src/api/extraction/application/schema_authoring_guide.py +++ b/src/api/extraction/application/schema_authoring_guide.py @@ -20,7 +20,7 @@ relationship workflow, slug rules, batch sizes, and verification checklist. Bundled platform scripts (do not edit): `entities_to_jsonl.py`, `relationships_to_jsonl.py`, -`preview_instances.py`, `scanner_common.py`. +`preview_instances.py`, `run_scanner.py`, `scanner_common.py`. Copy `_entity_scanner.example.py` to `{Label}.py` — **filename must match ontology label exactly** (case-sensitive: `E2ETest.py`, not `e2etest.py`). Domain references: `instance_generators/examples/`. @@ -51,7 +51,14 @@ batch (`preview_instances.py --limit 2`, validate, apply, verify with `kartograph_list_instances_by_type`). Then run the full scanner output. 
-**Entities** (all entity gaps before any relationship gap): +**Entities** (all entity gaps before any relationship gap). Prefer `run_scanner.py`: + +```bash +python3 instance_generators/run_scanner.py E2ETest --entity +# kartograph_apply_graph_mutations_from_file path=<printed jsonl_path> +``` + +Manual pipeline: ```bash python3 instance_generators/E2ETest.py repository-files > instance_generators/out/E2ETest_instances.json @@ -59,17 +66,18 @@ python3 instance_generators/entities_to_jsonl.py E2ETest \\ --data-source-id schema-bootstrap \\ instance_generators/out/E2ETest_instances.json > instance_generators/out/E2ETest_instances.jsonl -# validate-from-file → apply-from-file path=instance_generators/out/E2ETest_instances.jsonl +# apply-from-file path=instance_generators/out/E2ETest_instances.jsonl ``` -**Relationships** (after entity slugs exist; name files `{source}_{rel}_{target}_instances.*`): +Apply pre-validates internally; validate-from-file is an optional dry run. Apply responses include +`next_action` and remaining prepopulation gaps — use those instead of polling readiness after +every batch when chaining scanners. + +**Relationships** (after entity slugs exist; files use `{source}_{rel}_{target}_instances.*`): ```bash -python3 instance_generators/repository_defines_test.py repository-files \\ - > instance_generators/out/repository_defines_test_instances.json -python3 instance_generators/relationships_to_jsonl.py defines repository test \\ - instance_generators/out/repository_defines_test_instances.json \\ - > instance_generators/out/repository_defines_test_instances.jsonl +python3 instance_generators/run_scanner.py \\ + --relationship --source ComponentTest --rel tests --target APIEndpoint ``` Scanner stdout contract: @@ -98,8 +106,8 @@ 1. `kartograph_get_schema_authoring_guide` · `kartograph_get_workspace_readiness` · `kartograph_get_schema_ontology` 2. `kartograph_save_schema_ontology` when schema is confirmed 3. Prepopulation pipeline above per gap -4. 
`kartograph_validate_graph_mutations_from_file` → `kartograph_apply_graph_mutations_from_file` -5. Verify with `kartograph_list_instances_by_type` and readiness +4. `kartograph_apply_graph_mutations_from_file` (apply pre-validates; validate is optional dry run) +5. Verify with `kartograph_list_instances_by_type` and readiness when apply does not return next_action ## Entity type shape @@ -155,7 +163,8 @@ - Every `prepopulated=true` relationship type needs ≥1 live edge. - Prepopulated relationships may only reference prepopulated entity types. -Call `kartograph_get_workspace_readiness` for gaps, `next_action`, `prepopulation_tasks`, and `blocking_reasons`. +Call `kartograph_get_workspace_readiness` for gaps, `next_action`, `prepopulation_tasks` +(with `order`, `blocking_types`, and `run_command`), and `blocking_reasons`. ## Failure modes (schema tools) diff --git a/src/api/extraction/application/skill_resolution_service.py b/src/api/extraction/application/skill_resolution_service.py index e3a36cd91..0d91773ec 100644 --- a/src/api/extraction/application/skill_resolution_service.py +++ b/src/api/extraction/application/skill_resolution_service.py @@ -60,10 +60,11 @@ class ResolvedExtractionSkillPack: ), ( "Prepopulation (prepopulated=true types): copy _entity_scanner.example.py to " - "instance_generators/{Label}.py (case-sensitive — must match ontology label exactly) → " - "out/{Label}_instances.json → preview_instances.py (optional) → entities_to_jsonl.py → " - "validate/apply instance_generators/out/{Label}_instances.jsonl in one batch. " - "Never /tmp (not persisted, invalid for apply-from-file). All entity gaps before relationship gaps." + "instance_generators/{Label}.py (case-sensitive) → " + "`python3 instance_generators/run_scanner.py {Label} --entity` (or manual JSONL pipeline) " + "→ kartograph_apply_graph_mutations_from_file with printed jsonl_path. Apply pre-validates; " + "validate is optional dry run. Use apply response next_action to chain labels. 
" + "Never /tmp. All entity gaps before relationship gaps." ), ( "Single prepopulation deliverable (one entity or relationship type): execute the full " @@ -141,12 +142,13 @@ class ResolvedExtractionSkillPack: "Read/save ontology via kartograph_get_schema_ontology and kartograph_save_schema_ontology." ), "prepopulation": ( - "Follow instance_generators/PREPOPULATION_WORKFLOW.md. Per gap: {Label}.py (case-sensitive filename) " - "→ out/{Label}_instances.json → preview_instances.py (optional) → entities_to_jsonl.py or " - "relationships_to_jsonl.py → validate/apply-from-file. Use scanner_common.generate_slug() and " - "dedupe_instances(). Entities before relationships. Primary relationship direction only. " - "For the first prepopulated entity type after schema save, smoke-test with 1–2 instances " - "before the full batch. Stop and report if readiness/apply return 500/503 after validate passed." + "Follow instance_generators/PREPOPULATION_WORKFLOW.md. Per gap: {Label}.py (case-sensitive) " + "→ `run_scanner.py {Label} --entity` (preferred) or manual entities_to_jsonl.py → " + "apply-from-file. Relationships: run_scanner.py --relationship --source --rel --target. " + "Readiness prepopulation_tasks include order, blocking_types, run_command, and underscore " + "output paths. Apply response includes next_action and remaining gaps. " + "preview_instances.py: use for smoke test or 50–500 instance spot-checks. " + "Entities before relationships. Stop on 500/503 after validate passed." 
), "readiness_reporting": ( "After schema or prepopulation work, call kartograph_get_workspace_readiness and cite " diff --git a/src/api/extraction/infrastructure/instance_generator_templates/PREPOPULATION_WORKFLOW.md b/src/api/extraction/infrastructure/instance_generator_templates/PREPOPULATION_WORKFLOW.md index e068c344a..e37b09fa1 100644 --- a/src/api/extraction/infrastructure/instance_generator_templates/PREPOPULATION_WORKFLOW.md +++ b/src/api/extraction/infrastructure/instance_generator_templates/PREPOPULATION_WORKFLOW.md @@ -7,12 +7,28 @@ Use this checklist for every `prepopulated: true` type after the ontology is sav After the ontology is saved, for the **first** prepopulated entity label only: 1. Run the scanner but limit output (e.g. `preview_instances.py {Label} --limit 2` or a tiny hand-trimmed JSON). -2. Convert to JSONL, `kartograph_validate_graph_mutations_from_file`, then `kartograph_apply_graph_mutations_from_file`. +2. Convert to JSONL, optionally `kartograph_validate_graph_mutations_from_file`, then `kartograph_apply_graph_mutations_from_file`. 3. Confirm with `kartograph_list_instances_by_type` and `kartograph_get_workspace_readiness` (must return **200**). 4. If validate passes but apply or readiness returns **500/503**, **stop** — report a platform issue; do not run the full batch or advance to the next label. -5. When the smoke test succeeds, run the full scanner → JSONL → apply pipeline for that label. +5. When the smoke test succeeds, run the full pipeline for that label (see **Combined run** below). -## Entity prepopulation (six steps) +## Combined run (recommended) + +After the scanner exists, one command runs scan → JSON → JSONL: + +```bash +python3 instance_generators/run_scanner.py Resource --entity +python3 instance_generators/run_scanner.py \ + --relationship --source ComponentTest --rel tests --target APIEndpoint +``` + +The script prints a JSON summary with `jsonl_path` and `next_step`. 
Apply pre-validates +internally — call `kartograph_apply_graph_mutations_from_file` with the printed path +(optional `kartograph_validate_graph_mutations_from_file` first for dry run). + +Use `--validate-only` to stop after JSONL conversion without suggesting apply. + +## Entity prepopulation (manual steps) ### Step 1 — Create scanner @@ -33,7 +49,10 @@ python3 instance_generators/{Label}.py repository-files \ Stdout contract: `[{"slug": "kebab-or-snake-case", "properties": {...}}, ...]` -### Step 3 — Preview (optional, recommended) +### Step 3 — Preview (optional) + +Use when spot-checking **50–500** instances, or during the first-entity smoke test. +Skip for very small batches (≤10) and very large ones (>500) where terminal preview is not useful. ```bash python3 instance_generators/preview_instances.py {Label} --limit 5 @@ -53,7 +72,9 @@ python3 instance_generators/entities_to_jsonl.py {Label} \ The CLI `{Label}` must match the ontology entity type **exactly** (case-sensitive). `entities_to_jsonl.py` preserves that casing in CREATE `label` lines. -### Step 5 — Validate (dry run) +### Step 5 — Validate (optional dry run) + +Apply pre-validates the same checks. Use validate only when you want a dry run without writes. `kartograph_validate_graph_mutations_from_file` with path `instance_generators/out/{Label}_instances.jsonl`. @@ -63,7 +84,7 @@ CREATE is strict — duplicates fail here, not at apply time. `kartograph_apply_graph_mutations_from_file` with the same path, then: -1. Confirm apply result reports created count. +1. Confirm apply result reports created count and `next_action` / remaining gaps. 2. `kartograph_get_workspace_readiness()` — live count should increase; label leaves entity gaps. 3. `kartograph_list_instances_by_type(entity_type="{label}")` — spot-check slugs. 
diff --git a/src/api/extraction/infrastructure/instance_generator_templates/README.md b/src/api/extraction/infrastructure/instance_generator_templates/README.md index 7f7e1db7a..ebd5fc828 100644 --- a/src/api/extraction/infrastructure/instance_generator_templates/README.md +++ b/src/api/extraction/infrastructure/instance_generator_templates/README.md @@ -5,6 +5,7 @@ Prepopulation for `prepopulated: true` types uses **three kinds of files**: | File | Who writes it | Purpose | |------|---------------|---------| | `{Label}.py` | Agent | Scans `repository-files/` → JSON array on stdout | +| `run_scanner.py` | Platform | One command: scan → JSON → JSONL | | `entities_to_jsonl.py` | Platform | `{label}_instances.json` → `{label}_instances.jsonl` | | `relationships_to_jsonl.py` | Platform | `{key}_instances.json` → `{key}_instances.jsonl` | @@ -24,6 +25,15 @@ Copy `_entity_scanner.example.py` to `{Label}.py` or start from `examples/` for ## Entity prepopulation (one type per turn) +Preferred — combined run: + +```bash +python3 instance_generators/run_scanner.py E2ETest --entity +# apply the printed jsonl_path with kartograph_apply_graph_mutations_from_file +``` + +Manual steps: + ```bash python3 instance_generators/E2ETest.py repository-files \ > instance_generators/out/E2ETest_instances.json @@ -36,9 +46,8 @@ python3 instance_generators/entities_to_jsonl.py E2ETest \ > instance_generators/out/E2ETest_instances.jsonl ``` -Then `kartograph_validate_graph_mutations_from_file` and -`kartograph_apply_graph_mutations_from_file` with path -`instance_generators/out/E2ETest_instances.jsonl`. +Then `kartograph_apply_graph_mutations_from_file` with path +`instance_generators/out/E2ETest_instances.jsonl` (apply pre-validates; validate first is optional). 
## Relationship prepopulation (after all entity gaps) diff --git a/src/api/extraction/infrastructure/instance_generator_templates/__init__.py b/src/api/extraction/infrastructure/instance_generator_templates/__init__.py index b300d9bf6..1dce4faa2 100644 --- a/src/api/extraction/infrastructure/instance_generator_templates/__init__.py +++ b/src/api/extraction/infrastructure/instance_generator_templates/__init__.py @@ -11,6 +11,7 @@ "entities_to_jsonl.py", "relationships_to_jsonl.py", "preview_instances.py", + "run_scanner.py", "scanner_common.py", "README.md", "PREPOPULATION_WORKFLOW.md", diff --git a/src/api/extraction/infrastructure/instance_generator_templates/run_scanner.py b/src/api/extraction/infrastructure/instance_generator_templates/run_scanner.py new file mode 100644 index 000000000..4e63a7fbf --- /dev/null +++ b/src/api/extraction/infrastructure/instance_generator_templates/run_scanner.py @@ -0,0 +1,235 @@ +#!/usr/bin/env python3 +"""Run a prepopulation scanner and convert output to JSONL in one step. + +Entity example: + + python3 instance_generators/run_scanner.py Resource --entity + +Relationship example: + + python3 instance_generators/run_scanner.py \\ + --relationship --source ComponentTest --rel tests --target APIEndpoint + +Then call ``kartograph_validate_graph_mutations_from_file`` (optional dry run) and +``kartograph_apply_graph_mutations_from_file`` with the printed ``jsonl_path``. +Apply pre-validates internally; separate validate is optional. 
+""" + +from __future__ import annotations + +import argparse +import json +import subprocess +import sys +from pathlib import Path +from typing import Any + +from scanner_common import relationship_output_paths, relationship_scanner_stem + +GENERATORS_DIR = Path(__file__).resolve().parent +OUT_DIR = GENERATORS_DIR / "out" + + +def _entity_paths(label: str) -> tuple[Path, Path, Path]: + scanner = GENERATORS_DIR / f"{label}.py" + json_path = OUT_DIR / f"{label}_instances.json" + jsonl_path = OUT_DIR / f"{label}_instances.jsonl" + return scanner, json_path, jsonl_path + + +def _relationship_paths(*, source: str, relationship: str, target: str) -> tuple[Path, Path, Path]: + stem = relationship_scanner_stem( + source=source, + relationship=relationship, + target=target, + ) + json_rel, jsonl_rel = relationship_output_paths( + source=source, + relationship=relationship, + target=target, + ) + return GENERATORS_DIR / f"{stem}.py", Path(json_rel), Path(jsonl_rel) + + +def _run_scanner(*, scanner_path: Path, repository_files: Path, json_path: Path) -> None: + if not scanner_path.is_file(): + raise FileNotFoundError(f"Scanner not found: {scanner_path}") + if not repository_files.is_dir(): + raise FileNotFoundError(f"Repository files directory not found: {repository_files}") + OUT_DIR.mkdir(parents=True, exist_ok=True) + with json_path.open("w", encoding="utf-8") as handle: + subprocess.run( + [sys.executable, str(scanner_path), str(repository_files)], + check=True, + stdout=handle, + ) + + +def _convert_entity_jsonl( + *, + label: str, + json_path: Path, + jsonl_path: Path, + data_source_id: str, +) -> int: + with jsonl_path.open("w", encoding="utf-8") as handle: + subprocess.run( + [ + sys.executable, + str(GENERATORS_DIR / "entities_to_jsonl.py"), + label, + "--data-source-id", + data_source_id, + str(json_path), + ], + check=True, + stdout=handle, + ) + return sum(1 for line in jsonl_path.read_text(encoding="utf-8").splitlines() if line.strip()) + + +def 
_convert_relationship_jsonl( + *, + source: str, + relationship: str, + target: str, + json_path: Path, + jsonl_path: Path, + data_source_id: str, +) -> int: + with jsonl_path.open("w", encoding="utf-8") as handle: + subprocess.run( + [ + sys.executable, + str(GENERATORS_DIR / "relationships_to_jsonl.py"), + relationship, + source, + target, + "--data-source-id", + data_source_id, + str(json_path), + ], + check=True, + stdout=handle, + ) + return sum(1 for line in jsonl_path.read_text(encoding="utf-8").splitlines() if line.strip()) + + +def _load_instance_count(json_path: Path) -> int: + payload = json.loads(json_path.read_text(encoding="utf-8")) + if not isinstance(payload, list): + raise ValueError("Scanner output must be a JSON array") + return len(payload) + + +def _emit_summary(summary: dict[str, Any]) -> None: + sys.stdout.write(json.dumps(summary, indent=2) + "\n") + + +def main() -> int: + parser = argparse.ArgumentParser( + description="Run a prepopulation scanner and convert output to JSONL.", + ) + parser.add_argument( + "label", + nargs="?", + help="Entity type label when using --entity.", + ) + mode = parser.add_mutually_exclusive_group(required=True) + mode.add_argument("--entity", action="store_true", help="Run an entity scanner.") + mode.add_argument("--relationship", action="store_true", help="Run a relationship scanner.") + parser.add_argument("--source", help="Relationship source entity type label.") + parser.add_argument("--rel", help="Relationship type label.") + parser.add_argument("--target", help="Relationship target entity type label.") + parser.add_argument( + "--repository-files", + default="repository-files", + help="Directory passed to the scanner (default: repository-files).", + ) + parser.add_argument( + "--data-source-id", + default="schema-bootstrap", + help="data_source_id written into JSONL rows.", + ) + parser.add_argument( + "--validate-only", + action="store_true", + help="Stop after JSONL conversion; do not suggest apply.", + 
) + args = parser.parse_args() + + repository_files = Path(args.repository_files) + if args.entity: + if not args.label: + parser.error("entity label is required when using --entity") + scanner_path, json_path, jsonl_path = _entity_paths(args.label) + _run_scanner( + scanner_path=scanner_path, + repository_files=repository_files, + json_path=json_path, + ) + instance_count = _load_instance_count(json_path) + jsonl_lines = _convert_entity_jsonl( + label=args.label, + json_path=json_path, + jsonl_path=jsonl_path, + data_source_id=args.data_source_id, + ) + summary: dict[str, Any] = { + "kind": "entity", + "label": args.label, + "scanner_path": str(scanner_path), + "json_path": str(json_path), + "jsonl_path": str(jsonl_path), + "instance_count": instance_count, + "jsonl_line_count": jsonl_lines, + } + else: + if not args.source or not args.rel or not args.target: + parser.error("--relationship requires --source, --rel, and --target") + scanner_path, json_path, jsonl_path = _relationship_paths( + source=args.source, + relationship=args.rel, + target=args.target, + ) + _run_scanner( + scanner_path=scanner_path, + repository_files=repository_files, + json_path=json_path, + ) + instance_count = _load_instance_count(json_path) + jsonl_lines = _convert_relationship_jsonl( + source=args.source, + relationship=args.rel, + target=args.target, + json_path=json_path, + jsonl_path=jsonl_path, + data_source_id=args.data_source_id, + ) + summary = { + "kind": "relationship", + "source_entity_type": args.source, + "relationship_type": args.rel, + "target_entity_type": args.target, + "scanner_path": str(scanner_path), + "json_path": str(json_path), + "jsonl_path": str(jsonl_path), + "instance_count": instance_count, + "jsonl_line_count": jsonl_lines, + } + + if args.validate_only: + summary["next_step"] = ( + f"kartograph_validate_graph_mutations_from_file path={summary['jsonl_path']}" + ) + else: + summary["next_step"] = ( + f"kartograph_apply_graph_mutations_from_file 
path={summary['jsonl_path']} " + "(apply pre-validates; optional validate first for dry run)" + ) + _emit_summary(summary) + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/src/api/extraction/infrastructure/instance_generator_templates/scanner_common.py b/src/api/extraction/infrastructure/instance_generator_templates/scanner_common.py index d53e7ecdb..76ca6e0d6 100644 --- a/src/api/extraction/infrastructure/instance_generator_templates/scanner_common.py +++ b/src/api/extraction/infrastructure/instance_generator_templates/scanner_common.py @@ -39,6 +39,24 @@ def dedupe_instances( return unique, skipped +def relationship_scanner_stem(*, source: str, relationship: str, target: str) -> str: + """Filesystem-safe stem for relationship scanner and output files.""" + return f"{source}_{relationship}_{target}" + + +def relationship_output_paths(*, source: str, relationship: str, target: str) -> tuple[str, str]: + """Return workspace-relative JSON and JSONL output paths for one relationship type.""" + stem = relationship_scanner_stem( + source=source, + relationship=relationship, + target=target, + ) + return ( + f"instance_generators/out/{stem}_instances.json", + f"instance_generators/out/{stem}_instances.jsonl", + ) + + def dedupe_relationships( relationships: list[dict[str, Any]], ) -> tuple[list[dict[str, Any]], int]: diff --git a/src/api/extraction/presentation/workload_routes.py b/src/api/extraction/presentation/workload_routes.py index 18d226ad9..813770997 100644 --- a/src/api/extraction/presentation/workload_routes.py +++ b/src/api/extraction/presentation/workload_routes.py @@ -86,6 +86,9 @@ class WorkloadMutationApplyResponse(BaseModel): applied: bool errors: list[str] = Field(default_factory=list) operations_applied: int = 0 + next_action: str = "" + remaining_entity_gaps: list[str] = Field(default_factory=list) + remaining_relationship_gaps: list[str] = Field(default_factory=list) class WorkloadMutationValidateResponse(BaseModel): 
@@ -252,6 +255,7 @@ async def workload_apply_mutations( request: WorkloadMutationApplyRequest, auth: Annotated[WorkloadAuthContext, Depends(get_workload_auth_context)] = ..., schema_service: Annotated[IWorkloadSchemaService, Depends(get_workload_schema_service)] = ..., + reader: Annotated[IWorkloadGraphReader, Depends(get_workload_graph_reader)] = ..., ) -> WorkloadMutationApplyResponse: _require_chat_scope(auth) try: @@ -262,10 +266,40 @@ async def workload_apply_mutations( ) except Exception as exc: raise_graph_storage_http_error(exc) + + next_action = "" + remaining_entity_gaps: list[str] = [] + remaining_relationship_gaps: list[str] = [] + if result.get("applied"): + from infrastructure.extraction_workload.workspace_readiness import ( + build_workload_readiness_snapshot, + ) + + ontology = await schema_service.get_ontology(knowledge_graph_id=auth.knowledge_graph_id) + try: + snapshot = await build_workload_readiness_snapshot( + ontology=ontology, + knowledge_graph_id=auth.knowledge_graph_id, + tenant_id=auth.tenant_id, + graph_reader=reader, + ) + except Exception as exc: + raise_graph_storage_http_error(exc) + next_action = str(snapshot.get("next_action") or "") + remaining_entity_gaps = list( + snapshot.get("prepopulated_entity_types_without_instances_live") or [] + ) + remaining_relationship_gaps = list( + snapshot.get("prepopulated_relationship_types_without_instances_live") or [] + ) + return WorkloadMutationApplyResponse( applied=bool(result.get("applied")), errors=[str(item) for item in result.get("errors", [])], operations_applied=int(result.get("operations_applied", 0)), + next_action=next_action, + remaining_entity_gaps=remaining_entity_gaps, + remaining_relationship_gaps=remaining_relationship_gaps, ) diff --git a/src/api/infrastructure/extraction_workload/workspace_readiness.py b/src/api/infrastructure/extraction_workload/workspace_readiness.py index 9c840b9df..201819e6e 100644 --- a/src/api/infrastructure/extraction_workload/workspace_readiness.py 
+++ b/src/api/infrastructure/extraction_workload/workspace_readiness.py @@ -22,10 +22,25 @@ def _entity_scanner_path(label: str) -> str: return f"instance_generators/{label}.py" +def _entity_output_paths(label: str) -> tuple[str, str]: + return ( + f"instance_generators/out/{label}_instances.json", + f"instance_generators/out/{label}_instances.jsonl", + ) + + def _relationship_scanner_path(*, source: str, relationship: str, target: str) -> str: return f"instance_generators/{source}_{relationship}_{target}.py" +def _relationship_output_paths(*, source: str, relationship: str, target: str) -> tuple[str, str]: + stem = f"{source}_{relationship}_{target}" + return ( + f"instance_generators/out/{stem}_instances.json", + f"instance_generators/out/{stem}_instances.jsonl", + ) + + def _build_prepopulation_tasks( *, ontology: OntologyConfig | None, @@ -41,19 +56,27 @@ def _build_prepopulation_tasks( for label in live_entity_gaps: node_type = next((nt for nt in ontology.node_types if nt.label == label), None) live_count = entity_instance_counts.get(label, 0) + output_json, output_jsonl = _entity_output_paths(label) tasks.append( { "kind": "entity", + "order": 1, + "blocking_types": [], "label": label, "live_instance_count": live_count, "scanner_path": _entity_scanner_path(label), - "output_json": f"instance_generators/out/{label}_instances.json", - "output_jsonl": f"instance_generators/out/{label}_instances.jsonl", + "output_json": output_json, + "output_jsonl": output_jsonl, + "run_command": ( + f"python3 instance_generators/run_scanner.py {label} --entity" + ), "required_properties": list(node_type.required_properties) if node_type else [], "optional_properties": list(node_type.optional_properties) if node_type else [], "action": ( f"Copy _entity_scanner.example.py to {_entity_scanner_path(label)} " - f"(filename must match label exactly), run PREPOPULATION_WORKFLOW.md steps 2–6." 
+ f"(filename must match label exactly), then " + f"`python3 instance_generators/run_scanner.py {label} --entity` " + "and apply the printed jsonl_path." ), } ) @@ -71,20 +94,39 @@ def _build_prepopulation_tasks( if source and target and rel else f"instance_generators/{key}.py" ) + output_json, output_jsonl = ( + _relationship_output_paths(source=source, relationship=rel, target=target) + if source and target and rel + else ( + f"instance_generators/out/{key}_instances.json", + f"instance_generators/out/{key}_instances.jsonl", + ) + ) + run_command = ( + "python3 instance_generators/run_scanner.py " + f"--relationship --source {source} --rel {rel} --target {target}" + if source and target and rel + else None + ) tasks.append( { "kind": "relationship", + "order": 2, + "blocking_types": [source, target] if source and target else [], "key": key, "relationship_type": rel, "source_entity_type": source, "target_entity_type": target, "live_instance_count": relationship_instance_counts.get(key, 0), "scanner_path": scanner, - "output_json": f"instance_generators/out/{key}_instances.json", - "output_jsonl": f"instance_generators/out/{key}_instances.jsonl", + "output_json": output_json, + "output_jsonl": output_jsonl, + "run_command": run_command, "action": ( - f"Copy _relationship_scanner.example.py to {scanner}, then run " - "relationship steps in PREPOPULATION_WORKFLOW.md." + f"Copy _relationship_scanner.example.py to {scanner}, then " + f"`{run_command}` and apply the printed jsonl_path." + if run_command + else "Run relationship steps in PREPOPULATION_WORKFLOW.md." ), } ) @@ -102,14 +144,16 @@ def _build_next_action( label = live_entity_gaps[0] return ( f"Run entity prepopulation for `{label}`: create {_entity_scanner_path(label)} " - "from _entity_scanner.example.py (case-sensitive filename), then follow " - "PREPOPULATION_WORKFLOW.md steps 2–6." 
+ "from _entity_scanner.example.py (case-sensitive filename), then " + f"`python3 instance_generators/run_scanner.py {label} --entity` and apply the " + "printed jsonl_path." ) if live_relationship_gaps: key = live_relationship_gaps[0] return ( f"Run relationship prepopulation for `{key}` using " - "_relationship_scanner.example.py and PREPOPULATION_WORKFLOW.md." + "_relationship_scanner.example.py, run_scanner.py --relationship, and apply the " + "printed jsonl_path." ) if transition_eligible: return ( diff --git a/src/api/tests/unit/extraction/application/test_schema_authoring_guide.py b/src/api/tests/unit/extraction/application/test_schema_authoring_guide.py index 257127402..ac19cd7d6 100644 --- a/src/api/tests/unit/extraction/application/test_schema_authoring_guide.py +++ b/src/api/tests/unit/extraction/application/test_schema_authoring_guide.py @@ -10,7 +10,7 @@ def test_authoring_guide_documents_bootstrap_and_modeling_guidance() -> None: assert "entities_to_jsonl.py" in SCHEMA_AUTHORING_GUIDE assert "relationships_to_jsonl.py" in SCHEMA_AUTHORING_GUIDE assert "_entity_scanner.example.py" in SCHEMA_AUTHORING_GUIDE - assert "test_instances.json" in SCHEMA_AUTHORING_GUIDE + assert "E2ETest_instances.json" in SCHEMA_AUTHORING_GUIDE or "out/{Label}_instances.json" in SCHEMA_AUTHORING_GUIDE assert "prepopulated" in SCHEMA_AUTHORING_GUIDE assert "/tmp" in SCHEMA_AUTHORING_GUIDE assert "data_source.py" not in SCHEMA_AUTHORING_GUIDE @@ -18,3 +18,5 @@ def test_authoring_guide_documents_bootstrap_and_modeling_guidance() -> None: assert "approved_at" in SCHEMA_AUTHORING_GUIDE assert "500/503" in SCHEMA_AUTHORING_GUIDE assert "smoke-test" in SCHEMA_AUTHORING_GUIDE.lower() or "smoke test" in SCHEMA_AUTHORING_GUIDE.lower() + assert "run_scanner.py" in SCHEMA_AUTHORING_GUIDE + assert "next_action" in SCHEMA_AUTHORING_GUIDE diff --git a/src/api/tests/unit/extraction/application/test_skill_resolution_service.py 
b/src/api/tests/unit/extraction/application/test_skill_resolution_service.py index 19d4a7f9b..6c1474159 100644 --- a/src/api/tests/unit/extraction/application/test_skill_resolution_service.py +++ b/src/api/tests/unit/extraction/application/test_skill_resolution_service.py @@ -44,14 +44,16 @@ async def test_bootstrap_mode_uses_bootstrap_defaults(self): "failure_modes", } assert "entities_to_jsonl.py" in resolved.skills["prepopulation"] - assert "_instances.json" in resolved.skills["prepopulation"] + assert "run_scanner.py" in resolved.skills["prepopulation"] + assert "PREPOPULATION_WORKFLOW.md" in resolved.skills["prepopulation"] assert "Entities before relationships" in resolved.skills["prepopulation"] guardrails_text = " ".join(resolved.guardrails) - assert "entities_to_jsonl.py" in guardrails_text + assert "run_scanner.py" in guardrails_text assert "never /tmp" in guardrails_text or "Never /tmp" in guardrails_text assert "do not ask" in guardrails_text assert "500/503" in guardrails_text assert "approved_at" in guardrails_text + assert "next_action" in guardrails_text or "next_action" in resolved.skills["prepopulation"] assert "kartograph_save_schema_ontology" in guardrails_text assert len(resolved.prompt_hierarchy) > 0 diff --git a/src/api/tests/unit/extraction/infrastructure/test_run_scanner.py b/src/api/tests/unit/extraction/infrastructure/test_run_scanner.py new file mode 100644 index 000000000..b3431fb2a --- /dev/null +++ b/src/api/tests/unit/extraction/infrastructure/test_run_scanner.py @@ -0,0 +1,159 @@ +"""Unit tests for the run_scanner prepopulation pipeline wrapper.""" + +from __future__ import annotations + +import json +import subprocess +import sys +from pathlib import Path + +RUN_SCANNER = ( + Path(__file__).resolve().parents[4] + / "extraction/infrastructure/instance_generator_templates/run_scanner.py" +) +ENTITIES_TO_JSONL = RUN_SCANNER.parent / "entities_to_jsonl.py" +RELATIONSHIPS_TO_JSONL = RUN_SCANNER.parent / "relationships_to_jsonl.py" 
+SCANNER_COMMON = RUN_SCANNER.parent / "scanner_common.py" + + +def _write_entity_scanner(generators_dir: Path, label: str) -> None: + (generators_dir / f"{label}.py").write_text( + "\n".join( + [ + "#!/usr/bin/env python3", + "import json, sys", + "def main():", + " print(json.dumps([", + ' {"slug": "alpha", "properties": {"name": "Alpha"}},', + ' {"slug": "beta", "properties": {"name": "Beta"}},', + " ]))", + 'if __name__ == "__main__":', + " main()", + ] + ), + encoding="utf-8", + ) + + +def _write_relationship_scanner(generators_dir: Path, stem: str) -> None: + (generators_dir / f"{stem}.py").write_text( + "\n".join( + [ + "#!/usr/bin/env python3", + "import json, sys", + "def main():", + " print(json.dumps([", + ' {"source_slug": "alpha", "target_slug": "beta", "properties": {}},', + " ]))", + 'if __name__ == "__main__":', + " main()", + ] + ), + encoding="utf-8", + ) + + +def _bootstrap_workspace(tmp_path: Path) -> Path: + generators_dir = tmp_path / "instance_generators" + generators_dir.mkdir() + (generators_dir / "out").mkdir() + for script in (ENTITIES_TO_JSONL, RELATIONSHIPS_TO_JSONL, SCANNER_COMMON): + (generators_dir / script.name).write_text( + script.read_text(encoding="utf-8"), + encoding="utf-8", + ) + (generators_dir / "run_scanner.py").write_text( + RUN_SCANNER.read_text(encoding="utf-8"), + encoding="utf-8", + ) + (tmp_path / "repository-files").mkdir() + return tmp_path + + +def test_run_scanner_entity_pipeline(tmp_path: Path) -> None: + workspace = _bootstrap_workspace(tmp_path) + _write_entity_scanner(workspace / "instance_generators", "Resource") + + proc = subprocess.run( + [ + sys.executable, + str(workspace / "instance_generators" / "run_scanner.py"), + "Resource", + "--entity", + "--repository-files", + str(workspace / "repository-files"), + ], + check=True, + capture_output=True, + text=True, + cwd=workspace, + ) + + summary = json.loads(proc.stdout) + assert summary["kind"] == "entity" + assert summary["instance_count"] == 2 + assert 
summary["jsonl_line_count"] == 2 + assert summary["jsonl_path"].endswith("Resource_instances.jsonl") + assert "apply_graph_mutations_from_file" in summary["next_step"] + + jsonl_path = workspace / summary["jsonl_path"] + assert jsonl_path.is_file() + lines = [line for line in jsonl_path.read_text(encoding="utf-8").splitlines() if line.strip()] + assert len(lines) == 2 + assert json.loads(lines[0])["label"] == "Resource" + + +def test_run_scanner_relationship_pipeline(tmp_path: Path) -> None: + workspace = _bootstrap_workspace(tmp_path) + stem = "ComponentTest_tests_APIEndpoint" + _write_relationship_scanner(workspace / "instance_generators", stem) + + proc = subprocess.run( + [ + sys.executable, + str(workspace / "instance_generators" / "run_scanner.py"), + "--relationship", + "--source", + "ComponentTest", + "--rel", + "tests", + "--target", + "APIEndpoint", + "--repository-files", + str(workspace / "repository-files"), + ], + check=True, + capture_output=True, + text=True, + cwd=workspace, + ) + + summary = json.loads(proc.stdout) + assert summary["kind"] == "relationship" + assert summary["jsonl_path"].endswith(f"{stem}_instances.jsonl") + assert (workspace / summary["jsonl_path"]).is_file() + + +def test_run_scanner_validate_only(tmp_path: Path) -> None: + workspace = _bootstrap_workspace(tmp_path) + _write_entity_scanner(workspace / "instance_generators", "Adapter") + + proc = subprocess.run( + [ + sys.executable, + str(workspace / "instance_generators" / "run_scanner.py"), + "Adapter", + "--entity", + "--validate-only", + "--repository-files", + str(workspace / "repository-files"), + ], + check=True, + capture_output=True, + text=True, + cwd=workspace, + ) + + summary = json.loads(proc.stdout) + assert "validate_graph_mutations_from_file" in summary["next_step"] + assert "apply_graph_mutations_from_file" not in summary["next_step"] diff --git a/src/api/tests/unit/extraction/infrastructure/test_scanner_common.py 
b/src/api/tests/unit/extraction/infrastructure/test_scanner_common.py index 9d7833e63..2580e742e 100644 --- a/src/api/tests/unit/extraction/infrastructure/test_scanner_common.py +++ b/src/api/tests/unit/extraction/infrastructure/test_scanner_common.py @@ -6,6 +6,8 @@ dedupe_instances, dedupe_relationships, generate_slug, + relationship_output_paths, + relationship_scanner_stem, ) @@ -34,3 +36,19 @@ def test_dedupe_relationships_keeps_first_pair() -> None: unique, skipped = dedupe_relationships(rows) assert len(unique) == 2 assert skipped == 1 + + +def test_relationship_paths_use_underscore_stem() -> None: + assert relationship_scanner_stem( + source="ComponentTest", + relationship="tests", + target="APIEndpoint", + ) == "ComponentTest_tests_APIEndpoint" + json_path, jsonl_path = relationship_output_paths( + source="ComponentTest", + relationship="tests", + target="APIEndpoint", + ) + assert json_path == "instance_generators/out/ComponentTest_tests_APIEndpoint_instances.json" + assert jsonl_path == "instance_generators/out/ComponentTest_tests_APIEndpoint_instances.jsonl" + assert "|" not in json_path diff --git a/src/api/tests/unit/extraction/infrastructure/test_sticky_session_workdir_materializer.py b/src/api/tests/unit/extraction/infrastructure/test_sticky_session_workdir_materializer.py index fa5cf4230..5f402db71 100644 --- a/src/api/tests/unit/extraction/infrastructure/test_sticky_session_workdir_materializer.py +++ b/src/api/tests/unit/extraction/infrastructure/test_sticky_session_workdir_materializer.py @@ -173,6 +173,7 @@ def test_materializer_copies_instance_generator_templates(tmp_path: Path) -> Non "entities_to_jsonl.py", "relationships_to_jsonl.py", "preview_instances.py", + "run_scanner.py", "scanner_common.py", "README.md", "PREPOPULATION_WORKFLOW.md", diff --git a/src/api/tests/unit/extraction/presentation/test_workload_routes.py b/src/api/tests/unit/extraction/presentation/test_workload_routes.py index 6f8646379..e9ed2da50 100644 --- 
a/src/api/tests/unit/extraction/presentation/test_workload_routes.py +++ b/src/api/tests/unit/extraction/presentation/test_workload_routes.py @@ -363,14 +363,24 @@ def test_workload_validate_graph_mutations(workload_client: tuple[TestClient, _F def test_workload_apply_graph_mutations(workload_client: tuple[TestClient, _FakeSchemaService, str]) -> None: client, fake, token = workload_client + fake.saved = OntologyConfig( + node_types=( + NodeTypeDefinition(label="service", prepopulated=True, prepopulated_instance_count=0), + NodeTypeDefinition(label="folder", prepopulated=True), + ), + edge_types=(), + ) response = client.post( "/extraction/workloads/mutations/apply", headers={"X-Workload-Token": token}, json={"jsonl": '{"op":"CREATE","type":"node","id":"service:0123456789abcdef","label":"service","set_properties":{"name":"api","slug":"api","data_source_id":"bootstrap","source_path":"assistant"}}'}, ) assert response.status_code == 200 - assert response.json()["applied"] is True + payload = response.json() + assert payload["applied"] is True assert fake.applied_jsonl is not None + assert payload["next_action"] + assert "folder" in payload["remaining_entity_gaps"] def test_workload_get_extraction_jobs_config(workload_client: tuple[TestClient, _FakeSchemaService, str]) -> None: diff --git a/src/api/tests/unit/infrastructure/extraction_workload/test_workspace_readiness.py b/src/api/tests/unit/infrastructure/extraction_workload/test_workspace_readiness.py index f60b415ae..80be42bdf 100644 --- a/src/api/tests/unit/infrastructure/extraction_workload/test_workspace_readiness.py +++ b/src/api/tests/unit/infrastructure/extraction_workload/test_workspace_readiness.py @@ -53,6 +53,11 @@ async def test_build_workload_readiness_snapshot_reports_live_relationship_gaps( assert snapshot["prepopulation_tasks"] assert snapshot["prepopulation_tasks"][0]["kind"] == "entity" assert snapshot["prepopulation_tasks"][0]["scanner_path"] == "instance_generators/folder.py" + assert 
snapshot["prepopulation_tasks"][0]["order"] == 1 + assert snapshot["prepopulation_tasks"][0]["run_command"] == ( + "python3 instance_generators/run_scanner.py folder --entity" + ) + assert "|" not in snapshot["prepopulation_tasks"][0]["output_jsonl"] assert "required_properties" in snapshot["prepopulated_entity_types"][0] From 24be9c5731ab008faac9a3cfc0b68c84a5faee9c Mon Sep 17 00:00:00 2001 From: aredenba-rh <aredenba@redhat.com> Date: Sat, 13 Jun 2026 22:20:16 -0400 Subject: [PATCH 130/153] feat(extraction): allow GMA DELETE mutations and archive sessions as one mutation log Enable CREATE/UPDATE/DELETE in workload validation and tools, accumulate applied JSONL per assistant session, and write one ARCHIVED extraction job when Clear chat ends the session so it appears in Extraction Archive history. Co-authored-by: Cursor <cursoragent@cursor.com> --- .../kartograph_agent_runtime/schema_tools.py | 6 +- .../application/agent_session_service.py | 7 ++ .../archived_extraction_history.py | 18 ++- .../application/chat_turn_service.py | 1 + .../graph_management_session_journal.py | 113 ++++++++++++++++++ .../application/schema_authoring_guide.py | 6 +- src/api/extraction/dependencies.py | 8 ++ .../extraction_job_mutation_metrics.py | 17 ++- .../repositories/extraction_job_repository.py | 27 +++++ .../sticky_session_bootstrap_builder.py | 1 + .../workload_credential_issuer.py | 8 +- .../extraction/presentation/workload_auth.py | 7 ++ .../presentation/workload_routes.py | 15 +++ .../extraction_workload/dependencies.py | 14 +++ .../extraction_workload/mutation_preflight.py | 68 +++++++++++ .../extraction_workload/schema_service.py | 14 ++- .../test_graph_management_session_journal.py | 91 ++++++++++++++ ...test_sticky_session_container_bootstrap.py | 6 +- .../test_workload_credential_issuer.py | 3 + .../presentation/test_workload_routes.py | 61 +++++++--- .../test_mutation_preflight.py | 28 +++++ 21 files changed, 490 insertions(+), 29 deletions(-) create mode 100644 
src/api/extraction/application/graph_management_session_journal.py create mode 100644 src/api/tests/unit/extraction/application/test_graph_management_session_journal.py diff --git a/src/agent-runtime/kartograph_agent_runtime/schema_tools.py b/src/agent-runtime/kartograph_agent_runtime/schema_tools.py index 713c20f39..69908f9e9 100644 --- a/src/agent-runtime/kartograph_agent_runtime/schema_tools.py +++ b/src/agent-runtime/kartograph_agent_runtime/schema_tools.py @@ -110,7 +110,7 @@ async def save_schema_ontology(args: dict[str, Any]) -> dict[str, Any]: @tool( "kartograph_validate_graph_mutations", - "Dry-run: validate JSONL mutations without writing (strict CREATE — no duplicate types/instances).", + "Dry-run: validate JSONL mutations without writing (CREATE/UPDATE/DELETE).", {"jsonl": str}, ) async def validate_graph_mutations(args: dict[str, Any]) -> dict[str, Any]: @@ -132,7 +132,7 @@ async def validate_graph_mutations(args: dict[str, Any]) -> dict[str, Any]: @tool( "kartograph_apply_graph_mutations", - "Apply JSONL mutation lines. CREATE fails if type or instance already exists; use UPDATE to edit.", + "Apply JSONL mutation lines (CREATE, UPDATE, DELETE). CREATE fails on duplicates; UPDATE/DELETE require existing ids.", {"jsonl": str}, ) async def apply_graph_mutations(args: dict[str, Any]) -> dict[str, Any]: @@ -181,7 +181,7 @@ async def validate_graph_mutations_from_file(args: dict[str, Any]) -> dict[str, @tool( "kartograph_apply_graph_mutations_from_file", - "Apply a workspace .jsonl file in one call (strict CREATE semantics).", + "Apply a workspace .jsonl file in one call (CREATE/UPDATE/DELETE). 
Apply pre-validates.", {"path": str}, ) async def apply_graph_mutations_from_file(args: dict[str, Any]) -> dict[str, Any]: diff --git a/src/api/extraction/application/agent_session_service.py b/src/api/extraction/application/agent_session_service.py index d50652754..18747cebc 100644 --- a/src/api/extraction/application/agent_session_service.py +++ b/src/api/extraction/application/agent_session_service.py @@ -7,6 +7,9 @@ from ulid import ULID +from extraction.application.graph_management_session_journal import ( + GraphManagementSessionJournalService, +) from extraction.application.skill_resolution_service import ( ExtractionSkillResolutionService, ) @@ -37,11 +40,13 @@ def __init__( skill_resolution_service: ExtractionSkillResolutionService | None = None, run_metrics_reader: IExtractionSessionRunMetricsReader | None = None, sticky_runtime_manager: IStickySessionRuntimeManager | None = None, + session_journal_service: GraphManagementSessionJournalService | None = None, ) -> None: self._repository = repository self._skill_resolution_service = skill_resolution_service self._run_metrics_reader = run_metrics_reader self._sticky_runtime_manager = sticky_runtime_manager + self._session_journal_service = session_journal_service @staticmethod def _build_bootstrap_intake_prompt() -> str: @@ -124,6 +129,8 @@ async def clear_chat( knowledge_graph_id=knowledge_graph_id, mode=mode.value, ) + if self._session_journal_service is not None: + await self._session_journal_service.archive_session_mutations(active) active.archive() await self._repository.save(active) diff --git a/src/api/extraction/application/archived_extraction_history.py b/src/api/extraction/application/archived_extraction_history.py index b495525b1..222e3ba2b 100644 --- a/src/api/extraction/application/archived_extraction_history.py +++ b/src/api/extraction/application/archived_extraction_history.py @@ -7,12 +7,26 @@ from extraction.domain.extraction_job import ExtractionJobRecord +def archived_job_write_ops(job: 
ExtractionJobRecord) -> int: + """Return write op count, including DELETE lines for graph-management sessions.""" + if ( + job.strategy == "graph_management_session" + and job.applied_mutations_jsonl + ): + from extraction.infrastructure.extraction_job_mutation_metrics import ( + metrics_from_mutation_jsonl, + ) + + return int(metrics_from_mutation_jsonl(job.applied_mutations_jsonl).get("write_ops") or 0) + return job.write_ops() + + def serialize_archived_job(job: ExtractionJobRecord) -> dict[str, Any]: return { **job.to_dict(), "jobId": job.job_id, "jobSet": job.job_set_name, - "writeOps": job.write_ops(), + "writeOps": archived_job_write_ops(job), "hasMutations": bool(job.applied_mutations_jsonl), } @@ -41,7 +55,7 @@ def group_archived_jobs_by_run_and_set( job_sets[set_name] = [] job_sets[set_name].append(serialize_archived_job(job)) run["jobCount"] += 1 - run["writeOps"] += job.write_ops() + run["writeOps"] += archived_job_write_ops(job) run["inputTokens"] += job.input_tokens run["outputTokens"] += job.output_tokens run["costUsd"] += job.cost_usd diff --git a/src/api/extraction/application/chat_turn_service.py b/src/api/extraction/application/chat_turn_service.py index 3fab81dbb..754b20ba4 100644 --- a/src/api/extraction/application/chat_turn_service.py +++ b/src/api/extraction/application/chat_turn_service.py @@ -133,6 +133,7 @@ async def stream_chat_turn( workload_token = self._credential_issuer.issue_for_sticky_session( tenant_id=tenant_id, knowledge_graph_id=knowledge_graph_id, + session_id=session.id, ).token assistant_reply: str | None = None diff --git a/src/api/extraction/application/graph_management_session_journal.py b/src/api/extraction/application/graph_management_session_journal.py new file mode 100644 index 000000000..12abb5586 --- /dev/null +++ b/src/api/extraction/application/graph_management_session_journal.py @@ -0,0 +1,113 @@ +"""Accumulate Graph Management Assistant mutations and archive on session end.""" + +from __future__ import 
annotations + +from datetime import UTC, datetime + +from ulid import ULID + +from extraction.domain.entities.agent_session import ExtractionAgentSession +from extraction.domain.extraction_job import ExtractionJobRecord, ExtractionJobStatus +from extraction.domain.value_objects import ExtractionSessionMode +from extraction.infrastructure.extraction_job_mutation_metrics import metrics_from_mutation_jsonl +from extraction.infrastructure.repositories.extraction_job_repository import ExtractionJobRepository +from extraction.ports.repositories import IExtractionAgentSessionRepository + +GRAPH_MANAGEMENT_SESSION_STRATEGY = "graph_management_session" + +_JOB_SET_BY_MODE: dict[ExtractionSessionMode, str] = { + ExtractionSessionMode.SCHEMA_BOOTSTRAP: "Graph Management · Schema Design", + ExtractionSessionMode.EXTRACTION_OPERATIONS: "Graph Management · Extraction Operations", +} + + +def append_applied_jsonl_to_session( + session: ExtractionAgentSession, + *, + applied_jsonl: str, +) -> None: + """Append successfully applied mutation lines to the session journal.""" + chunk = applied_jsonl.strip() + if not chunk: + return + journal = dict(session.runtime_context.get("mutation_journal") or {}) + previous = str(journal.get("jsonl") or "").strip() + combined = "\n".join(part for part in (previous, chunk) if part) + journal["jsonl"] = combined + journal["line_count"] = sum(1 for line in combined.splitlines() if line.strip()) + if not journal.get("started_at"): + journal["started_at"] = session.created_at.isoformat() + session.runtime_context["mutation_journal"] = journal + + +class GraphManagementSessionJournalService: + """Persist per-session mutation JSONL and archive as one extraction job.""" + + def __init__( + self, + *, + session_repository: IExtractionAgentSessionRepository, + extraction_job_repository: ExtractionJobRepository, + ) -> None: + self._session_repository = session_repository + self._extraction_job_repository = extraction_job_repository + + async def 
append_applied_jsonl( + self, + *, + session_id: str, + applied_jsonl: str, + ) -> None: + session = await self._session_repository.get_by_id(session_id) + if session is None or not session.is_active: + return + append_applied_jsonl_to_session(session, applied_jsonl=applied_jsonl) + await self._session_repository.save(session) + + async def archive_session_mutations(self, session: ExtractionAgentSession) -> None: + """Write one ARCHIVED extraction job row when the session had graph writes.""" + journal = session.runtime_context.get("mutation_journal") or {} + jsonl = str(journal.get("jsonl") or "").strip() + if not jsonl: + return + + metrics = metrics_from_mutation_jsonl(jsonl) + if int(metrics.get("write_ops") or 0) <= 0: + return + + now = datetime.now(UTC) + started_at = session.created_at + started_raw = journal.get("started_at") + if isinstance(started_raw, str): + try: + started_at = datetime.fromisoformat(started_raw) + except ValueError: + started_at = session.created_at + + job_set_name = _JOB_SET_BY_MODE.get( + session.mode, + "Graph Management Assistant", + ) + record = ExtractionJobRecord( + id=str(ULID()), + knowledge_graph_id=session.knowledge_graph_id, + job_id=f"gma-{session.id}", + job_set_name=job_set_name, + strategy=GRAPH_MANAGEMENT_SESSION_STRATEGY, + status=ExtractionJobStatus.ARCHIVED, + order_index=0, + description=( + f"Graph Management Assistant session {session.id} " + f"({session.mode.value.replace('_', ' ')})" + ), + run_started_at=started_at, + started_at=started_at, + completed_at=now, + archived_at=now, + applied_mutations_jsonl=jsonl, + entities_created=int(metrics.get("entities_created") or 0), + entities_modified=int(metrics.get("entities_modified") or 0), + relationships_created=int(metrics.get("relationships_created") or 0), + relationships_modified=int(metrics.get("relationships_modified") or 0), + ) + await self._extraction_job_repository.insert_archived_session_job(record) diff --git 
a/src/api/extraction/application/schema_authoring_guide.py b/src/api/extraction/application/schema_authoring_guide.py index b0a8de84f..6c123d44a 100644 --- a/src/api/extraction/application/schema_authoring_guide.py +++ b/src/api/extraction/application/schema_authoring_guide.py @@ -152,10 +152,12 @@ ## Instance mutations (JSONL) +- Supported ops: **CREATE**, **UPDATE**, and **DELETE** for nodes and edges. - CREATE requires `data_source_id` and `slug` on nodes. Put `source_path` in scanner `properties` when needed. -- CREATE is strict — use UPDATE for existing instances. +- CREATE is strict — duplicate ids/slugs fail validation; use UPDATE or DELETE for existing instances. +- DELETE removes a node or edge by `id` (edges before nodes when batching deletes manually). - Never hand-author bulk CREATE lines in chat; use `entities_to_jsonl.py` / `relationships_to_jsonl.py`. -- Create all entity nodes before relationship edges. +- Create all entity nodes before relationship edges unless you are correcting data with UPDATE/DELETE. 
## Readiness checklist diff --git a/src/api/extraction/dependencies.py b/src/api/extraction/dependencies.py index 1ad96f545..db8f6d40e 100644 --- a/src/api/extraction/dependencies.py +++ b/src/api/extraction/dependencies.py @@ -12,7 +12,11 @@ ExtractionChatTurnService, ExtractionSkillResolutionService, ) +from extraction.application.graph_management_session_journal import ( + GraphManagementSessionJournalService, +) from extraction.application.sticky_session_runtime_service import StickySessionRuntimeService +from extraction.infrastructure.repositories.extraction_job_repository import ExtractionJobRepository from extraction.infrastructure.sticky_runtime_health import StickyRuntimeHealthChecker from extraction.infrastructure.ingestion_readiness_reader import SqlIngestionReadinessReader from infrastructure.job_packages.archive_hydrator import JobPackageArchiveHydrator @@ -65,6 +69,10 @@ def _build_extraction_agent_session_service( skill_resolution_service=skill_resolution_service, run_metrics_reader=ExtractionSessionRunMetricsReader(session=session), sticky_runtime_manager=sticky_runtime_manager, + session_journal_service=GraphManagementSessionJournalService( + session_repository=ExtractionAgentSessionRepository(session=session), + extraction_job_repository=ExtractionJobRepository(session=session), + ), ) diff --git a/src/api/extraction/infrastructure/extraction_job_mutation_metrics.py b/src/api/extraction/infrastructure/extraction_job_mutation_metrics.py index 149244d22..0b421650e 100644 --- a/src/api/extraction/infrastructure/extraction_job_mutation_metrics.py +++ b/src/api/extraction/infrastructure/extraction_job_mutation_metrics.py @@ -13,8 +13,10 @@ def metrics_from_mutation_jsonl(jsonl_content: str) -> dict[str, int]: """Count instance CREATE/UPDATE operations; ignore schema DEFINE operations.""" entities_created = 0 entities_modified = 0 + entities_deleted = 0 relationships_created = 0 relationships_modified = 0 + relationships_deleted = 0 for raw_line in 
jsonl_content.splitlines(): line = raw_line.strip() @@ -34,31 +36,40 @@ def metrics_from_mutation_jsonl(jsonl_content: str) -> dict[str, int]: if op not in { MutationOperationType.CREATE.value, MutationOperationType.UPDATE.value, + MutationOperationType.DELETE.value, }: continue if entity_type == EntityType.NODE.value: if op == MutationOperationType.CREATE.value: entities_created += 1 - else: + elif op == MutationOperationType.UPDATE.value: entities_modified += 1 + else: + entities_deleted += 1 elif entity_type == EntityType.EDGE.value: if op == MutationOperationType.CREATE.value: relationships_created += 1 - else: + elif op == MutationOperationType.UPDATE.value: relationships_modified += 1 + else: + relationships_deleted += 1 write_ops = ( entities_created + entities_modified + + entities_deleted + relationships_created + relationships_modified + + relationships_deleted ) return { "entities_created": entities_created, "entities_modified": entities_modified, + "entities_deleted": entities_deleted, "relationships_created": relationships_created, "relationships_modified": relationships_modified, + "relationships_deleted": relationships_deleted, "write_ops": write_ops, } @@ -98,7 +109,9 @@ def _empty_metrics() -> dict[str, int]: return { "entities_created": 0, "entities_modified": 0, + "entities_deleted": 0, "relationships_created": 0, "relationships_modified": 0, + "relationships_deleted": 0, "write_ops": 0, } diff --git a/src/api/extraction/infrastructure/repositories/extraction_job_repository.py b/src/api/extraction/infrastructure/repositories/extraction_job_repository.py index 54cb222e1..8d2b6416b 100644 --- a/src/api/extraction/infrastructure/repositories/extraction_job_repository.py +++ b/src/api/extraction/infrastructure/repositories/extraction_job_repository.py @@ -519,6 +519,33 @@ async def reset_all_non_pending( ) return total + async def insert_archived_session_job(self, job: ExtractionJobRecord) -> None: + """Persist one archived Graph Management Assistant 
session mutation log.""" + self._session.add( + ExtractionJobModel( + id=job.id, + knowledge_graph_id=job.knowledge_graph_id, + job_id=job.job_id, + job_set_name=job.job_set_name, + strategy=job.strategy, + status=job.status.value, + order_index=job.order_index, + description=job.description, + target_instances=[instance.to_dict() for instance in job.target_instances], + target_files=[target_file.to_dict() for target_file in job.target_files], + started_at=job.started_at, + completed_at=job.completed_at, + entities_created=job.entities_created, + entities_modified=job.entities_modified, + relationships_created=job.relationships_created, + relationships_modified=job.relationships_modified, + run_started_at=job.run_started_at, + archived_at=job.archived_at, + applied_mutations_jsonl=job.applied_mutations_jsonl, + ) + ) + await self._session.flush() + async def list_archived_jobs( self, *, diff --git a/src/api/extraction/infrastructure/sticky_session_bootstrap_builder.py b/src/api/extraction/infrastructure/sticky_session_bootstrap_builder.py index 0d816c2b1..ab91d3901 100644 --- a/src/api/extraction/infrastructure/sticky_session_bootstrap_builder.py +++ b/src/api/extraction/infrastructure/sticky_session_bootstrap_builder.py @@ -76,6 +76,7 @@ async def build( credentials = self._credential_issuer.issue_for_sticky_session( tenant_id=tenant_id, knowledge_graph_id=knowledge_graph_id, + session_id=session_id, ) return StickySessionRuntimeBootstrap( tenant_id=tenant_id, diff --git a/src/api/extraction/infrastructure/workload_credential_issuer.py b/src/api/extraction/infrastructure/workload_credential_issuer.py index 33a4605e9..5973f0e02 100644 --- a/src/api/extraction/infrastructure/workload_credential_issuer.py +++ b/src/api/extraction/infrastructure/workload_credential_issuer.py @@ -62,13 +62,17 @@ def issue( ) def issue_for_sticky_session( - self, *, tenant_id: str, knowledge_graph_id: str + self, + *, + tenant_id: str, + knowledge_graph_id: str, + session_id: str, ) -> 
ScopedWorkloadCredentials: """Issue chat-scoped credentials for sticky session agent containers.""" return self.issue( tenant_id=tenant_id, knowledge_graph_id=knowledge_graph_id, - extra_scopes=("workload:chat",), + extra_scopes=("workload:chat", f"session:{session_id}"), ) def verify(self, token: str) -> ScopedWorkloadCredentials | None: diff --git a/src/api/extraction/presentation/workload_auth.py b/src/api/extraction/presentation/workload_auth.py index c35a719b4..529cfc599 100644 --- a/src/api/extraction/presentation/workload_auth.py +++ b/src/api/extraction/presentation/workload_auth.py @@ -22,6 +22,7 @@ class WorkloadAuthContext: credentials: ScopedWorkloadCredentials tenant_id: str knowledge_graph_id: str + session_id: str | None = None def get_workload_auth_context( @@ -62,8 +63,14 @@ def get_workload_auth_context( detail="Workload token is missing tenant or knowledge graph scope", ) + session_scope = next( + (scope.removeprefix("session:") for scope in credentials.scopes if scope.startswith("session:")), + None, + ) + return WorkloadAuthContext( credentials=credentials, tenant_id=tenant_scope, knowledge_graph_id=kg_scope, + session_id=session_scope, ) diff --git a/src/api/extraction/presentation/workload_routes.py b/src/api/extraction/presentation/workload_routes.py index 813770997..c47dad229 100644 --- a/src/api/extraction/presentation/workload_routes.py +++ b/src/api/extraction/presentation/workload_routes.py @@ -15,10 +15,14 @@ get_workload_auth_context, ) from infrastructure.extraction_workload.dependencies import ( + get_graph_management_session_journal_service, get_workload_extraction_jobs_service, get_workload_graph_reader, get_workload_schema_service, ) +from extraction.application.graph_management_session_journal import ( + GraphManagementSessionJournalService, +) from infrastructure.extraction_workload.workload_errors import raise_graph_storage_http_error from management.domain.ontology_prepopulation import PrepopulationValidationError from 
management.domain.relationship_pairing import ontology_config_from_authoring_payload @@ -256,6 +260,10 @@ async def workload_apply_mutations( auth: Annotated[WorkloadAuthContext, Depends(get_workload_auth_context)] = ..., schema_service: Annotated[IWorkloadSchemaService, Depends(get_workload_schema_service)] = ..., reader: Annotated[IWorkloadGraphReader, Depends(get_workload_graph_reader)] = ..., + session_journal: Annotated[ + GraphManagementSessionJournalService, + Depends(get_graph_management_session_journal_service), + ] = ..., ) -> WorkloadMutationApplyResponse: _require_chat_scope(auth) try: @@ -271,6 +279,13 @@ async def workload_apply_mutations( remaining_entity_gaps: list[str] = [] remaining_relationship_gaps: list[str] = [] if result.get("applied"): + applied_jsonl = str(result.get("applied_jsonl") or "").strip() + if auth.session_id and applied_jsonl: + await session_journal.append_applied_jsonl( + session_id=auth.session_id, + applied_jsonl=applied_jsonl, + ) + from infrastructure.extraction_workload.workspace_readiness import ( build_workload_readiness_snapshot, ) diff --git a/src/api/infrastructure/extraction_workload/dependencies.py b/src/api/infrastructure/extraction_workload/dependencies.py index 134343ba2..49fdd3d5e 100644 --- a/src/api/infrastructure/extraction_workload/dependencies.py +++ b/src/api/infrastructure/extraction_workload/dependencies.py @@ -17,6 +17,11 @@ from infrastructure.extraction_workload.extraction_jobs_service import ( GraphWorkloadExtractionJobsService, ) +from extraction.application.graph_management_session_journal import ( + GraphManagementSessionJournalService, +) +from extraction.infrastructure.repositories import ExtractionAgentSessionRepository +from extraction.infrastructure.repositories.extraction_job_repository import ExtractionJobRepository from infrastructure.extraction_workload.graph_mutation_writer import ( GraphWorkloadGraphMutationWriter, ) @@ -43,6 +48,15 @@ def get_workload_graph_reader( return 
GraphWorkloadGraphReader(pool=pool, settings=get_database_settings()) +def get_graph_management_session_journal_service( + session: Annotated[AsyncSession, Depends(get_write_session)], +) -> GraphManagementSessionJournalService: + return GraphManagementSessionJournalService( + session_repository=ExtractionAgentSessionRepository(session=session), + extraction_job_repository=ExtractionJobRepository(session=session), + ) + + def get_workload_schema_service( session: Annotated[AsyncSession, Depends(get_write_session)], pool: Annotated[ConnectionPool, Depends(get_age_connection_pool)], diff --git a/src/api/infrastructure/extraction_workload/mutation_preflight.py b/src/api/infrastructure/extraction_workload/mutation_preflight.py index 1295a1d16..6573575da 100644 --- a/src/api/infrastructure/extraction_workload/mutation_preflight.py +++ b/src/api/infrastructure/extraction_workload/mutation_preflight.py @@ -65,9 +65,33 @@ async def validate_mutation_jsonl( create_node_ids: list[str] = [] create_edge_ids: list[str] = [] + update_node_ids: list[str] = [] + update_edge_ids: list[str] = [] + delete_node_ids: list[str] = [] + delete_edge_ids: list[str] = [] slug_checks: dict[str, set[str]] = {} for line_num, operation in enumerate(operations, start=1): + if operation.op == MutationOperationType.DELETE: + if not operation.id: + errors.append(f"Line {line_num}: DELETE requires id.") + elif operation.type == EntityType.NODE.value: + delete_node_ids.append(operation.id) + elif operation.type == EntityType.EDGE.value: + delete_edge_ids.append(operation.id) + else: + errors.append(f"Line {line_num}: DELETE type must be node or edge.") + + if operation.op == MutationOperationType.UPDATE: + if not operation.id: + errors.append(f"Line {line_num}: UPDATE requires id.") + elif operation.type == EntityType.NODE.value: + update_node_ids.append(operation.id) + elif operation.type == EntityType.EDGE.value: + update_edge_ids.append(operation.id) + else: + errors.append(f"Line {line_num}: 
UPDATE type must be node or edge.") + if operation.op == MutationOperationType.DEFINE and operation.label: key = (operation.label, operation.type) if key in existing_type_keys: @@ -151,4 +175,48 @@ async def validate_mutation_jsonl( "use UPDATE to change it." ) + missing_node_ids = set(update_node_ids + delete_node_ids) + if missing_node_ids: + existing_node_ids = await graph_reader.find_existing_node_ids( + tenant_id=tenant_id, + knowledge_graph_id=knowledge_graph_id, + node_ids=tuple(sorted(missing_node_ids)), + ) + for line_num, operation in enumerate(operations, start=1): + if operation.op not in { + MutationOperationType.UPDATE, + MutationOperationType.DELETE, + }: + continue + if operation.type != EntityType.NODE.value or not operation.id: + continue + if operation.id not in existing_node_ids: + verb = operation.op.value + errors.append( + f"Line {line_num}: node id `{operation.id}` does not exist; " + f"cannot {verb}." + ) + + missing_edge_ids = set(update_edge_ids + delete_edge_ids) + if missing_edge_ids: + existing_edge_ids = await graph_reader.find_existing_edge_ids( + tenant_id=tenant_id, + knowledge_graph_id=knowledge_graph_id, + edge_ids=tuple(sorted(missing_edge_ids)), + ) + for line_num, operation in enumerate(operations, start=1): + if operation.op not in { + MutationOperationType.UPDATE, + MutationOperationType.DELETE, + }: + continue + if operation.type != EntityType.EDGE.value or not operation.id: + continue + if operation.id not in existing_edge_ids: + verb = operation.op.value + errors.append( + f"Line {line_num}: edge id `{operation.id}` does not exist; " + f"cannot {verb}." 
+ ) + return errors diff --git a/src/api/infrastructure/extraction_workload/schema_service.py b/src/api/infrastructure/extraction_workload/schema_service.py index 442a8dfe9..da86efb52 100644 --- a/src/api/infrastructure/extraction_workload/schema_service.py +++ b/src/api/infrastructure/extraction_workload/schema_service.py @@ -129,10 +129,11 @@ async def apply_mutation_jsonl( return {"applied": False, "errors": [str(exc)]} if not define_ops and not instance_ops: - return {"applied": True, "errors": [], "operations_applied": 0} + return {"applied": True, "errors": [], "operations_applied": 0, "applied_jsonl": ""} errors: list[str] = [] operations_applied = 0 + applied_operations = define_ops + instance_ops if define_ops: define_jsonl = "\n".join( @@ -171,4 +172,13 @@ async def apply_mutation_jsonl( await self._repository.replace_ontology(knowledge_graph_id, synced) await self._session.commit() - return {"applied": True, "errors": [], "operations_applied": operations_applied} + applied_jsonl = "\n".join( + json.dumps(operation.model_dump(mode="json"), separators=(",", ":")) + for operation in applied_operations + ) + return { + "applied": True, + "errors": [], + "operations_applied": operations_applied, + "applied_jsonl": applied_jsonl, + } diff --git a/src/api/tests/unit/extraction/application/test_graph_management_session_journal.py b/src/api/tests/unit/extraction/application/test_graph_management_session_journal.py new file mode 100644 index 000000000..23e482f9b --- /dev/null +++ b/src/api/tests/unit/extraction/application/test_graph_management_session_journal.py @@ -0,0 +1,91 @@ +"""Unit tests for Graph Management session mutation journaling.""" + +from __future__ import annotations + +from dataclasses import replace +from datetime import UTC, datetime + +import pytest + +from extraction.application.graph_management_session_journal import ( + GraphManagementSessionJournalService, + append_applied_jsonl_to_session, +) +from extraction.domain.entities.agent_session 
import ExtractionAgentSession +from extraction.domain.extraction_job import ExtractionJobStatus +from extraction.domain.value_objects import ExtractionSessionMode + + +class _InMemorySessionRepository: + def __init__(self) -> None: + self._by_id: dict[str, ExtractionAgentSession] = {} + + async def save(self, session: ExtractionAgentSession) -> None: + self._by_id[session.id] = replace(session) + + async def get_by_id(self, session_id: str) -> ExtractionAgentSession | None: + session = self._by_id.get(session_id) + return replace(session) if session else None + + +class _InMemoryJobRepository: + def __init__(self) -> None: + self.inserted: list[object] = [] + + async def insert_archived_session_job(self, job) -> None: + self.inserted.append(job) + + +def test_append_applied_jsonl_to_session_accumulates_lines() -> None: + session = ExtractionAgentSession( + id="session-1", + user_id="user-1", + knowledge_graph_id="kg-1", + mode=ExtractionSessionMode.SCHEMA_BOOTSTRAP, + ) + append_applied_jsonl_to_session( + session, + applied_jsonl='{"op":"CREATE","type":"node","id":"service:abc"}', + ) + append_applied_jsonl_to_session( + session, + applied_jsonl='{"op":"DELETE","type":"node","id":"service:def"}', + ) + + journal = session.runtime_context["mutation_journal"] + assert journal["line_count"] == 2 + assert "DELETE" in journal["jsonl"] + + +@pytest.mark.asyncio +async def test_archive_session_mutations_creates_archived_job() -> None: + session_repo = _InMemorySessionRepository() + job_repo = _InMemoryJobRepository() + service = GraphManagementSessionJournalService( + session_repository=session_repo, + extraction_job_repository=job_repo, + ) + session = ExtractionAgentSession( + id="session-2", + user_id="user-1", + knowledge_graph_id="kg-1", + mode=ExtractionSessionMode.SCHEMA_BOOTSTRAP, + created_at=datetime(2026, 6, 5, tzinfo=UTC), + ) + append_applied_jsonl_to_session( + session, + applied_jsonl=( + 
'{"op":"CREATE","type":"node","id":"service:0123456789abcdef","label":"service",' + '"set_properties":{"name":"api","slug":"api","data_source_id":"bootstrap"}}' + ), + ) + + await service.archive_session_mutations(session) + + assert len(job_repo.inserted) == 1 + job = job_repo.inserted[0] + assert job.status == ExtractionJobStatus.ARCHIVED + assert job.job_id == "gma-session-2" + assert job.strategy == "graph_management_session" + assert job.applied_mutations_jsonl + assert "Graph Management · Schema Design" in job.job_set_name diff --git a/src/api/tests/unit/extraction/infrastructure/test_sticky_session_container_bootstrap.py b/src/api/tests/unit/extraction/infrastructure/test_sticky_session_container_bootstrap.py index 0471fe57b..0476e10ef 100644 --- a/src/api/tests/unit/extraction/infrastructure/test_sticky_session_container_bootstrap.py +++ b/src/api/tests/unit/extraction/infrastructure/test_sticky_session_container_bootstrap.py @@ -31,7 +31,11 @@ def test_start_runtime_mounts_skills_workspace_and_injects_token() -> None: agent_max_turns=500, ) issuer = ScopedWorkloadCredentialIssuer(default_ttl=timedelta(minutes=10)) - credentials = issuer.issue_for_sticky_session(tenant_id="tenant-1", knowledge_graph_id="kg-1") + credentials = issuer.issue_for_sticky_session( + tenant_id="tenant-1", + knowledge_graph_id="kg-1", + session_id="session-bootstrap", + ) bootstrap = StickySessionRuntimeBootstrap( tenant_id="tenant-1", credentials=credentials, diff --git a/src/api/tests/unit/extraction/infrastructure/test_workload_credential_issuer.py b/src/api/tests/unit/extraction/infrastructure/test_workload_credential_issuer.py index fd7d8d304..c0b659295 100644 --- a/src/api/tests/unit/extraction/infrastructure/test_workload_credential_issuer.py +++ b/src/api/tests/unit/extraction/infrastructure/test_workload_credential_issuer.py @@ -15,9 +15,11 @@ def test_issue_for_sticky_session_includes_chat_scope() -> None: credentials = issuer.issue_for_sticky_session( 
tenant_id="tenant-1", knowledge_graph_id="kg-1", + session_id="session-test-1", ) assert "workload:chat" in credentials.scopes + assert "session:session-test-1" in credentials.scopes assert issuer.verify(credentials.token) == credentials @@ -35,6 +37,7 @@ def test_verify_survives_new_issuer_instance_with_same_signing_key() -> None: credentials = issuer_a.issue_for_sticky_session( tenant_id="tenant-1", knowledge_graph_id="kg-1", + session_id="session-test-2", ) issuer_b = ScopedWorkloadCredentialIssuer( diff --git a/src/api/tests/unit/extraction/presentation/test_workload_routes.py b/src/api/tests/unit/extraction/presentation/test_workload_routes.py index e9ed2da50..78e9431ff 100644 --- a/src/api/tests/unit/extraction/presentation/test_workload_routes.py +++ b/src/api/tests/unit/extraction/presentation/test_workload_routes.py @@ -11,6 +11,7 @@ from extraction.presentation.workload_auth import WorkloadAuthContext, get_workload_auth_context from extraction.ports.workload_graph import WorkloadGraphNode, WorkloadGraphRelationship from infrastructure.extraction_workload.dependencies import ( + get_graph_management_session_journal_service, get_workload_extraction_jobs_service, get_workload_graph_reader, get_workload_schema_service, @@ -53,7 +54,7 @@ async def apply_mutation_jsonl( jsonl: str, ) -> dict[str, object]: self.applied_jsonl = jsonl - return {"applied": True, "errors": [], "operations_applied": 1} + return {"applied": True, "errors": [], "operations_applied": 1, "applied_jsonl": jsonl} class _FakeGraphReader: @@ -124,6 +125,14 @@ async def count_entity_instances_by_type(self, **kwargs): raise GraphQueryError("graph with oid 17491 does not exist", query="MATCH (n) RETURN n") +class _FakeSessionJournal: + def __init__(self) -> None: + self.appended: list[tuple[str, str]] = [] + + async def append_applied_jsonl(self, *, session_id: str, applied_jsonl: str) -> None: + self.appended.append((session_id, applied_jsonl)) + + class _FakeExtractionJobsService: def 
__init__(self) -> None: self.saved_payload: dict[str, object] | None = None @@ -172,6 +181,7 @@ async def get_database_status(self, *, tenant_id: str, knowledge_graph_id: str) def workload_client() -> tuple[TestClient, _FakeSchemaService, str]: fake = _FakeSchemaService() extraction_jobs_fake = _FakeExtractionJobsService() + session_journal_fake = _FakeSessionJournal() fake.saved = OntologyConfig( node_types=( NodeTypeDefinition(label="service", prepopulated=True, prepopulated_instance_count=0), @@ -194,6 +204,7 @@ def workload_client() -> tuple[TestClient, _FakeSchemaService, str]: credentials = issuer.issue_for_sticky_session( tenant_id="tenant-1", knowledge_graph_id="kg-1", + session_id="session-test-1", ) app = FastAPI() @@ -201,18 +212,20 @@ def workload_client() -> tuple[TestClient, _FakeSchemaService, str]: app.dependency_overrides[get_workload_schema_service] = lambda: fake app.dependency_overrides[get_workload_extraction_jobs_service] = lambda: extraction_jobs_fake app.dependency_overrides[get_workload_graph_reader] = lambda: _FakeGraphReader() + app.dependency_overrides[get_graph_management_session_journal_service] = lambda: session_journal_fake app.dependency_overrides[get_workload_auth_context] = lambda: WorkloadAuthContext( credentials=credentials, tenant_id="tenant-1", knowledge_graph_id="kg-1", + session_id="session-test-1", ) client = TestClient(app) - return client, fake, credentials.token + return client, fake, credentials.token, session_journal_fake def test_workload_get_schema_authoring_guide(workload_client: tuple[TestClient, _FakeSchemaService, str]) -> None: - client, _fake, token = workload_client + client, _fake, token, _journal = workload_client response = client.get( "/extraction/workloads/schema/authoring-guide", headers={"X-Workload-Token": token}, @@ -225,7 +238,7 @@ def test_workload_get_schema_authoring_guide(workload_client: tuple[TestClient, def test_workload_get_workspace_readiness(workload_client: tuple[TestClient, 
_FakeSchemaService, str]) -> None: - client, _fake, token = workload_client + client, _fake, token, _journal = workload_client response = client.get( "/extraction/workloads/schema/readiness", headers={"X-Workload-Token": token}, @@ -250,16 +263,19 @@ def test_workload_get_workspace_readiness_returns_503_for_graph_storage_errors() credentials = issuer.issue_for_sticky_session( tenant_id="tenant-1", knowledge_graph_id="kg-1", + session_id="session-broken", ) app = FastAPI() app.include_router(workload_routes.router, prefix="/extraction") app.dependency_overrides[get_workload_schema_service] = lambda: fake app.dependency_overrides[get_workload_graph_reader] = lambda: _BrokenGraphReader() app.dependency_overrides[get_workload_extraction_jobs_service] = lambda: _FakeExtractionJobsService() + app.dependency_overrides[get_graph_management_session_journal_service] = lambda: _FakeSessionJournal() app.dependency_overrides[get_workload_auth_context] = lambda: WorkloadAuthContext( credentials=credentials, tenant_id="tenant-1", knowledge_graph_id="kg-1", + session_id="session-broken", ) client = TestClient(app) response = client.get( @@ -271,7 +287,7 @@ def test_workload_get_workspace_readiness_returns_503_for_graph_storage_errors() def test_workload_list_instances_by_type(workload_client: tuple[TestClient, _FakeSchemaService, str]) -> None: - client, _fake, token = workload_client + client, _fake, token, _journal = workload_client response = client.get( "/extraction/workloads/graph/instances", headers={"X-Workload-Token": token}, @@ -285,7 +301,7 @@ def test_workload_list_instances_by_type(workload_client: tuple[TestClient, _Fak def test_workload_list_relationship_instances(workload_client: tuple[TestClient, _FakeSchemaService, str]) -> None: - client, _fake, token = workload_client + client, _fake, token, _journal = workload_client response = client.get( "/extraction/workloads/graph/relationships", headers={"X-Workload-Token": token}, @@ -303,7 +319,7 @@ def 
test_workload_list_relationship_instances(workload_client: tuple[TestClient, def test_workload_save_schema_ontology(workload_client: tuple[TestClient, _FakeSchemaService, str]) -> None: - client, fake, token = workload_client + client, fake, token, _journal = workload_client response = client.put( "/extraction/workloads/schema/ontology", headers={"X-Workload-Token": token}, @@ -336,7 +352,7 @@ def test_workload_save_schema_ontology(workload_client: tuple[TestClient, _FakeS def test_workload_check_graph_slugs(workload_client: tuple[TestClient, _FakeSchemaService, str]) -> None: - client, _fake, token = workload_client + client, _fake, token, _journal = workload_client response = client.post( "/extraction/workloads/graph/check-slugs", headers={"X-Workload-Token": token}, @@ -349,7 +365,7 @@ def test_workload_check_graph_slugs(workload_client: tuple[TestClient, _FakeSche def test_workload_validate_graph_mutations(workload_client: tuple[TestClient, _FakeSchemaService, str]) -> None: - client, _fake, token = workload_client + client, _fake, token, _journal = workload_client response = client.post( "/extraction/workloads/mutations/validate", headers={"X-Workload-Token": token}, @@ -361,8 +377,23 @@ def test_workload_validate_graph_mutations(workload_client: tuple[TestClient, _F assert payload["operation_count"] == 1 -def test_workload_apply_graph_mutations(workload_client: tuple[TestClient, _FakeSchemaService, str]) -> None: - client, fake, token = workload_client +def test_workload_apply_graph_mutations_appends_session_journal( + workload_client: tuple[TestClient, _FakeSchemaService, str, _FakeSessionJournal], +) -> None: + client, _fake, token, journal = workload_client + response = client.post( + "/extraction/workloads/mutations/apply", + headers={"X-Workload-Token": token}, + json={"jsonl": 
'{"op":"CREATE","type":"node","id":"service:0123456789abcdef","label":"service","set_properties":{"name":"api","slug":"api","data_source_id":"bootstrap","source_path":"assistant"}}'}, + ) + assert response.status_code == 200 + assert len(journal.appended) == 1 + assert journal.appended[0][0] == "session-test-1" + assert "CREATE" in journal.appended[0][1] + + +def test_workload_apply_graph_mutations(workload_client: tuple[TestClient, _FakeSchemaService, str, _FakeSessionJournal]) -> None: + client, fake, token, _journal = workload_client fake.saved = OntologyConfig( node_types=( NodeTypeDefinition(label="service", prepopulated=True, prepopulated_instance_count=0), @@ -384,7 +415,7 @@ def test_workload_apply_graph_mutations(workload_client: tuple[TestClient, _Fake def test_workload_get_extraction_jobs_config(workload_client: tuple[TestClient, _FakeSchemaService, str]) -> None: - client, _fake, token = workload_client + client, _fake, token, _journal = workload_client response = client.get( "/extraction/workloads/extraction-jobs", headers={"X-Workload-Token": token}, @@ -397,7 +428,7 @@ def test_workload_get_extraction_jobs_config(workload_client: tuple[TestClient, def test_workload_save_extraction_jobs_config(workload_client: tuple[TestClient, _FakeSchemaService, str]) -> None: - client, _fake, token = workload_client + client, _fake, token, _journal = workload_client job_set = { "name": "Adapter Deep Extraction", "strategy": "by_instances", @@ -419,7 +450,7 @@ def test_workload_save_extraction_jobs_config(workload_client: tuple[TestClient, def test_workload_get_extraction_jobs_plan_summary( workload_client: tuple[TestClient, _FakeSchemaService, str], ) -> None: - client, _fake, token = workload_client + client, _fake, token, _journal = workload_client client.put( "/extraction/workloads/extraction-jobs", headers={"X-Workload-Token": token}, @@ -446,7 +477,7 @@ def test_workload_get_extraction_jobs_plan_summary( def 
test_workload_get_extraction_jobs_status(workload_client: tuple[TestClient, _FakeSchemaService, str]) -> None: - client, _fake, token = workload_client + client, _fake, token, _journal = workload_client response = client.get( "/extraction/workloads/extraction-jobs/status", headers={"X-Workload-Token": token}, diff --git a/src/api/tests/unit/infrastructure/extraction_workload/test_mutation_preflight.py b/src/api/tests/unit/infrastructure/extraction_workload/test_mutation_preflight.py index e13d48f88..0f56661f4 100644 --- a/src/api/tests/unit/infrastructure/extraction_workload/test_mutation_preflight.py +++ b/src/api/tests/unit/infrastructure/extraction_workload/test_mutation_preflight.py @@ -38,6 +38,34 @@ async def test_validate_rejects_define_for_existing_type() -> None: assert any("DEFINE" in error for error in errors) +@pytest.mark.asyncio +async def test_validate_rejects_delete_for_missing_node_id() -> None: + jsonl = '{"op":"DELETE","type":"node","id":"service:0123456789abcdef"}' + reader = _FakeGraphReader(existing_node_ids=frozenset()) + errors = await validate_mutation_jsonl( + jsonl_content=jsonl, + tenant_id="tenant-1", + knowledge_graph_id="kg-1", + graph_reader=reader, + existing_type_keys=frozenset(), + ) + assert any("does not exist" in error and "DELETE" in error for error in errors) + + +@pytest.mark.asyncio +async def test_validate_allows_delete_for_existing_node_id() -> None: + jsonl = '{"op":"DELETE","type":"node","id":"service:0123456789abcdef"}' + reader = _FakeGraphReader(existing_node_ids=frozenset({"service:0123456789abcdef"})) + errors = await validate_mutation_jsonl( + jsonl_content=jsonl, + tenant_id="tenant-1", + knowledge_graph_id="kg-1", + graph_reader=reader, + existing_type_keys=frozenset(), + ) + assert errors == [] + + @pytest.mark.asyncio async def test_validate_rejects_create_for_existing_node_id() -> None: jsonl = ( From 2a46a3b6380ce0516991c605c95ddeaf57172c42 Mon Sep 17 00:00:00 2001 From: aredenba-rh <aredenba@redhat.com> 
Date: Sun, 14 Jun 2026 00:20:45 -0400 Subject: [PATCH 131/153] feat(extraction): document unique edge labels for GMA and record turn usage Teach the Graph Management Assistant that each relationship UI row needs a distinct edge_types label, with read-back verification before claiming saves. Also propagate Claude SDK token/cost metrics into session journals and chat turn handling for operator visibility. Co-authored-by: Cursor <cursoragent@cursor.com> --- .../kartograph_agent_runtime/agent_prompt.py | 4 + .../kartograph_agent_runtime/executor.py | 25 +++- src/agent-runtime/tests/test_executor.py | 25 ++++ .../application/chat_turn_service.py | 7 ++ .../graph_management_session_journal.py | 78 ++++++++++--- .../application/schema_authoring_guide.py | 82 +++++++++++-- .../application/skill_resolution_service.py | 22 +++- .../repositories/extraction_job_repository.py | 5 + .../application/test_chat_turn_service.py | 58 ++++++++++ .../test_graph_management_session_journal.py | 108 +++++++++++++++++- .../test_schema_authoring_guide.py | 9 ++ 11 files changed, 392 insertions(+), 31 deletions(-) diff --git a/src/agent-runtime/kartograph_agent_runtime/agent_prompt.py b/src/agent-runtime/kartograph_agent_runtime/agent_prompt.py index f4c9a6c11..29007f95e 100644 --- a/src/agent-runtime/kartograph_agent_runtime/agent_prompt.py +++ b/src/agent-runtime/kartograph_agent_runtime/agent_prompt.py @@ -64,6 +64,10 @@ Start prepopulation only when schema save succeeded **and** readiness returns 200 with gaps. +**Relationship ontology:** each `edge_types[].label` must be unique. Multiple UI rows (one per +source→target pair) require distinct labels (e.g. `tests_ct_api`, `covered_by_us_e2e`). After save, +read back `kartograph_get_schema_ontology` — do not claim N types until N primary labels are stored. + Writes persist to the platform database for the active knowledge graph. 
""".strip() diff --git a/src/agent-runtime/kartograph_agent_runtime/executor.py b/src/agent-runtime/kartograph_agent_runtime/executor.py index 263c6240b..7c90b8ef7 100644 --- a/src/agent-runtime/kartograph_agent_runtime/executor.py +++ b/src/agent-runtime/kartograph_agent_runtime/executor.py @@ -212,6 +212,25 @@ def finalize_sdk_turn_reply( return None +def metrics_from_sdk_result(result: Any | None) -> dict[str, Any]: + """Extract token usage and cost from a Claude Agent SDK ResultMessage.""" + if result is None: + return {} + raw_usage = getattr(result, "usage", None) + usage = raw_usage if isinstance(raw_usage, dict) else {} + total_cost = getattr(result, "total_cost_usd", None) + cost_usd = float(total_cost) if total_cost is not None else 0.0 + if not usage and cost_usd == 0.0: + return {} + return { + "input_tokens": int(usage.get("input_tokens") or 0), + "output_tokens": int(usage.get("output_tokens") or 0), + "cache_read_tokens": int(usage.get("cache_read_input_tokens") or 0), + "cache_creation_tokens": int(usage.get("cache_creation_input_tokens") or 0), + "cost_usd": cost_usd, + } + + def _build_sdk_env(settings: AgentRuntimeSettings) -> dict[str, str]: env = build_claude_agent_env(settings) if settings.gcloud_config_dir.strip(): @@ -542,4 +561,8 @@ async def _stream_with_claude_sdk( }, } return - yield {"type": "done", "ok": True, "reply": reply} + done_payload: dict[str, Any] = {"type": "done", "ok": True, "reply": reply} + usage_metrics = metrics_from_sdk_result(last_result) + if usage_metrics: + done_payload["usage"] = usage_metrics + yield done_payload diff --git a/src/agent-runtime/tests/test_executor.py b/src/agent-runtime/tests/test_executor.py index e3cf617ce..750102e81 100644 --- a/src/agent-runtime/tests/test_executor.py +++ b/src/agent-runtime/tests/test_executor.py @@ -14,6 +14,7 @@ _extract_sdk_reply, _iter_sdk_messages_with_heartbeat, finalize_sdk_turn_reply, + metrics_from_sdk_result, stream_turn_events, ) from 
kartograph_agent_runtime.settings import AgentRuntimeSettings @@ -198,6 +199,30 @@ async def delayed_messages(): assert collected == ["first", "second"] +def test_metrics_from_sdk_result_extracts_usage_and_cost() -> None: + class _Result: + usage = { + "input_tokens": 120, + "output_tokens": 45, + "cache_read_input_tokens": 30, + "cache_creation_input_tokens": 15, + } + total_cost_usd = 0.33 + + metrics = metrics_from_sdk_result(_Result()) + + assert metrics["input_tokens"] == 120 + assert metrics["output_tokens"] == 45 + assert metrics["cache_read_tokens"] == 30 + assert metrics["cache_creation_tokens"] == 15 + assert metrics["cost_usd"] == pytest.approx(0.33) + + +def test_metrics_from_sdk_result_returns_empty_when_missing() -> None: + assert metrics_from_sdk_result(None) == {} + assert metrics_from_sdk_result(object()) == {} + + @pytest.mark.asyncio async def test_stream_turn_events_without_api_key_returns_done_reply( monkeypatch: pytest.MonkeyPatch, diff --git a/src/api/extraction/application/chat_turn_service.py b/src/api/extraction/application/chat_turn_service.py index 754b20ba4..978f95aa5 100644 --- a/src/api/extraction/application/chat_turn_service.py +++ b/src/api/extraction/application/chat_turn_service.py @@ -7,6 +7,7 @@ from typing import Any from extraction.application.agent_session_service import ExtractionAgentSessionService +from extraction.application.graph_management_session_journal import append_turn_usage_to_session from extraction.ports.sticky_session_runtime import IStickySessionRuntimeService from extraction.domain.value_objects import ( ExtractionSessionMode, @@ -151,6 +152,9 @@ async def stream_chat_turn( str(line) for line in recent if str(line).strip() ] if event.get("type") == "done": + usage = event.get("usage") + if isinstance(usage, dict) and usage: + append_turn_usage_to_session(session, usage=usage) if event.get("ok") is True and event.get("reply"): assistant_reply = str(event["reply"]) elif event.get("ok") is not True: @@ -167,6 
+171,9 @@ async def stream_chat_turn( session.message_history.append({"role": "user", "content": trimmed}) session.updated_at = datetime.now(UTC) await self._session_service.save_session(session) + elif session.runtime_context.get("mutation_journal"): + session.updated_at = datetime.now(UTC) + await self._session_service.save_session(session) else: yield { "type": "done", diff --git a/src/api/extraction/application/graph_management_session_journal.py b/src/api/extraction/application/graph_management_session_journal.py index 12abb5586..2c3cc5347 100644 --- a/src/api/extraction/application/graph_management_session_journal.py +++ b/src/api/extraction/application/graph_management_session_journal.py @@ -8,18 +8,51 @@ from extraction.domain.entities.agent_session import ExtractionAgentSession from extraction.domain.extraction_job import ExtractionJobRecord, ExtractionJobStatus -from extraction.domain.value_objects import ExtractionSessionMode +from extraction.domain.value_objects import ExtractionSessionMode, GraphManagementUiMode from extraction.infrastructure.extraction_job_mutation_metrics import metrics_from_mutation_jsonl from extraction.infrastructure.repositories.extraction_job_repository import ExtractionJobRepository from extraction.ports.repositories import IExtractionAgentSessionRepository GRAPH_MANAGEMENT_SESSION_STRATEGY = "graph_management_session" +_JOB_SET_BY_UI_MODE: dict[str, str] = { + GraphManagementUiMode.INITIAL_SCHEMA_DESIGN.value: ( + "Graph Management · Initial Schema Design" + ), + GraphManagementUiMode.EXTRACTION_JOBS.value: "Graph Management · Extraction Jobs", + GraphManagementUiMode.ONE_OFF_MUTATIONS.value: "Graph Management · One-off Mutations", +} + _JOB_SET_BY_MODE: dict[ExtractionSessionMode, str] = { ExtractionSessionMode.SCHEMA_BOOTSTRAP: "Graph Management · Schema Design", ExtractionSessionMode.EXTRACTION_OPERATIONS: "Graph Management · Extraction Operations", } +_USAGE_KEYS = ( + "input_tokens", + "output_tokens", + 
"cache_read_tokens", + "cache_creation_tokens", +) + + +def _ensure_journal(session: ExtractionAgentSession) -> dict[str, object]: + journal = dict(session.runtime_context.get("mutation_journal") or {}) + if not journal.get("started_at"): + journal["started_at"] = session.created_at.isoformat() + return journal + + +def _journal_token_total(journal: dict[str, object]) -> int: + return int(journal.get("input_tokens") or 0) + int(journal.get("output_tokens") or 0) + + +def _job_set_name_for_session(session: ExtractionAgentSession) -> str: + ui_mode = str(session.runtime_context.get("graph_management_ui_mode") or "") + if ui_mode in _JOB_SET_BY_UI_MODE: + return _JOB_SET_BY_UI_MODE[ui_mode] + return _JOB_SET_BY_MODE.get(session.mode, "Graph Management Assistant") + def append_applied_jsonl_to_session( session: ExtractionAgentSession, @@ -30,13 +63,26 @@ def append_applied_jsonl_to_session( chunk = applied_jsonl.strip() if not chunk: return - journal = dict(session.runtime_context.get("mutation_journal") or {}) + journal = _ensure_journal(session) previous = str(journal.get("jsonl") or "").strip() combined = "\n".join(part for part in (previous, chunk) if part) journal["jsonl"] = combined journal["line_count"] = sum(1 for line in combined.splitlines() if line.strip()) - if not journal.get("started_at"): - journal["started_at"] = session.created_at.isoformat() + session.runtime_context["mutation_journal"] = journal + + +def append_turn_usage_to_session( + session: ExtractionAgentSession, + *, + usage: dict[str, object], +) -> None: + """Accumulate token usage from one Graph Management Assistant chat turn.""" + if not usage: + return + journal = _ensure_journal(session) + for key in _USAGE_KEYS: + journal[key] = int(journal.get(key) or 0) + int(usage.get(key) or 0) + journal["cost_usd"] = float(journal.get("cost_usd") or 0.0) + float(usage.get("cost_usd") or 0.0) session.runtime_context["mutation_journal"] = journal @@ -65,14 +111,13 @@ async def append_applied_jsonl( 
await self._session_repository.save(session) async def archive_session_mutations(self, session: ExtractionAgentSession) -> None: - """Write one ARCHIVED extraction job row when the session had graph writes.""" + """Write one ARCHIVED extraction job row for the full GMA session.""" journal = session.runtime_context.get("mutation_journal") or {} jsonl = str(journal.get("jsonl") or "").strip() - if not jsonl: - return - - metrics = metrics_from_mutation_jsonl(jsonl) - if int(metrics.get("write_ops") or 0) <= 0: + metrics = metrics_from_mutation_jsonl(jsonl) if jsonl else {} + write_ops = int(metrics.get("write_ops") or 0) + token_total = _journal_token_total(journal) + if write_ops <= 0 and token_total <= 0: return now = datetime.now(UTC) @@ -84,15 +129,11 @@ async def archive_session_mutations(self, session: ExtractionAgentSession) -> No except ValueError: started_at = session.created_at - job_set_name = _JOB_SET_BY_MODE.get( - session.mode, - "Graph Management Assistant", - ) record = ExtractionJobRecord( id=str(ULID()), knowledge_graph_id=session.knowledge_graph_id, job_id=f"gma-{session.id}", - job_set_name=job_set_name, + job_set_name=_job_set_name_for_session(session), strategy=GRAPH_MANAGEMENT_SESSION_STRATEGY, status=ExtractionJobStatus.ARCHIVED, order_index=0, @@ -104,7 +145,12 @@ async def archive_session_mutations(self, session: ExtractionAgentSession) -> No started_at=started_at, completed_at=now, archived_at=now, - applied_mutations_jsonl=jsonl, + applied_mutations_jsonl=jsonl or None, + input_tokens=int(journal.get("input_tokens") or 0), + output_tokens=int(journal.get("output_tokens") or 0), + cache_read_tokens=int(journal.get("cache_read_tokens") or 0), + cache_creation_tokens=int(journal.get("cache_creation_tokens") or 0), + cost_usd=float(journal.get("cost_usd") or 0.0), entities_created=int(metrics.get("entities_created") or 0), entities_modified=int(metrics.get("entities_modified") or 0), 
relationships_created=int(metrics.get("relationships_created") or 0), diff --git a/src/api/extraction/application/schema_authoring_guide.py b/src/api/extraction/application/schema_authoring_guide.py index 6c123d44a..49b1f6e7a 100644 --- a/src/api/extraction/application/schema_authoring_guide.py +++ b/src/api/extraction/application/schema_authoring_guide.py @@ -87,12 +87,71 @@ ## Schema modeling rules - **Property vs entity:** categorize → property; track instances/relationships → entity + edges. -- **Bidirectional relationships** default on — author **one primary direction only** in `edge_types`. - Set optional `inverse_label` (default `{label}_inverse`). Never add a separate inverse type; - the platform auto-generates it and twin edge instances. Design artifacts show - `primary / inverse` on a single row. - Set `bidirectional: false` only for asymmetric edges (`depends_on`, `created_by`). +## Relationship types (authoring vs UI) + +### Unique edge labels (required) + +Every `edge_types[].label` must be **unique** within the ontology. The platform stores edge types by +label; duplicate labels are rejected or silently collapse to one definition — **never** author six +entries all named `tests` or two named `covered_by`. + +**When the operator wants N rows in the Relationship ontology UI** (one row per source → target +pair), create **N primary `edge_types` entries with N distinct labels** — one concrete +`source_labels` + `target_labels` pair each (single element in each array). Assign a unique label +per row (e.g. `tests_ct_api`, `tests_e2e_adapter`, `covered_by_us_ct`). Set a distinct +`inverse_label` per entry when bidirectional (e.g. `appears_in_ct_api`, `covers_us_ct`). 
+ +Example — eight UI rows for eight endpoint pairs (labels illustrative; adjust naming to taste): + +| UI row | `label` | `source_labels` | `target_labels` | `inverse_label` | +|--------|---------|-----------------|-----------------|-----------------| +| 1 | `tests_ct_api` | `["ComponentTest"]` | `["APIEndpoint"]` | `appears_in_ct_api` | +| 2 | `tests_ct_adapter` | `["ComponentTest"]` | `["Adapter"]` | `appears_in_ct_adapter` | +| … | … | … | … | … | +| 8 | `covered_by_us_e2e` | `["UserStory"]` | `["E2ETest"]` | `covers_us_e2e` | + +After save, call `kartograph_get_schema_ontology` and confirm **eight primary** edge types exist +(`auto_generated` / `inverse_of` entries are inverses — the UI hides them). **Never** claim “8 types +saved” until read-back shows eight distinct primary labels. + +**Relationship scanners:** `--rel` must match the saved `label` for that row (e.g. +`--rel tests_ct_api`, not `--rel tests` when the ontology label is `tests_ct_api`). + +### How the UI counts rows + +Design artifacts show **one row per primary relationship label**. Inverse types are **not** +separate rows; each row shows `primary / inverse` badges (e.g. `tests_ct_api / appears_in_ct_api`). + +**Bidirectional (default):** author **one primary direction only** per label. Do **not** add the +inverse as its own authored `edge_types` entry — the platform auto-generates it on save. + +### Semantic grouping vs UI rows + +**Count relationship types by stored label**, not by endpoint pair alone. Two patterns: + +1. **Few semantic types, few UI rows:** one label (e.g. `tests`) with one representative pair; other + endpoint combinations get relationship **instances** via extraction jobs later. +2. **Many UI rows:** many labels (unique per pair) as in the table above — report the count from + read-back primary entries, not “8 combinations” while only two labels exist. 
+ +**Multi-label arrays (advanced):** one entry may list multiple entity types in `source_labels` / +`target_labels`, but the UI shows **one row** using `source_labels[0]` → `target_labels[0]` only. +Do not promise N×M separate UI rows without N×M distinct primary labels. + +**After every schema save or relationship edit:** call `kartograph_get_schema_ontology` and report +what is stored — for each **primary** edge type: `label`, `source_labels`, `target_labels`, +`inverse_label`, `bidirectional`, `prepopulated`. Do not claim combinations or counts not in that +payload. + +**User-facing summaries must match stored ontology:** + +- ✅ “8 relationship types in the UI: `tests_ct_api`, `tests_ct_adapter`, … (each bidirectional).” +- ✅ “2 relationship types: `tests`, `covered_by` — one representative pair each; other pairs via extraction jobs.” +- ❌ “Saved 8 types: ComponentTest|tests|APIEndpoint, …” when read-back shows only two labels. +- ❌ Listing auto-generated inverses as types you authored. + ## Workspace discovery patterns | Target | Glob / Grep hints | @@ -135,20 +194,25 @@ ## Relationship type shape +One primary entry per UI row (single source/target pair; **unique `label`**): + ```json { - "label": "exercises", + "label": "tests_ct_api", + "description": "ComponentTest validates APIEndpoint behavior", "source_labels": ["ComponentTest"], "target_labels": ["APIEndpoint"], - "prepopulated": true, + "prepopulated": false, "bidirectional": true, - "inverse_label": "exercises_inverse" + "inverse_label": "appears_in_ct_api" } ``` -Do **not** also add `exercises_inverse` as its own `edge_types` entry — that inverse is auto-generated on save. +Do **not** also add `appears_in_ct_api` as its own `edge_types` entry — that inverse is auto-generated on save. -Relationship scanner convention: `out/{source}_{label}_{target}_instances.json` (primary direction only). 
+**Prepopulation / scanners:** one concrete triple per run; `--rel` equals the saved `label`: +`run_scanner.py --relationship --source ComponentTest --rel tests_ct_api --target APIEndpoint`. +Output: `out/{source}_{label}_{target}_instances.json` (primary direction only; platform adds twin inverse edges on apply). ## Instance mutations (JSONL) diff --git a/src/api/extraction/application/skill_resolution_service.py b/src/api/extraction/application/skill_resolution_service.py index 0d91773ec..3862e11a1 100644 --- a/src/api/extraction/application/skill_resolution_service.py +++ b/src/api/extraction/application/skill_resolution_service.py @@ -89,6 +89,13 @@ class ResolvedExtractionSkillPack: "500/503, report both outcomes as a backend bug — do not retry blindly or continue to " "the next prepopulated type." ), + ( + "Relationship ontology UI rows require unique edge_types labels. When the user asks " + "for multiple source→target pairs (e.g. eight ComponentTest/E2ETest tests rows), save " + "one primary edge_types entry per pair with distinct labels — never reuse the same " + "label six times. After kartograph_save_schema_ontology, call kartograph_get_schema_ontology " + "and report the stored primary label count before telling the user to refresh the UI." + ), ), }, ExtractionSessionMode.EXTRACTION_OPERATIONS: { @@ -132,10 +139,17 @@ class ResolvedExtractionSkillPack: "schema_modeling": ( "Property vs entity: distinguish/categorize → property on an existing type; " "track which/what or needs relationships → entity type + edges. " - "Relationships default bidirectional — author one primary direction in edge_types with " - "optional inverse_label; never add a separate inverse edge type (platform auto-generates " - "it and twin instances). Design artifacts show primary/inverse on one row. " - "Set bidirectional=false only for asymmetric edges (depends_on, created_by)." + "Edge type labels must be UNIQUE — never duplicate `tests` or `covered_by` across rows. 
" + "When the operator wants N Relationship ontology UI rows (one per source→target pair), " + "author N primary edge_types with N distinct labels (e.g. tests_ct_api, covered_by_us_e2e), " + "each with a single source_labels/target_labels pair and distinct inverse_label when " + "bidirectional. Relationship scanners use --rel matching the saved label. " + "Relationships default bidirectional — one primary entry per unique label; never add a " + "separate inverse type (platform auto-generates it). UI: one row per primary label. " + "After save, read kartograph_get_schema_ontology and confirm primary edge type count — " + "never claim N types saved until read-back shows N distinct primary labels. " + "Set bidirectional=false only for asymmetric edges (depends_on, created_by). " + "Full rules: kartograph_get_schema_authoring_guide." ), "schema_workflow": ( "Call kartograph_get_schema_authoring_guide when you need shapes, phases, or mutation rules. " diff --git a/src/api/extraction/infrastructure/repositories/extraction_job_repository.py b/src/api/extraction/infrastructure/repositories/extraction_job_repository.py index 8d2b6416b..cc45adcc4 100644 --- a/src/api/extraction/infrastructure/repositories/extraction_job_repository.py +++ b/src/api/extraction/infrastructure/repositories/extraction_job_repository.py @@ -542,6 +542,11 @@ async def insert_archived_session_job(self, job: ExtractionJobRecord) -> None: run_started_at=job.run_started_at, archived_at=job.archived_at, applied_mutations_jsonl=job.applied_mutations_jsonl, + input_tokens=job.input_tokens, + output_tokens=job.output_tokens, + cache_read_tokens=job.cache_read_tokens, + cache_creation_tokens=job.cache_creation_tokens, + cost_usd=job.cost_usd, ) ) await self._session.flush() diff --git a/src/api/tests/unit/extraction/application/test_chat_turn_service.py b/src/api/tests/unit/extraction/application/test_chat_turn_service.py index 575db9e48..09c2b16b6 100644 --- 
a/src/api/tests/unit/extraction/application/test_chat_turn_service.py +++ b/src/api/tests/unit/extraction/application/test_chat_turn_service.py @@ -119,6 +119,64 @@ def _build_chat_turn_service( return service, repo +class _UsageEmittingChatAgent: + async def stream_turn(self, **kwargs): + yield { + "type": "done", + "ok": True, + "reply": "Designed schema.", + "usage": { + "input_tokens": 800, + "output_tokens": 200, + "cache_read_tokens": 0, + "cache_creation_tokens": 0, + "cost_usd": 0.25, + }, + } + + +@pytest.mark.asyncio +async def test_stream_chat_turn_accumulates_token_usage_in_session_journal() -> None: + repo = _InMemoryAgentSessionRepository() + sticky = InMemoryStickySessionRuntimeManager() + session_service = ExtractionAgentSessionService(repository=repo) + runtime_service = StickySessionRuntimeService( + session_service=session_service, + skill_resolution_service=_StaticSkillResolutionService(), + ingestion_readiness_reader=_StaticIngestionReadinessReader(IngestionReadinessSnapshot(1, 1)), + sticky_runtime_manager=sticky, + bootstrap_builder=_StaticBootstrapBuilder(), + health_checker=_InstantHealthChecker(), + runtime_backend="memory", + sticky_health_timeout_seconds=5.0, + ) + service = ExtractionChatTurnService( + session_service=session_service, + runtime_service=runtime_service, + chat_agent=_UsageEmittingChatAgent(), + ) + + events = [ + event + async for event in service.stream_chat_turn( + tenant_id="tenant-1", + user_id="user-1", + knowledge_graph_id="kg-1", + mode=ExtractionSessionMode.SCHEMA_BOOTSTRAP, + ui_mode=GraphManagementUiMode.INITIAL_SCHEMA_DESIGN, + message="Design entity types", + ) + ] + + assert events[-1]["ok"] is True + active = await repo.find_active_by_scope("user-1", "kg-1", ExtractionSessionMode.SCHEMA_BOOTSTRAP) + assert active is not None + journal = active.runtime_context["mutation_journal"] + assert journal["input_tokens"] == 800 + assert journal["output_tokens"] == 200 + assert journal["cost_usd"] == 0.25 + + 
@pytest.mark.asyncio async def test_stream_chat_turn_persists_assistant_reply() -> None: service, repo = _build_chat_turn_service(readiness=IngestionReadinessSnapshot(1, 1)) diff --git a/src/api/tests/unit/extraction/application/test_graph_management_session_journal.py b/src/api/tests/unit/extraction/application/test_graph_management_session_journal.py index 23e482f9b..721593b20 100644 --- a/src/api/tests/unit/extraction/application/test_graph_management_session_journal.py +++ b/src/api/tests/unit/extraction/application/test_graph_management_session_journal.py @@ -10,10 +10,11 @@ from extraction.application.graph_management_session_journal import ( GraphManagementSessionJournalService, append_applied_jsonl_to_session, + append_turn_usage_to_session, ) from extraction.domain.entities.agent_session import ExtractionAgentSession from extraction.domain.extraction_job import ExtractionJobStatus -from extraction.domain.value_objects import ExtractionSessionMode +from extraction.domain.value_objects import ExtractionSessionMode, GraphManagementUiMode class _InMemorySessionRepository: @@ -89,3 +90,108 @@ async def test_archive_session_mutations_creates_archived_job() -> None: assert job.strategy == "graph_management_session" assert job.applied_mutations_jsonl assert "Graph Management · Schema Design" in job.job_set_name + + +def test_append_turn_usage_to_session_accumulates_tokens() -> None: + session = ExtractionAgentSession( + id="session-3", + user_id="user-1", + knowledge_graph_id="kg-1", + mode=ExtractionSessionMode.SCHEMA_BOOTSTRAP, + ) + append_turn_usage_to_session( + session, + usage={ + "input_tokens": 100, + "output_tokens": 50, + "cache_read_tokens": 10, + "cache_creation_tokens": 5, + "cost_usd": 0.12, + }, + ) + append_turn_usage_to_session( + session, + usage={ + "input_tokens": 200, + "output_tokens": 75, + "cost_usd": 0.08, + }, + ) + + journal = session.runtime_context["mutation_journal"] + assert journal["input_tokens"] == 300 + assert 
journal["output_tokens"] == 125 + assert journal["cache_read_tokens"] == 10 + assert journal["cost_usd"] == pytest.approx(0.20) + + +@pytest.mark.asyncio +async def test_archive_session_mutations_includes_tokens_and_initial_schema_label() -> None: + session_repo = _InMemorySessionRepository() + job_repo = _InMemoryJobRepository() + service = GraphManagementSessionJournalService( + session_repository=session_repo, + extraction_job_repository=job_repo, + ) + session = ExtractionAgentSession( + id="session-4", + user_id="user-1", + knowledge_graph_id="kg-1", + mode=ExtractionSessionMode.SCHEMA_BOOTSTRAP, + created_at=datetime(2026, 6, 5, tzinfo=UTC), + ) + session.runtime_context["graph_management_ui_mode"] = ( + GraphManagementUiMode.INITIAL_SCHEMA_DESIGN.value + ) + append_applied_jsonl_to_session( + session, + applied_jsonl=( + '{"op":"CREATE","type":"node","id":"service:0123456789abcdef","label":"service",' + '"set_properties":{"name":"api","slug":"api","data_source_id":"bootstrap"}}' + ), + ) + append_turn_usage_to_session( + session, + usage={ + "input_tokens": 1200, + "output_tokens": 400, + "cost_usd": 0.45, + }, + ) + + await service.archive_session_mutations(session) + + assert len(job_repo.inserted) == 1 + job = job_repo.inserted[0] + assert job.input_tokens == 1200 + assert job.output_tokens == 400 + assert job.cost_usd == pytest.approx(0.45) + assert job.job_set_name == "Graph Management · Initial Schema Design" + + +@pytest.mark.asyncio +async def test_archive_session_mutations_token_only_session() -> None: + session_repo = _InMemorySessionRepository() + job_repo = _InMemoryJobRepository() + service = GraphManagementSessionJournalService( + session_repository=session_repo, + extraction_job_repository=job_repo, + ) + session = ExtractionAgentSession( + id="session-5", + user_id="user-1", + knowledge_graph_id="kg-1", + mode=ExtractionSessionMode.SCHEMA_BOOTSTRAP, + created_at=datetime(2026, 6, 5, tzinfo=UTC), + ) + append_turn_usage_to_session( + session, 
+ usage={"input_tokens": 500, "output_tokens": 100, "cost_usd": 0.05}, + ) + + await service.archive_session_mutations(session) + + assert len(job_repo.inserted) == 1 + job = job_repo.inserted[0] + assert job.input_tokens == 500 + assert job.applied_mutations_jsonl is None diff --git a/src/api/tests/unit/extraction/application/test_schema_authoring_guide.py b/src/api/tests/unit/extraction/application/test_schema_authoring_guide.py index ac19cd7d6..856009cfe 100644 --- a/src/api/tests/unit/extraction/application/test_schema_authoring_guide.py +++ b/src/api/tests/unit/extraction/application/test_schema_authoring_guide.py @@ -20,3 +20,12 @@ def test_authoring_guide_documents_bootstrap_and_modeling_guidance() -> None: assert "smoke-test" in SCHEMA_AUTHORING_GUIDE.lower() or "smoke test" in SCHEMA_AUTHORING_GUIDE.lower() assert "run_scanner.py" in SCHEMA_AUTHORING_GUIDE assert "next_action" in SCHEMA_AUTHORING_GUIDE + assert "## Relationship types" in SCHEMA_AUTHORING_GUIDE + assert "one row per primary relationship label" in SCHEMA_AUTHORING_GUIDE + assert "kartograph_get_schema_ontology" in SCHEMA_AUTHORING_GUIDE + assert "N×M separate UI rows" in SCHEMA_AUTHORING_GUIDE + assert "source_labels[0]" in SCHEMA_AUTHORING_GUIDE or "first source" in SCHEMA_AUTHORING_GUIDE + assert "Unique edge labels" in SCHEMA_AUTHORING_GUIDE + assert "duplicate labels are rejected" in SCHEMA_AUTHORING_GUIDE + assert "tests_ct_api" in SCHEMA_AUTHORING_GUIDE + assert "eight primary" in SCHEMA_AUTHORING_GUIDE From 14e1fd56c13d20ddd33a11b5b073759ee39f54c7 Mon Sep 17 00:00:00 2001 From: aredenba-rh <aredenba@redhat.com> Date: Sun, 14 Jun 2026 14:37:59 -0400 Subject: [PATCH 132/153] feat(extraction): pre-seed job context and reduce agent discovery overhead Front-load graph_id, property gaps, JSONL examples, and directory-prefix file materialization so enrichment jobs spend less time probing formats and paths. 
Co-authored-by: Cursor <cursoragent@cursor.com> --- .../extraction_job_target_context.py | 103 ++++++++++++++++++ .../extraction_job_helpers/__init__.py | 2 + .../mutation-examples.jsonl | 4 + .../infrastructure/extraction_job_prompt.py | 32 ++++-- .../extraction_job_repository_files.py | 17 ++- .../extraction_job_runner_factory.py | 26 +++++ .../extraction_job_workdir_layout.py | 6 +- .../extraction_job_workdir_materializer.py | 25 ++++- .../ports/extraction_job_target_context.py | 20 ++++ .../target_context_enricher.py | 66 +++++++++++ .../test_extraction_job_target_context.py | 68 ++++++++++++ .../extraction_job_target_context_enricher.py | 45 ++++++++ .../test_extraction_job_prompt.py | 23 ++++ .../test_extraction_job_repository_files.py | 43 ++++++++ ...est_extraction_job_workdir_materializer.py | 71 ++++++++++++ 15 files changed, 534 insertions(+), 17 deletions(-) create mode 100644 src/api/extraction/application/extraction_job_target_context.py create mode 100644 src/api/extraction/infrastructure/extraction_job_helpers/mutation-examples.jsonl create mode 100644 src/api/extraction/ports/extraction_job_target_context.py create mode 100644 src/api/infrastructure/extraction_workload/target_context_enricher.py create mode 100644 src/api/tests/unit/extraction/application/test_extraction_job_target_context.py create mode 100644 src/api/tests/unit/extraction/infrastructure/fakes/extraction_job_target_context_enricher.py diff --git a/src/api/extraction/application/extraction_job_target_context.py b/src/api/extraction/application/extraction_job_target_context.py new file mode 100644 index 000000000..9e81c6701 --- /dev/null +++ b/src/api/extraction/application/extraction_job_target_context.py @@ -0,0 +1,103 @@ +"""Enrich extraction job target instances with live graph context for agent workspaces.""" + +from __future__ import annotations + +from typing import Any + +from extraction.domain.extraction_job import ExtractionTargetInstance +from 
extraction.ports.workload_graph import WorkloadGraphNode + +_PLATFORM_MANAGED_PROPERTIES = frozenset( + { + "data_source_id", + "knowledge_graph_id", + "graph_id", + "source_path", + } +) + + +def _properties_for_entity_type( + entity_type: str, + *, + node_types: list[dict[str, Any]], +) -> tuple[str, ...]: + for node in node_types: + if str(node.get("label") or "").strip() != entity_type: + continue + required = tuple( + str(name).strip() + for name in node.get("required_properties") or () + if str(name).strip() + ) + optional = tuple( + str(name).strip() + for name in node.get("optional_properties") or () + if str(name).strip() + ) + return required + optional + return () + + +def _property_is_missing(properties: dict[str, Any], property_name: str) -> bool: + value = properties.get(property_name) + return value is None or value == "" + + +def missing_properties_for_instance( + *, + entity_type: str, + node_properties: dict[str, Any], + node_types: list[dict[str, Any]], +) -> tuple[str, ...]: + """Return ontology property names absent or empty on one live graph node.""" + missing: list[str] = [] + for property_name in _properties_for_entity_type(entity_type, node_types=node_types): + if property_name in _PLATFORM_MANAGED_PROPERTIES: + continue + if _property_is_missing(node_properties, property_name): + missing.append(property_name) + return tuple(sorted(missing)) + + +def enrich_target_instance_for_context( + instance: ExtractionTargetInstance, + *, + graph_node: WorkloadGraphNode | None, + node_types: list[dict[str, Any]], +) -> dict[str, Any]: + """Build one job-context target entry with graph id and property gaps.""" + payload = instance.to_dict() + if graph_node is None: + payload["graph_id"] = None + payload["properties_missing"] = list( + _properties_for_entity_type(instance.entity_type, node_types=node_types) + ) + return payload + + payload["graph_id"] = graph_node.id + payload["properties_missing"] = list( + missing_properties_for_instance( + 
entity_type=instance.entity_type, + node_properties=graph_node.properties, + node_types=node_types, + ) + ) + return payload + + +def enrich_target_instances_for_context( + instances: tuple[ExtractionTargetInstance, ...], + *, + graph_nodes_by_slug: dict[str, WorkloadGraphNode], + node_types: list[dict[str, Any]], +) -> list[dict[str, Any]]: + """Build enriched target_instances payload for job-context.json.""" + return [ + enrich_target_instance_for_context( + instance, + graph_node=graph_nodes_by_slug.get(instance.slug), + node_types=node_types, + ) + for instance in instances + ] diff --git a/src/api/extraction/infrastructure/extraction_job_helpers/__init__.py b/src/api/extraction/infrastructure/extraction_job_helpers/__init__.py index 6680535a0..f80a15b48 100644 --- a/src/api/extraction/infrastructure/extraction_job_helpers/__init__.py +++ b/src/api/extraction/infrastructure/extraction_job_helpers/__init__.py @@ -5,3 +5,5 @@ HELPERS_DIR = Path(__file__).resolve().parent HELPERS_CONTAINER_DIR = "helpers" HELPER_SCRIPT_NAMES = ("workload-mutations.sh",) +HELPER_RESOURCE_NAMES = ("mutation-examples.jsonl",) +HELPER_BUNDLE_NAMES = HELPER_SCRIPT_NAMES + HELPER_RESOURCE_NAMES diff --git a/src/api/extraction/infrastructure/extraction_job_helpers/mutation-examples.jsonl b/src/api/extraction/infrastructure/extraction_job_helpers/mutation-examples.jsonl new file mode 100644 index 000000000..09da88b28 --- /dev/null +++ b/src/api/extraction/infrastructure/extraction_job_helpers/mutation-examples.jsonl @@ -0,0 +1,4 @@ +{"op":"CREATE","type":"node","id":"adapter:abc123def4567890","label":"Adapter","set_properties":{"slug":"cl_m_wrong_nest","name":"cl-m-wrong-nest","data_source_id":"hyperfleet-e2e","transport":"maestro"}} +{"op":"UPDATE","type":"node","id":"adapter:abc123def4567890","set_properties":{"transport":"maestro","resource_types":"Cluster"}} 
+{"op":"CREATE","type":"edge","id":"edge:abc123def4567891","label":"tests","start_id":"componenttest:abc123def4567892","end_id":"adapter:abc123def4567890","set_properties":{"data_source_id":"hyperfleet-e2e"}} +{"op":"DELETE","type":"node","id":"adapter:deadbeefdeadbeef"} diff --git a/src/api/extraction/infrastructure/extraction_job_prompt.py b/src/api/extraction/infrastructure/extraction_job_prompt.py index 33e3dae59..be76a8f2d 100644 --- a/src/api/extraction/infrastructure/extraction_job_prompt.py +++ b/src/api/extraction/infrastructure/extraction_job_prompt.py @@ -8,11 +8,14 @@ EXTRACTION_PROMPT_FILENAME = "extraction_prompt.md" MUTATIONS_HELPER = "helpers/workload-mutations.sh" +MUTATION_EXAMPLES = "helpers/mutation-examples.jsonl" EXTRACTION_JOB_INVOKE_PROMPT = ( "You are running a Kartograph extraction job in /workspace. " f"Read {EXTRACTION_PROMPT_FILENAME}, job-context.json, and sources-index.json, then follow " - "the instructions completely. Write JSONL batches under mutations/, validate with " + "the instructions completely. Read job-context.json target_instances for graph_id and " + "properties_missing before querying the graph API. Copy JSONL shapes from " + f"{MUTATION_EXAMPLES} when writing mutations. Write JSONL batches under mutations/, validate with " f"`bash {MUTATIONS_HELPER} validate mutations/<batch>.jsonl`, then apply with " f"`bash {MUTATIONS_HELPER} apply mutations/<batch>.jsonl`. Do not finish until apply " "succeeds and mutations/result.json reports operations_applied > 0." @@ -46,10 +49,13 @@ def build_extraction_job_prompt(*, job: ExtractionJobRecord) -> str: "Treat partial coverage as incomplete unless the job instructions below narrow scope.", "", "## Target entity instances", - "Process only the instances listed below. Read source files under repository-files/", - "when materialized (see job-context.json repository_files and instance property paths", - "such as config_file_path or source_path). 
Use the workload API to read existing graph", - "context and emit JSONL mutations for new or updated entities and relationships.", + "Process only the instances listed below. Each entry in job-context.json", + "target_instances includes graph_id (for UPDATE/DELETE) and properties_missing", + "(ontology fields still empty on the live node). Read source files under", + "repository-files/ when materialized (see job-context.json repository_files and", + "instance property paths such as config_path, config_file_path, or source_path).", + "Use the workload API for additional graph context and emit JSONL mutations for", + "new or updated entities and relationships.", "", ] ) @@ -73,9 +79,19 @@ def build_extraction_job_prompt(*, job: ExtractionJobRecord) -> str: lines.extend( [ "## Repository files", - "If job-context.json repository_files.files_written is 0, report the warnings there", - "and still apply any updates supported by graph context — but prefer reading", - "repository-files/ content whenever sample_paths are listed.", + "job-context.json repository_files reports materialization status:", + "- paths_not_found lists instance-referenced paths with no JobPackage match.", + "- When paths_not_found is non-empty but files_written > 0, targeted paths missed but", + " files are available under repository-files/ (often via directory prefix matching", + " or a full-repo fallback) — search repository-files/ before concluding sources are absent.", + "If files_written is 0, report repository_files.warnings and still apply updates", + "supported by graph context when possible.", + "", + "## JSONL mutation format", + f"Copy field names and structure from `{MUTATION_EXAMPLES}` in the workspace.", + "Every line needs both op (CREATE|UPDATE|DELETE) and type (node|edge).", + "Use set_properties (not properties). 
UPDATE and DELETE require top-level id.", + "Existing instances must use UPDATE with graph_id from job-context.json target_instances.", "", "## Mutations workflow (required)", "This container has no Kartograph MCP tools. Use the bundled helper script:", diff --git a/src/api/extraction/infrastructure/extraction_job_repository_files.py b/src/api/extraction/infrastructure/extraction_job_repository_files.py index 6e58e001c..b96c421ec 100644 --- a/src/api/extraction/infrastructure/extraction_job_repository_files.py +++ b/src/api/extraction/infrastructure/extraction_job_repository_files.py @@ -15,6 +15,7 @@ _INSTANCE_PATH_PROPERTY_KEYS = ( "config_file_path", + "config_path", "source_path", "file_path", "repository_path", @@ -90,13 +91,17 @@ def _normalize_repository_path(path: str) -> str: def _path_matches(requested: str, candidate: str) -> bool: - normalized_requested = _normalize_repository_path(requested) + normalized_requested = _normalize_repository_path(requested).rstrip("/") normalized_candidate = _normalize_repository_path(candidate) - return ( - normalized_candidate == normalized_requested - or normalized_candidate.endswith(f"/{normalized_requested}") - or normalized_requested.endswith(normalized_candidate) - ) + if normalized_candidate == normalized_requested: + return True + if normalized_candidate.startswith(f"{normalized_requested}/"): + return True + if normalized_candidate.endswith(f"/{normalized_requested}"): + return True + if normalized_requested.endswith(normalized_candidate): + return True + return False def materialize_all_repository_files( diff --git a/src/api/extraction/infrastructure/extraction_job_runner_factory.py b/src/api/extraction/infrastructure/extraction_job_runner_factory.py index f7dda8937..69069e6ef 100644 --- a/src/api/extraction/infrastructure/extraction_job_runner_factory.py +++ b/src/api/extraction/infrastructure/extraction_job_runner_factory.py @@ -15,6 +15,15 @@ get_extraction_workload_runtime_settings, ) from 
extraction.ports.extraction_job_runner import IExtractionJobRunner +from infrastructure.database.connection_pool import ConnectionPool +from infrastructure.dependencies import get_age_connection_pool +from infrastructure.extraction_workload.graph_mutation_writer import GraphWorkloadGraphMutationWriter +from infrastructure.extraction_workload.graph_reader import GraphWorkloadGraphReader +from infrastructure.extraction_workload.schema_service import GraphWorkloadSchemaService +from infrastructure.extraction_workload.target_context_enricher import ( + GraphExtractionJobTargetContextEnricher, +) +from infrastructure.settings import get_database_settings from sqlalchemy.ext.asyncio import AsyncSession @@ -22,6 +31,7 @@ def create_extraction_job_runner( *, session: AsyncSession | None = None, settings: ExtractionWorkloadRuntimeSettings | None = None, + pool: ConnectionPool | None = None, ) -> IExtractionJobRunner: """Build the configured extraction job runner implementation.""" resolved = settings or get_extraction_workload_runtime_settings() @@ -35,6 +45,18 @@ def create_extraction_job_runner( ) from infrastructure.job_packages.archive_hydrator import JobPackageArchiveHydrator + resolved_pool = pool or get_age_connection_pool() + db_settings = get_database_settings() + graph_reader = GraphWorkloadGraphReader(pool=resolved_pool, settings=db_settings) + schema_service = GraphWorkloadSchemaService( + session=session, + mutation_writer=GraphWorkloadGraphMutationWriter( + pool=resolved_pool, + settings=db_settings, + session=session, + ), + graph_reader=graph_reader, + ) materializer = ExtractionJobWorkdirMaterializer( settings=resolved, prepared_job_package_reader=prepared_reader, @@ -42,6 +64,10 @@ def create_extraction_job_runner( session=session, job_package_work_dir=Path(resolved.job_package_work_dir), ), + target_context_enricher=GraphExtractionJobTargetContextEnricher( + graph_reader=graph_reader, + schema_service=schema_service, + ), ) return 
AgenticCiExtractionJobRunner( settings=resolved, diff --git a/src/api/extraction/infrastructure/extraction_job_workdir_layout.py b/src/api/extraction/infrastructure/extraction_job_workdir_layout.py index adf65f1b1..e282d5d69 100644 --- a/src/api/extraction/infrastructure/extraction_job_workdir_layout.py +++ b/src/api/extraction/infrastructure/extraction_job_workdir_layout.py @@ -6,6 +6,7 @@ from pathlib import Path from extraction.infrastructure.extraction_job_helpers import ( + HELPER_BUNDLE_NAMES, HELPER_SCRIPT_NAMES, HELPERS_CONTAINER_DIR, HELPERS_DIR, @@ -30,12 +31,13 @@ def prepare_agentic_ci_workspace( helpers_dir = job_root / HELPERS_CONTAINER_DIR helpers_dir.mkdir(parents=True, exist_ok=True) - for name in HELPER_SCRIPT_NAMES: + for name in HELPER_BUNDLE_NAMES: source = HELPERS_DIR / name if source.is_file(): target = helpers_dir / name shutil.copy2(source, target) - target.chmod(target.stat().st_mode | 0o111) + if name in HELPER_SCRIPT_NAMES: + target.chmod(target.stat().st_mode | 0o111) ensure_agent_workspace_permissions( job_root, diff --git a/src/api/extraction/infrastructure/extraction_job_workdir_materializer.py b/src/api/extraction/infrastructure/extraction_job_workdir_materializer.py index 2842195c0..969b030f1 100644 --- a/src/api/extraction/infrastructure/extraction_job_workdir_materializer.py +++ b/src/api/extraction/infrastructure/extraction_job_workdir_materializer.py @@ -25,6 +25,7 @@ from extraction.infrastructure.prepared_job_package_reader import SqlPreparedJobPackageReader from infrastructure.job_packages.archive_hydrator import JobPackageArchiveHydrator from extraction.infrastructure.workload_runtime_settings import ExtractionWorkloadRuntimeSettings +from extraction.ports.extraction_job_target_context import IExtractionJobTargetContextEnricher from extraction.ports.runtime import ScopedWorkloadCredentials @@ -38,12 +39,14 @@ def __init__( prepared_job_package_reader: SqlPreparedJobPackageReader, probe: ExtractionJobProbe | None = None, 
archive_hydrator: JobPackageArchiveHydrator | None = None, + target_context_enricher: IExtractionJobTargetContextEnricher | None = None, ) -> None: self._settings = settings self._prepared_job_package_reader = prepared_job_package_reader self._job_package_work_dir = Path(settings.job_package_work_dir) self._probe = probe or LoggingExtractionJobProbe() self._archive_hydrator = archive_hydrator + self._target_context_enricher = target_context_enricher async def prepare( self, @@ -115,7 +118,10 @@ async def prepare( "description": job.description, "api_base_url": self._settings.api_base_url.rstrip("/"), "workload_token": credentials.token, - "target_instances": [instance.to_dict() for instance in job.target_instances], + "target_instances": await self._build_target_instances_context( + job=job, + tenant_id=tenant_id, + ), "target_files": [target_file.to_dict() for target_file in job.target_files], "repository_files": materialization.to_dict(), } @@ -125,6 +131,23 @@ async def prepare( ) return job_root + async def _build_target_instances_context( + self, + *, + job: ExtractionJobRecord, + tenant_id: str, + ) -> list[dict]: + if not job.target_instances: + return [] + if self._target_context_enricher is None: + return [instance.to_dict() for instance in job.target_instances] + + return await self._target_context_enricher.enrich_target_instances( + tenant_id=tenant_id, + knowledge_graph_id=job.knowledge_graph_id, + instances=job.target_instances, + ) + def _materialize_repository_files( self, *, diff --git a/src/api/extraction/ports/extraction_job_target_context.py b/src/api/extraction/ports/extraction_job_target_context.py new file mode 100644 index 000000000..fcefa32bd --- /dev/null +++ b/src/api/extraction/ports/extraction_job_target_context.py @@ -0,0 +1,20 @@ +"""Port for enriching extraction job target instances before agent runs.""" + +from __future__ import annotations + +from typing import Any, Protocol + +from extraction.domain.extraction_job import 
ExtractionTargetInstance + + +class IExtractionJobTargetContextEnricher(Protocol): + """Resolve live graph ids and property gaps for assigned extraction targets.""" + + async def enrich_target_instances( + self, + *, + tenant_id: str, + knowledge_graph_id: str, + instances: tuple[ExtractionTargetInstance, ...], + ) -> list[dict[str, Any]]: + """Return job-context target_instances entries with graph_id and properties_missing.""" diff --git a/src/api/infrastructure/extraction_workload/target_context_enricher.py b/src/api/infrastructure/extraction_workload/target_context_enricher.py new file mode 100644 index 000000000..2efcb9d5e --- /dev/null +++ b/src/api/infrastructure/extraction_workload/target_context_enricher.py @@ -0,0 +1,66 @@ +"""Graph-backed enrichment for extraction job target instances in job-context.json.""" + +from __future__ import annotations + +from typing import Any + +from extraction.application.extraction_job_target_context import enrich_target_instances_for_context +from extraction.domain.extraction_job import ExtractionTargetInstance +from extraction.ports.extraction_job_target_context import IExtractionJobTargetContextEnricher +from extraction.ports.workload_graph import IWorkloadGraphReader, WorkloadGraphNode +from extraction.ports.workload_schema import IWorkloadSchemaService + + +def _node_type_dicts_from_ontology(ontology: Any | None) -> list[dict[str, Any]]: + if ontology is None: + return [] + node_types = getattr(ontology, "node_types", None) or () + return [ + { + "label": str(getattr(node, "label", "") or "").strip(), + "required_properties": list(getattr(node, "required_properties", None) or ()), + "optional_properties": list(getattr(node, "optional_properties", None) or ()), + } + for node in node_types + ] + + +class GraphExtractionJobTargetContextEnricher(IExtractionJobTargetContextEnricher): + """Uses workload graph and schema services to pre-seed agent job context.""" + + def __init__( + self, + *, + graph_reader: 
IWorkloadGraphReader, + schema_service: IWorkloadSchemaService, + ) -> None: + self._graph_reader = graph_reader + self._schema_service = schema_service + + async def enrich_target_instances( + self, + *, + tenant_id: str, + knowledge_graph_id: str, + instances: tuple[ExtractionTargetInstance, ...], + ) -> list[dict[str, Any]]: + if not instances: + return [] + + ontology = await self._schema_service.get_ontology(knowledge_graph_id=knowledge_graph_id) + graph_nodes_by_slug: dict[str, WorkloadGraphNode] = {} + for instance in instances: + matches = await self._graph_reader.search_by_slug( + tenant_id=tenant_id, + knowledge_graph_id=knowledge_graph_id, + slug=instance.slug, + entity_type=instance.entity_type, + ) + if matches: + graph_nodes_by_slug[instance.slug] = matches[0] + + return enrich_target_instances_for_context( + instances, + graph_nodes_by_slug=graph_nodes_by_slug, + node_types=_node_type_dicts_from_ontology(ontology), + ) diff --git a/src/api/tests/unit/extraction/application/test_extraction_job_target_context.py b/src/api/tests/unit/extraction/application/test_extraction_job_target_context.py new file mode 100644 index 000000000..980054802 --- /dev/null +++ b/src/api/tests/unit/extraction/application/test_extraction_job_target_context.py @@ -0,0 +1,68 @@ +"""Unit tests for extraction job target instance context enrichment.""" + +from __future__ import annotations + +from extraction.application.extraction_job_target_context import ( + enrich_target_instances_for_context, + missing_properties_for_instance, +) +from extraction.domain.extraction_job import ExtractionTargetInstance +from extraction.ports.workload_graph import WorkloadGraphNode + + +def test_missing_properties_for_instance_detects_empty_optional_fields() -> None: + node_types = [ + { + "label": "Adapter", + "required_properties": ["name", "slug"], + "optional_properties": ["transport", "resource_types", "description"], + } + ] + missing = missing_properties_for_instance( + 
entity_type="Adapter", + node_properties={ + "slug": "cl_m_wrong_nest", + "name": "cl-m-wrong-nest", + "description": "Test adapter", + "repository": "hyperfleet-e2e", + }, + node_types=node_types, + ) + + assert missing == ("resource_types", "transport") + + +def test_enrich_target_instances_for_context_adds_graph_id_and_gaps() -> None: + instances = ( + ExtractionTargetInstance( + slug="cl_m_wrong_nest", + entity_type="Adapter", + properties={"config_path": "testdata/adapter-configs/cl-m-wrong-nest"}, + ), + ) + graph_nodes = { + "cl_m_wrong_nest": WorkloadGraphNode( + id="adapter:96533bc42820e9c5", + entity_type="Adapter", + slug="cl_m_wrong_nest", + properties={ + "slug": "cl_m_wrong_nest", + "name": "cl-m-wrong-nest", + "config_path": "testdata/adapter-configs/cl-m-wrong-nest", + }, + ) + } + enriched = enrich_target_instances_for_context( + instances, + graph_nodes_by_slug=graph_nodes, + node_types=[ + { + "label": "Adapter", + "required_properties": ["name", "slug"], + "optional_properties": ["transport", "resource_types"], + } + ], + ) + + assert enriched[0]["graph_id"] == "adapter:96533bc42820e9c5" + assert enriched[0]["properties_missing"] == ["resource_types", "transport"] diff --git a/src/api/tests/unit/extraction/infrastructure/fakes/extraction_job_target_context_enricher.py b/src/api/tests/unit/extraction/infrastructure/fakes/extraction_job_target_context_enricher.py new file mode 100644 index 000000000..b26b8af05 --- /dev/null +++ b/src/api/tests/unit/extraction/infrastructure/fakes/extraction_job_target_context_enricher.py @@ -0,0 +1,45 @@ +"""Fake target context enricher for extraction job materializer tests.""" + +from __future__ import annotations + +from typing import Any + +from extraction.application.extraction_job_target_context import enrich_target_instances_for_context +from extraction.domain.extraction_job import ExtractionTargetInstance +from extraction.ports.extraction_job_target_context import IExtractionJobTargetContextEnricher +from 
extraction.ports.workload_graph import WorkloadGraphNode + + +class FakeExtractionJobTargetContextEnricher(IExtractionJobTargetContextEnricher): + """Returns deterministic graph context for one adapter slug used in unit tests.""" + + async def enrich_target_instances( + self, + *, + tenant_id: str, + knowledge_graph_id: str, + instances: tuple[ExtractionTargetInstance, ...], + ) -> list[dict[str, Any]]: + graph_nodes_by_slug = { + "hyperfleet_e2e_cl_stuck": WorkloadGraphNode( + id="adapter:abc123def4567890", + entity_type="Adapter", + slug="hyperfleet_e2e_cl_stuck", + properties={ + "slug": "hyperfleet_e2e_cl_stuck", + "name": "cl-stuck", + "config_file_path": "testdata/adapter-configs/cl-stuck/adapter-config.yaml", + }, + ) + } + return enrich_target_instances_for_context( + instances, + graph_nodes_by_slug=graph_nodes_by_slug, + node_types=[ + { + "label": "Adapter", + "required_properties": ["name", "slug"], + "optional_properties": ["transport", "resource_types", "config_file_path"], + } + ], + ) diff --git a/src/api/tests/unit/extraction/infrastructure/test_extraction_job_prompt.py b/src/api/tests/unit/extraction/infrastructure/test_extraction_job_prompt.py index c8db7e461..68759a926 100644 --- a/src/api/tests/unit/extraction/infrastructure/test_extraction_job_prompt.py +++ b/src/api/tests/unit/extraction/infrastructure/test_extraction_job_prompt.py @@ -39,3 +39,26 @@ def test_build_extraction_job_prompt_includes_instances_and_files() -> None: assert "Feature: feature-a" in prompt assert "repo-a/features/a.feature" in prompt assert "job-context.json" in prompt + assert "mutation-examples.jsonl" in prompt + assert "properties_missing" in prompt + assert "paths_not_found" in prompt + + +def test_build_extraction_job_prompt_mentions_graph_id_in_job_context() -> None: + job = ExtractionJobRecord( + id="job-row", + knowledge_graph_id="kg-1", + job_id="adapters_batch_0001_abcd1234", + job_set_name="Adapter Enrichment", + strategy="by_instances", + 
status=ExtractionJobStatus.PENDING, + order_index=0, + description="Enrich adapter transport fields.", + target_instances=( + ExtractionTargetInstance(slug="cl_m_wrong_nest", entity_type="Adapter"), + ), + ) + + prompt = build_extraction_job_prompt(job=job) + + assert "graph_id" in prompt diff --git a/src/api/tests/unit/extraction/infrastructure/test_extraction_job_repository_files.py b/src/api/tests/unit/extraction/infrastructure/test_extraction_job_repository_files.py index 2dff57187..8a95d5e78 100644 --- a/src/api/tests/unit/extraction/infrastructure/test_extraction_job_repository_files.py +++ b/src/api/tests/unit/extraction/infrastructure/test_extraction_job_repository_files.py @@ -72,6 +72,20 @@ def test_collect_instance_repository_paths_reads_config_and_source_paths() -> No assert "pkg/internal/foo.go" in paths +def test_collect_instance_repository_paths_reads_config_path_directories() -> None: + paths = collect_instance_repository_paths( + ( + ExtractionTargetInstance( + slug="cl_m_wrong_nest", + entity_type="Adapter", + properties={"config_path": "testdata/adapter-configs/cl-m-wrong-nest"}, + ), + ) + ) + + assert paths == ("testdata/adapter-configs/cl-m-wrong-nest",) + + def test_materialize_all_repository_files_writes_changeset(tmp_path: Path) -> None: package_id = "01JTESTPACK0000000000000000" _build_package( @@ -127,3 +141,32 @@ def test_materialize_instance_repository_paths_targets_referenced_files(tmp_path assert ( repo_dir / "hyperfleet-e2e" / "testdata/adapter-configs/cl-stuck/adapter-config.yaml" ).is_file() + + +def test_materialize_instance_repository_paths_matches_directory_config_path_prefix( + tmp_path: Path, +) -> None: + package_id = "01JTESTPACK0000000000000003" + _build_package( + tmp_path, + package_id, + "testdata/adapter-configs/cl-m-wrong-nest/adapter-config.yaml", + b"transport: maestro\n", + ) + repo_dir = tmp_path / "repository-files" + + result = materialize_instance_repository_paths( + repository_files_dir=repo_dir, + 
job_package_work_dir=tmp_path, + job_packages=(_source(package_id=package_id),), + paths=("testdata/adapter-configs/cl-m-wrong-nest",), + ) + + output = ( + repo_dir + / "hyperfleet-e2e" + / "testdata/adapter-configs/cl-m-wrong-nest/adapter-config.yaml" + ) + assert result.files_written == 1 + assert result.paths_not_found == () + assert output.read_text(encoding="utf-8") == "transport: maestro\n" diff --git a/src/api/tests/unit/extraction/infrastructure/test_extraction_job_workdir_materializer.py b/src/api/tests/unit/extraction/infrastructure/test_extraction_job_workdir_materializer.py index 02388f7a0..afc4ef1ba 100644 --- a/src/api/tests/unit/extraction/infrastructure/test_extraction_job_workdir_materializer.py +++ b/src/api/tests/unit/extraction/infrastructure/test_extraction_job_workdir_materializer.py @@ -20,6 +20,9 @@ from extraction.infrastructure.extraction_job_workdir_materializer import ExtractionJobWorkdirMaterializer from extraction.infrastructure.workload_runtime_settings import ExtractionWorkloadRuntimeSettings from extraction.ports.runtime import ScopedWorkloadCredentials +from tests.unit.extraction.infrastructure.fakes.extraction_job_target_context_enricher import ( + FakeExtractionJobTargetContextEnricher, +) from shared_kernel.job_package.builder import JobPackageBuilder from shared_kernel.job_package.value_objects import ( AdapterCheckpoint, @@ -130,6 +133,74 @@ async def test_prepare_materializes_instance_referenced_paths_and_workspace_layo assert repo_file.is_file() assert (job_root / "mutations").is_dir() assert (job_root / "helpers" / "workload-mutations.sh").is_file() + assert (job_root / "helpers" / "mutation-examples.jsonl").is_file() context = json.loads((job_root / "job-context.json").read_text(encoding="utf-8")) assert context["repository_files"]["files_written"] == 1 assert probe.observations[0].files_written == 1 + + +@pytest.mark.asyncio +async def test_prepare_enriches_target_instances_with_graph_id_and_missing_properties( + 
tmp_path: Path, +) -> None: + package_id = "01JTESTPACK0000000000000003" + job_packages_dir = tmp_path / "packages" + job_packages_dir.mkdir() + extraction_jobs_dir = tmp_path / "extraction_jobs" + _build_package( + job_packages_dir, + package_id, + "testdata/adapter-configs/cl-stuck/adapter-config.yaml", + b"adapter: stuck\n", + ) + package = PreparedJobPackageSource( + package_id=package_id, + data_source_id="ds-1", + data_source_name="hyperfleet-e2e", + repository_folder="hyperfleet-e2e", + ) + reader = AsyncMock() + reader.list_latest_for_knowledge_graph = AsyncMock(return_value=(package,)) + materializer = ExtractionJobWorkdirMaterializer( + settings=ExtractionWorkloadRuntimeSettings( + extraction_job_work_dir=str(extraction_jobs_dir), + job_package_work_dir=str(job_packages_dir), + ), + prepared_job_package_reader=reader, + target_context_enricher=FakeExtractionJobTargetContextEnricher(), + ) + job = ExtractionJobRecord( + id="job-row", + knowledge_graph_id="kg-1", + job_id="Adapter Deep Extraction_batch_0002_abcd1234", + job_set_name="Adapter Deep Extraction", + strategy="by_instances", + status=ExtractionJobStatus.PENDING, + order_index=0, + description="Extract adapter details.", + target_instances=( + ExtractionTargetInstance( + slug="hyperfleet_e2e_cl_stuck", + entity_type="Adapter", + properties={ + "config_file_path": "testdata/adapter-configs/cl-stuck/adapter-config.yaml", + }, + ), + ), + ) + + job_root = await materializer.prepare( + job=job, + tenant_id="tenant-1", + credentials=ScopedWorkloadCredentials( + token="tok", + expires_at=datetime.now(UTC), + scopes=("workload:chat",), + ), + ) + + context = json.loads((job_root / "job-context.json").read_text(encoding="utf-8")) + target = context["target_instances"][0] + assert target["graph_id"] == "adapter:abc123def4567890" + assert target["properties_missing"] == ["resource_types", "transport"] + assert (job_root / "helpers" / "mutation-examples.jsonl").is_file() From 
16a561a13c3a4ab03c35ff88387008ac78b1362d Mon Sep 17 00:00:00 2001 From: aredenba-rh <aredenba@redhat.com> Date: Sun, 14 Jun 2026 15:39:59 -0400 Subject: [PATCH 133/153] feat(extraction): add one-off mutations mode and graph writes history UI Implement GMA one-off mutations with session archiving, rename Mutation logs to Graph Writes History, fix job set labels and cost display, and add a template-driven manual mutation authoring panel with schema instance views. Co-authored-by: Cursor <cursoragent@cursor.com> --- specs/extraction/one-off-mutations.spec.md | 60 ++++++ specs/extraction/operations.spec.md | 17 +- .../kartograph_agent_runtime/agent_prompt.py | 66 +++++-- src/agent-runtime/tests/test_agent_prompt.py | 21 ++ .../archived_extraction_history.py | 9 +- .../graph_management_session_journal.py | 21 +- .../application/schema_authoring_guide.py | 33 ++++ .../application/skill_resolution_service.py | 67 ++++++- .../domain/mutation_jsonl_metrics.py | 66 +++++++ .../extraction_job_mutation_metrics.py | 73 +------ .../sticky_session_workdir_materializer.py | 15 ++ src/api/extraction/ports/repositories.py | 7 + .../test_archived_extraction_history.py | 30 ++- .../test_graph_management_session_journal.py | 29 ++- .../test_schema_authoring_guide.py | 3 +- .../test_skill_resolution_service.py | 19 ++ .../domain/test_mutation_jsonl_metrics.py | 29 +++ ...est_sticky_session_workdir_materializer.py | 13 ++ .../GraphExtractionArchivedHistory.vue | 52 +++-- .../GraphManagementMutationAuthoringPanel.vue | 187 ++++++++++++++++++ .../pages/knowledge-graphs/[kgId]/manage.vue | 74 ++----- .../kg-graph-management-artifacts.test.ts | 16 ++ .../knowledge-graph-manage-workspace.test.ts | 129 ++++++------ src/dev-ui/app/utils/kgGraphManagement.ts | 14 +- .../app/utils/kgGraphManagementArtifacts.ts | 2 +- src/dev-ui/app/utils/kgManageState.ts | 8 +- src/dev-ui/app/utils/kgManageWorkspace.ts | 6 +- src/dev-ui/app/utils/kgManageWorkspaceHub.ts | 8 +- 28 files changed, 821 insertions(+), 
253 deletions(-) create mode 100644 specs/extraction/one-off-mutations.spec.md create mode 100644 src/api/extraction/domain/mutation_jsonl_metrics.py create mode 100644 src/api/tests/unit/extraction/domain/test_mutation_jsonl_metrics.py create mode 100644 src/dev-ui/app/components/graph-management/GraphManagementMutationAuthoringPanel.vue diff --git a/specs/extraction/one-off-mutations.spec.md b/specs/extraction/one-off-mutations.spec.md new file mode 100644 index 000000000..6899d4581 --- /dev/null +++ b/specs/extraction/one-off-mutations.spec.md @@ -0,0 +1,60 @@ +# One-off Mutations (Graph Management) + +## Purpose +One-off Mutations is a Graph Management Assistant UI mode for direct schema and instance edits. The operator describes a change; the assistant validates and applies it via mutation tools. Sessions archive to Graph Writes History with token cost and applied JSONL. + +## Requirements + +### Requirement: One-off Mutations Skill Pack +The system SHALL resolve a dedicated skill pack when graph-management UI mode is `one-off-mutations`. + +#### Scenario: Skills include edit workflows +- GIVEN UI mode `one-off-mutations` +- WHEN skills are resolved for a chat turn +- THEN instance and schema edit workflow skills are primary +- AND confirmation policy for destructive operations is included + +### Requirement: Assistant Executes Edits In Session +The system SHALL implement requested schema and instance changes via Kartograph schema tools without deferring to extraction job workers. 
+ +#### Scenario: Instance property update +- GIVEN an operator asks to update a property on an existing instance +- WHEN the assistant completes the turn +- THEN it validates and applies UPDATE JSONL mutations +- AND reports write operation counts + +#### Scenario: Schema type change +- GIVEN an operator asks to add an optional property to an entity type +- WHEN the assistant completes the turn +- THEN it saves ontology via `kartograph_save_schema_ontology` after confirmation when required + +### Requirement: JobPackage Not Required +The system SHALL NOT block one-off mutations chat on JobPackage ingestion readiness. + +#### Scenario: Chat without prepared sources +- GIVEN no JobPackages are prepared +- WHEN the operator uses one-off mutations mode +- THEN the chat turn proceeds without awaiting ingestion + +### Requirement: GMA Session Archive +The system SHALL archive each Graph Management Assistant session to Graph Writes History when chat is cleared. + +#### Scenario: Archive with writes and cost +- GIVEN a GMA session applied mutations and consumed tokens +- WHEN the operator clears chat +- THEN one ARCHIVED entry is persisted +- AND job set name reflects the UI mode (Initial Schema Design, Extraction Jobs, or One-off Mutations) + +#### Scenario: Token-only session +- GIVEN a GMA session consumed tokens but applied no graph writes +- WHEN chat is cleared +- THEN an ARCHIVED entry is still persisted with cost metrics + +### Requirement: Graph Writes History Presentation +The system SHALL present archived GMA sessions and extraction worker jobs in a unified Graph Writes History view. 
+ +#### Scenario: Job list shows cost +- GIVEN an archived job or GMA session with cost metadata +- WHEN the operator views Graph Writes History +- THEN each entry shows write count and total cost in USD +- AND GMA sessions are distinguishable from extraction worker jobs diff --git a/specs/extraction/operations.spec.md b/specs/extraction/operations.spec.md index adb760f35..bf05ff156 100644 --- a/specs/extraction/operations.spec.md +++ b/specs/extraction/operations.spec.md @@ -37,7 +37,22 @@ The system SHALL apply graph-management UI mode overlays on top of workspace ses #### Scenario: One-off mutations overlay - GIVEN graph-management UI mode `One-off Mutations` - WHEN a chat turn resolves skills -- THEN scoped JSONL mutation authoring guidance is primary +- THEN scoped JSONL mutation authoring and schema edit guidance is primary +- AND JobPackage readiness is not required + +### Requirement: Graph Writes History +The system SHALL archive Graph Management Assistant sessions and extraction worker jobs that apply graph writes or incur assistant token cost. + +#### Scenario: GMA session job set names +- GIVEN a GMA session archived from graph management +- WHEN the job set name is recorded +- THEN it is one of: `Graph Management · Initial Schema Design`, `Graph Management · Extraction Jobs`, or `Graph Management · One-off Mutations` + +#### Scenario: Unified history view +- GIVEN archived GMA sessions and extraction jobs exist +- WHEN the operator opens Graph Writes History in the project workspace +- THEN both entry types appear grouped by run and job set +- AND each entry shows write count and total cost ### Requirement: Skill Resolution Model The system SHALL resolve agent skills using global templates with knowledge-graph overrides. 
diff --git a/src/agent-runtime/kartograph_agent_runtime/agent_prompt.py b/src/agent-runtime/kartograph_agent_runtime/agent_prompt.py index 29007f95e..e62254de6 100644 --- a/src/agent-runtime/kartograph_agent_runtime/agent_prompt.py +++ b/src/agent-runtime/kartograph_agent_runtime/agent_prompt.py @@ -118,6 +118,35 @@ When the operator approves, save via `kartograph_save_extraction_jobs_config`. """.strip() +_ONE_OFF_MUTATIONS_TOOLS_REFERENCE = """ +## One-off mutation tools (one-off-mutations UI mode) + +| Tool | Purpose | +|------|---------| +| `kartograph_get_schema_authoring_guide` | JSONL shapes, schema rules, one-off workflow | +| `kartograph_get_schema_ontology` | **Always read before edits** | +| `kartograph_save_schema_ontology` | Schema type/property changes (read → merge → save) | +| `kartograph_search_graph_by_slug` | Resolve existing node id for UPDATE/DELETE | +| `kartograph_check_graph_slugs` | Batch slug existence before CREATE | +| `kartograph_list_instances_by_type` | Browse instances when picking targets | +| `kartograph_list_relationship_instances` | Inspect edges before create/update/delete | +| `kartograph_validate_graph_mutations` | Dry-run inline JSONL (primary for ≤20 lines) | +| `kartograph_apply_graph_mutations` | Apply inline JSONL after validate passes | +| `kartograph_validate_graph_mutations_from_file` | Dry-run workspace `.jsonl` file | +| `kartograph_apply_graph_mutations_from_file` | Apply larger batches from workspace file | + +Copy JSONL field names from `helpers/mutation-examples.jsonl` in the workspace. + +### Workflow + +1. Classify request: schema edit, instance edit, mixed, or read-only +2. Read ontology; search/list targets +3. Validate → apply → verify +4. Report write op counts and affected slugs + +Confirm before DELETE nodes or schema removals. Route bulk enrichment to Extraction Jobs mode. +""".strip() + _TOOLS_COMPACT_REFERENCE = ( "Tools: kartograph_* schema MCP tools, plus Read/Write/Edit/Grep/Glob/Bash. 
" "Prepopulation: {label}.py → out/{label}_instances.json → entities_to_jsonl.py or " @@ -209,6 +238,13 @@ def _format_workspace_readiness(readiness: dict[str, Any]) -> str: _EXTRACTION_JOBS_COMPACT_SKILL_KEYS = ("per_instance_description_authoring", "job_set_contract") +_ONE_OFF_MUTATIONS_COMPACT_SKILL_KEYS = ( + "instance_edit_workflow", + "schema_edit_workflow", + "confirmation_policy", + "jsonl_shape_reference", +) + def build_agent_system_prompt( agent_configuration: dict[str, Any], @@ -243,6 +279,12 @@ def build_agent_system_prompt( for key, value in skills_dict.items() if key in _EXTRACTION_JOBS_COMPACT_SKILL_KEYS ) + elif prompt_detail == "compact" and ui_mode == "one-off-mutations": + skill_items = sorted( + (key, value) + for key, value in skills_dict.items() + if key in _ONE_OFF_MUTATIONS_COMPACT_SKILL_KEYS + ) elif prompt_detail == "full": skill_items = sorted(skills_dict.items()) else: @@ -260,12 +302,12 @@ def build_agent_system_prompt( tools_block = "" if include_tools_manifest and settings is not None and settings.workload_token.strip(): if prompt_detail == "compact": - extraction_jobs_block = ( - f"\n\n{_EXTRACTION_JOBS_TOOLS_REFERENCE}" - if ui_mode == "extraction-jobs" - else "" - ) - tools_block = f"## Tools\n\n{_TOOLS_COMPACT_REFERENCE}{extraction_jobs_block}" + mode_block = "" + if ui_mode == "extraction-jobs": + mode_block = f"\n\n{_EXTRACTION_JOBS_TOOLS_REFERENCE}" + elif ui_mode == "one-off-mutations": + mode_block = f"\n\n{_ONE_OFF_MUTATIONS_TOOLS_REFERENCE}" + tools_block = f"## Tools\n\n{_TOOLS_COMPACT_REFERENCE}{mode_block}" else: kartograph_tools = ", ".join( f"`{name}`" @@ -279,13 +321,13 @@ def build_agent_system_prompt( ) ) file_tools = ", ".join(f"`{name}`" for name in WORKSPACE_FILE_TOOL_NAMES) - extraction_jobs_block = ( - f"\n\n{_EXTRACTION_JOBS_TOOLS_REFERENCE}" - if ui_mode == "extraction-jobs" - else "" - ) + mode_block = "" + if ui_mode == "extraction-jobs": + mode_block = f"\n\n{_EXTRACTION_JOBS_TOOLS_REFERENCE}" + elif 
ui_mode == "one-off-mutations": + mode_block = f"\n\n{_ONE_OFF_MUTATIONS_TOOLS_REFERENCE}" tools_block = ( - f"{_TOOLS_QUICK_REFERENCE}{extraction_jobs_block}\n\n" + f"{_TOOLS_QUICK_REFERENCE}{mode_block}\n\n" f"Registered Kartograph tools: {kartograph_tools}.\n" f"Registered workspace tools: {file_tools}." ) diff --git a/src/agent-runtime/tests/test_agent_prompt.py b/src/agent-runtime/tests/test_agent_prompt.py index df850e71a..3d57d2250 100644 --- a/src/agent-runtime/tests/test_agent_prompt.py +++ b/src/agent-runtime/tests/test_agent_prompt.py @@ -135,3 +135,24 @@ def test_build_agent_system_prompt_compact_extraction_jobs_keeps_description_aut assert "IGNORE lines" in prompt assert "relationship_authoring_by_entity_type" in prompt assert "entity_type_authoring_context" in prompt + + +def test_build_agent_system_prompt_one_off_mutations_includes_tools_reference() -> None: + prompt = build_agent_system_prompt( + { + "system_prompt": "You are the Graph Management Assistant.", + "skills": { + "instance_edit_workflow": "Validate then apply.", + "schema_edit_workflow": "Read merge save ontology.", + }, + "graph_management_ui_mode": "one-off-mutations", + }, + settings=AgentRuntimeSettings( + KARTOGRAPH_WORKLOAD_TOKEN="token", + KARTOGRAPH_KNOWLEDGE_GRAPH_ID="kg-123", + ), + ) + + assert "One-off mutation tools" in prompt + assert "mutation-examples.jsonl" in prompt + assert "kartograph_validate_graph_mutations" in prompt diff --git a/src/api/extraction/application/archived_extraction_history.py b/src/api/extraction/application/archived_extraction_history.py index 222e3ba2b..49f0a3439 100644 --- a/src/api/extraction/application/archived_extraction_history.py +++ b/src/api/extraction/application/archived_extraction_history.py @@ -13,9 +13,7 @@ def archived_job_write_ops(job: ExtractionJobRecord) -> int: job.strategy == "graph_management_session" and job.applied_mutations_jsonl ): - from extraction.infrastructure.extraction_job_mutation_metrics import ( - 
metrics_from_mutation_jsonl, - ) + from extraction.domain.mutation_jsonl_metrics import metrics_from_mutation_jsonl return int(metrics_from_mutation_jsonl(job.applied_mutations_jsonl).get("write_ops") or 0) return job.write_ops() @@ -28,6 +26,11 @@ def serialize_archived_job(job: ExtractionJobRecord) -> dict[str, Any]: "jobSet": job.job_set_name, "writeOps": archived_job_write_ops(job), "hasMutations": bool(job.applied_mutations_jsonl), + "inputTokens": job.input_tokens, + "outputTokens": job.output_tokens, + "costUsd": job.cost_usd, + "archivedAt": job.archived_at.isoformat() if job.archived_at else None, + "strategy": job.strategy, } diff --git a/src/api/extraction/application/graph_management_session_journal.py b/src/api/extraction/application/graph_management_session_journal.py index 2c3cc5347..599f12d2c 100644 --- a/src/api/extraction/application/graph_management_session_journal.py +++ b/src/api/extraction/application/graph_management_session_journal.py @@ -9,9 +9,11 @@ from extraction.domain.entities.agent_session import ExtractionAgentSession from extraction.domain.extraction_job import ExtractionJobRecord, ExtractionJobStatus from extraction.domain.value_objects import ExtractionSessionMode, GraphManagementUiMode -from extraction.infrastructure.extraction_job_mutation_metrics import metrics_from_mutation_jsonl -from extraction.infrastructure.repositories.extraction_job_repository import ExtractionJobRepository -from extraction.ports.repositories import IExtractionAgentSessionRepository +from extraction.domain.mutation_jsonl_metrics import metrics_from_mutation_jsonl +from extraction.ports.repositories import ( + IExtractionAgentSessionRepository, + IGraphManagementSessionArchivalRepository, +) GRAPH_MANAGEMENT_SESSION_STRATEGY = "graph_management_session" @@ -23,9 +25,9 @@ GraphManagementUiMode.ONE_OFF_MUTATIONS.value: "Graph Management · One-off Mutations", } -_JOB_SET_BY_MODE: dict[ExtractionSessionMode, str] = { - ExtractionSessionMode.SCHEMA_BOOTSTRAP: 
"Graph Management · Schema Design", - ExtractionSessionMode.EXTRACTION_OPERATIONS: "Graph Management · Extraction Operations", +_DEFAULT_UI_MODE_BY_SESSION_MODE: dict[ExtractionSessionMode, GraphManagementUiMode] = { + ExtractionSessionMode.SCHEMA_BOOTSTRAP: GraphManagementUiMode.INITIAL_SCHEMA_DESIGN, + ExtractionSessionMode.EXTRACTION_OPERATIONS: GraphManagementUiMode.EXTRACTION_JOBS, } _USAGE_KEYS = ( @@ -51,7 +53,10 @@ def _job_set_name_for_session(session: ExtractionAgentSession) -> str: ui_mode = str(session.runtime_context.get("graph_management_ui_mode") or "") if ui_mode in _JOB_SET_BY_UI_MODE: return _JOB_SET_BY_UI_MODE[ui_mode] - return _JOB_SET_BY_MODE.get(session.mode, "Graph Management Assistant") + default_ui_mode = _DEFAULT_UI_MODE_BY_SESSION_MODE.get(session.mode) + if default_ui_mode is not None: + return _JOB_SET_BY_UI_MODE[default_ui_mode.value] + return _JOB_SET_BY_UI_MODE[GraphManagementUiMode.INITIAL_SCHEMA_DESIGN.value] def append_applied_jsonl_to_session( @@ -93,7 +98,7 @@ def __init__( self, *, session_repository: IExtractionAgentSessionRepository, - extraction_job_repository: ExtractionJobRepository, + extraction_job_repository: IGraphManagementSessionArchivalRepository, ) -> None: self._session_repository = session_repository self._extraction_job_repository = extraction_job_repository diff --git a/src/api/extraction/application/schema_authoring_guide.py b/src/api/extraction/application/schema_authoring_guide.py index 49b1f6e7a..585319584 100644 --- a/src/api/extraction/application/schema_authoring_guide.py +++ b/src/api/extraction/application/schema_authoring_guide.py @@ -223,6 +223,39 @@ - Never hand-author bulk CREATE lines in chat; use `entities_to_jsonl.py` / `relationships_to_jsonl.py`. - Create all entity nodes before relationship edges unless you are correcting data with UPDATE/DELETE. 
+## One-off mutations (Graph Management Assistant) + +Use this workflow when the UI mode is **one-off-mutations** — the operator asks for specific schema or instance edits and you apply them directly. + +### Decision tree + +| Request | Tool path | +|---------|-----------| +| Add/change entity or relationship **types** | Read ontology → propose delta → `kartograph_save_schema_ontology` | +| Create/update/delete **instances** | Search/list targets → JSONL → validate → apply | +| Mixed | Schema save first, then instance JSONL | + +### JSONL examples + +Bundled at `helpers/mutation-examples.jsonl` in the workspace. Canonical shapes: + +```json +{"op":"UPDATE","type":"node","id":"adapter:abc123def4567890","set_properties":{"transport":"maestro"}} +{"op":"CREATE","type":"edge","id":"edge:...","label":"tests_ct_api","start_id":"...","end_id":"...","set_properties":{"data_source_id":"manual-edit"}} +{"op":"DELETE","type":"node","id":"adapter:deadbeefdeadbeef"} +``` + +Rules: both `op` and `type` on every line; `set_properties` not `properties`; UPDATE/DELETE need top-level `id`. + +### Workflow + +1. `kartograph_get_schema_ontology` — always before edits +2. Resolve targets: `kartograph_search_graph_by_slug`, `kartograph_list_instances_by_type` +3. `kartograph_validate_graph_mutations` → `kartograph_apply_graph_mutations` (≤20 lines) or apply-from-file +4. Verify with list/search; report write op counts + +Confirm before DELETE nodes or schema removals. Do not use prepopulation scanners unless the operator explicitly requests bulk import. + ## Readiness checklist - Every `prepopulated=true` entity type needs ≥1 live instance. 
diff --git a/src/api/extraction/application/skill_resolution_service.py b/src/api/extraction/application/skill_resolution_service.py index 3862e11a1..d5f89747d 100644 --- a/src/api/extraction/application/skill_resolution_service.py +++ b/src/api/extraction/application/skill_resolution_service.py @@ -269,12 +269,68 @@ class ResolvedExtractionSkillPack: }, GraphManagementUiMode.ONE_OFF_MUTATIONS: { "ui_mode_framing": ( - "Focus on scoped one-off graph mutations with mutation-log auditability." + "Primary mode: scoped one-off graph edits executed by you in this chat. " + "Implement schema changes via kartograph_save_schema_ontology and instance changes " + "via validate-then-apply JSONL mutation tools. Do not defer to extraction jobs or " + "bulk prepopulation scanners unless the operator explicitly asks." + ), + "intake_and_classification": ( + "Classify each request: (A) schema edit — node_types/edge_types/properties; " + "(B) instance edit — CREATE/UPDATE/DELETE nodes or edges; (C) mixed — schema first " + "then instances; (D) read-only — search/list/explain. Ask at most one clarifying " + "question when slug, id, or property target is ambiguous." + ), + "schema_edit_workflow": ( + "Schema edits: (1) kartograph_get_schema_ontology — read current state; " + "(2) propose delta in chat; (3) confirm unless operator said apply/save; " + "(4) kartograph_save_schema_ontology with full merged payload; " + "(5) read-back verify. Never use JSONL DEFINE for ontology — save tool only. " + "Unique edge_types labels; one primary direction per label." + ), + "instance_edit_workflow": ( + "Instance edits: (1) kartograph_get_schema_ontology; " + "(2) kartograph_search_graph_by_slug or kartograph_list_instances_by_type; " + "(3) prefer UPDATE for existing slugs, CREATE only for new; " + "(4) kartograph_validate_graph_mutations then kartograph_apply_graph_mutations " + "for small batches (≤20 lines), or apply-from-file for larger; " + "(5) verify affected slugs. 
Copy JSONL shapes from helpers/mutation-examples.jsonl." + ), + "jsonl_shape_reference": ( + "Every JSONL line needs op (CREATE|UPDATE|DELETE) and type (node|edge). " + "Use set_properties (not properties). UPDATE/DELETE require top-level id. " + "CREATE nodes need label, id, set_properties with slug, name, data_source_id." + ), + "confirmation_policy": ( + "Auto-apply after validate when operator said apply/fix/update or change is a single " + "non-destructive UPDATE. Confirm before DELETE nodes, bulk CREATE (>5 lines), or " + "schema type removal. Never apply without validate passing." + ), + "session_reporting": ( + "After successful apply, report operation counts, affected slugs/labels, and any " + "follow-up needed. End with write op summary." ), }, } +_ONE_OFF_MUTATIONS_SYSTEM_PROMPT = ( + "You are the Graph Management Assistant in One-off Mutations mode. " + "The operator requests specific schema or instance changes; you implement them yourself " + "using Kartograph schema tools with validate-then-apply mutation workflow. " + "Every write must be auditable via mutation tools — do not instruct manual JSONL entry." +) + +_ONE_OFF_MUTATIONS_GUARDRAILS: tuple[str, ...] 
= ( + "Implement requested edits in-session via kartograph_* tools; do not ask the operator to paste JSONL manually.", + "Validate before every apply; report validation errors verbatim.", + "Prefer UPDATE over CREATE for existing slugs; strict CREATE rejects duplicate ids/slugs.", + "Do not start prepopulation scanners or run_scanner.py unless the operator explicitly requests bulk import.", + "DELETE on nodes requires explicit operator confirmation (cascades connected edges).", + "Schema saves require confirmation unless the operator explicitly approved.", + "Route bulk enrichment requests to Extraction Jobs mode; route greenfield ontology design to Initial Schema Design.", +) + + class ExtractionSkillResolutionService: """Resolve session skills from global templates + KG overrides.""" @@ -326,9 +382,14 @@ async def resolve_for_graph_management_turn( overlay = dict(_UI_MODE_SKILL_OVERLAYS.get(ui_mode, {})) merged_skills = dict(base.skills) merged_skills.update(overlay) + guardrails = base.guardrails + system_prompt = base.system_prompt + if ui_mode == GraphManagementUiMode.ONE_OFF_MUTATIONS: + system_prompt = _ONE_OFF_MUTATIONS_SYSTEM_PROMPT + guardrails = base.guardrails + _ONE_OFF_MUTATIONS_GUARDRAILS return ResolvedExtractionSkillPack( - system_prompt=base.system_prompt, + system_prompt=system_prompt, prompt_hierarchy=base.prompt_hierarchy, - guardrails=base.guardrails, + guardrails=guardrails, skills=merged_skills, ) diff --git a/src/api/extraction/domain/mutation_jsonl_metrics.py b/src/api/extraction/domain/mutation_jsonl_metrics.py new file mode 100644 index 000000000..5d7665d31 --- /dev/null +++ b/src/api/extraction/domain/mutation_jsonl_metrics.py @@ -0,0 +1,66 @@ +"""Count graph write operations from applied mutation JSONL.""" + +from __future__ import annotations + +import json + + +def metrics_from_mutation_jsonl(jsonl_content: str) -> dict[str, int]: + """Count instance CREATE/UPDATE/DELETE operations; ignore schema DEFINE operations.""" + 
entities_created = 0 + entities_modified = 0 + entities_deleted = 0 + relationships_created = 0 + relationships_modified = 0 + relationships_deleted = 0 + + for raw_line in jsonl_content.splitlines(): + line = raw_line.strip() + if not line: + continue + try: + row = json.loads(line) + except json.JSONDecodeError: + continue + if not isinstance(row, dict): + continue + + op = str(row.get("op") or "").upper() + entity_type = str(row.get("type") or "").lower() + if op == "DEFINE": + continue + if op not in {"CREATE", "UPDATE", "DELETE"}: + continue + + if entity_type == "node": + if op == "CREATE": + entities_created += 1 + elif op == "UPDATE": + entities_modified += 1 + else: + entities_deleted += 1 + elif entity_type == "edge": + if op == "CREATE": + relationships_created += 1 + elif op == "UPDATE": + relationships_modified += 1 + else: + relationships_deleted += 1 + + write_ops = ( + entities_created + + entities_modified + + entities_deleted + + relationships_created + + relationships_modified + + relationships_deleted + ) + return { + "entities_created": entities_created, + "entities_modified": entities_modified, + "entities_deleted": entities_deleted, + "relationships_created": relationships_created, + "relationships_modified": relationships_modified, + "relationships_deleted": relationships_deleted, + "write_ops": write_ops, + } diff --git a/src/api/extraction/infrastructure/extraction_job_mutation_metrics.py b/src/api/extraction/infrastructure/extraction_job_mutation_metrics.py index 0b421650e..f3e1e3846 100644 --- a/src/api/extraction/infrastructure/extraction_job_mutation_metrics.py +++ b/src/api/extraction/infrastructure/extraction_job_mutation_metrics.py @@ -2,76 +2,15 @@ from __future__ import annotations -import json from pathlib import Path -from typing import Any -from graph.domain.value_objects import EntityType, MutationOperationType +from extraction.domain.mutation_jsonl_metrics import metrics_from_mutation_jsonl - -def 
metrics_from_mutation_jsonl(jsonl_content: str) -> dict[str, int]: - """Count instance CREATE/UPDATE operations; ignore schema DEFINE operations.""" - entities_created = 0 - entities_modified = 0 - entities_deleted = 0 - relationships_created = 0 - relationships_modified = 0 - relationships_deleted = 0 - - for raw_line in jsonl_content.splitlines(): - line = raw_line.strip() - if not line: - continue - try: - row = json.loads(line) - except json.JSONDecodeError: - continue - if not isinstance(row, dict): - continue - - op = str(row.get("op") or "").upper() - entity_type = str(row.get("type") or "").lower() - if op == MutationOperationType.DEFINE.value: - continue - if op not in { - MutationOperationType.CREATE.value, - MutationOperationType.UPDATE.value, - MutationOperationType.DELETE.value, - }: - continue - - if entity_type == EntityType.NODE.value: - if op == MutationOperationType.CREATE.value: - entities_created += 1 - elif op == MutationOperationType.UPDATE.value: - entities_modified += 1 - else: - entities_deleted += 1 - elif entity_type == EntityType.EDGE.value: - if op == MutationOperationType.CREATE.value: - relationships_created += 1 - elif op == MutationOperationType.UPDATE.value: - relationships_modified += 1 - else: - relationships_deleted += 1 - - write_ops = ( - entities_created - + entities_modified - + entities_deleted - + relationships_created - + relationships_modified - + relationships_deleted - ) - return { - "entities_created": entities_created, - "entities_modified": entities_modified, - "entities_deleted": entities_deleted, - "relationships_created": relationships_created, - "relationships_modified": relationships_modified, - "relationships_deleted": relationships_deleted, - "write_ops": write_ops, - } +__all__ = [ + "applied_mutation_jsonl_from_workdir", + "metrics_from_mutation_jsonl", + "metrics_from_mutation_workdir", +] def metrics_from_mutation_workdir(job_root: Path) -> dict[str, int]: diff --git 
a/src/api/extraction/infrastructure/sticky_session_workdir_materializer.py b/src/api/extraction/infrastructure/sticky_session_workdir_materializer.py index f45d60a88..aca0a857d 100644 --- a/src/api/extraction/infrastructure/sticky_session_workdir_materializer.py +++ b/src/api/extraction/infrastructure/sticky_session_workdir_materializer.py @@ -8,6 +8,10 @@ import zipfile from extraction.domain.prepared_job_package_source import PreparedJobPackageSource +from extraction.infrastructure.extraction_job_helpers import ( + HELPER_BUNDLE_NAMES, + HELPERS_DIR, +) from extraction.infrastructure.instance_generator_templates import ( EXAMPLES_DIR, EXAMPLE_SCANNER_NAMES, @@ -110,6 +114,7 @@ def prepare( marker = session_root / "knowledge-graph-id" marker.write_text(knowledge_graph_id, encoding="utf-8") self._materialize_instance_generators(session_root) + self._materialize_mutation_helpers(session_root) self._write_workspace_index( session_root=session_root, knowledge_graph_id=knowledge_graph_id, @@ -139,6 +144,16 @@ def _materialize_instance_generators(session_root: Path) -> None: shutil.copy2(source, examples_target / name) (target_dir / "out").mkdir(parents=True, exist_ok=True) + @staticmethod + def _materialize_mutation_helpers(session_root: Path) -> None: + """Copy JSONL mutation examples for one-off graph edits.""" + helpers_dir = session_root / "helpers" + helpers_dir.mkdir(parents=True, exist_ok=True) + for name in HELPER_BUNDLE_NAMES: + source = HELPERS_DIR / name + if source.is_file(): + shutil.copy2(source, helpers_dir / name) + @staticmethod def _extension_counts(root: Path) -> dict[str, int]: """Summarize file extensions under one materialized repository folder.""" diff --git a/src/api/extraction/ports/repositories.py b/src/api/extraction/ports/repositories.py index 03c902fed..69020a98c 100644 --- a/src/api/extraction/ports/repositories.py +++ b/src/api/extraction/ports/repositories.py @@ -5,6 +5,7 @@ from typing import Protocol from 
extraction.domain.entities.agent_session import ExtractionAgentSession +from extraction.domain.extraction_job import ExtractionJobRecord from extraction.domain.value_objects import ExtractionSessionMode, ExtractionSessionRunMetric @@ -50,3 +51,9 @@ async def get_overrides_for_knowledge_graph( mode: ExtractionSessionMode, ) -> dict[str, str]: ... + +class IGraphManagementSessionArchivalRepository(Protocol): + """Persist archived Graph Management Assistant session write history.""" + + async def insert_archived_session_job(self, job: ExtractionJobRecord) -> None: ... + diff --git a/src/api/tests/unit/extraction/application/test_archived_extraction_history.py b/src/api/tests/unit/extraction/application/test_archived_extraction_history.py index a00684ea8..2ecec28bd 100644 --- a/src/api/tests/unit/extraction/application/test_archived_extraction_history.py +++ b/src/api/tests/unit/extraction/application/test_archived_extraction_history.py @@ -4,7 +4,10 @@ from datetime import UTC, datetime -from extraction.application.archived_extraction_history import group_archived_jobs_by_run_and_set +from extraction.application.archived_extraction_history import ( + group_archived_jobs_by_run_and_set, + serialize_archived_job, +) from extraction.domain.extraction_job import ExtractionJobRecord, ExtractionJobStatus @@ -45,3 +48,28 @@ def test_group_archived_jobs_by_run_and_set() -> None: assert grouped[0]["jobCount"] == 2 assert grouped[0]["jobSets"][0]["jobSet"] == "Adapter Deep Extraction" assert grouped[1]["jobCount"] == 1 + + +def test_serialize_archived_job_includes_camel_case_metrics() -> None: + job = ExtractionJobRecord( + id="01JOB", + knowledge_graph_id="01KG", + job_id="gma-session-1", + job_set_name="Graph Management · One-off Mutations", + strategy="graph_management_session", + status=ExtractionJobStatus.ARCHIVED, + order_index=0, + description="", + input_tokens=1200, + output_tokens=400, + cost_usd=0.45, + entities_modified=2, + 
applied_mutations_jsonl='{"op":"UPDATE","type":"node","id":"adapter:abc"}', + ) + + payload = serialize_archived_job(job) + + assert payload["inputTokens"] == 1200 + assert payload["outputTokens"] == 400 + assert payload["costUsd"] == 0.45 + assert payload["strategy"] == "graph_management_session" diff --git a/src/api/tests/unit/extraction/application/test_graph_management_session_journal.py b/src/api/tests/unit/extraction/application/test_graph_management_session_journal.py index 721593b20..811ab4c0c 100644 --- a/src/api/tests/unit/extraction/application/test_graph_management_session_journal.py +++ b/src/api/tests/unit/extraction/application/test_graph_management_session_journal.py @@ -89,7 +89,7 @@ async def test_archive_session_mutations_creates_archived_job() -> None: assert job.job_id == "gma-session-2" assert job.strategy == "graph_management_session" assert job.applied_mutations_jsonl - assert "Graph Management · Schema Design" in job.job_set_name + assert "Graph Management · Initial Schema Design" in job.job_set_name def test_append_turn_usage_to_session_accumulates_tokens() -> None: @@ -169,6 +169,33 @@ async def test_archive_session_mutations_includes_tokens_and_initial_schema_labe assert job.job_set_name == "Graph Management · Initial Schema Design" +@pytest.mark.asyncio +async def test_archive_session_mutations_uses_one_off_mutations_job_set() -> None: + session_repo = _InMemorySessionRepository() + job_repo = _InMemoryJobRepository() + service = GraphManagementSessionJournalService( + session_repository=session_repo, + extraction_job_repository=job_repo, + ) + session = ExtractionAgentSession( + id="session-6", + user_id="user-1", + knowledge_graph_id="kg-1", + mode=ExtractionSessionMode.EXTRACTION_OPERATIONS, + created_at=datetime(2026, 6, 5, tzinfo=UTC), + ) + session.runtime_context["graph_management_ui_mode"] = GraphManagementUiMode.ONE_OFF_MUTATIONS.value + append_turn_usage_to_session( + session, + usage={"input_tokens": 100, "output_tokens": 50, 
"cost_usd": 0.02}, + ) + + await service.archive_session_mutations(session) + + assert len(job_repo.inserted) == 1 + assert job_repo.inserted[0].job_set_name == "Graph Management · One-off Mutations" + + @pytest.mark.asyncio async def test_archive_session_mutations_token_only_session() -> None: session_repo = _InMemorySessionRepository() diff --git a/src/api/tests/unit/extraction/application/test_schema_authoring_guide.py b/src/api/tests/unit/extraction/application/test_schema_authoring_guide.py index 856009cfe..ea8809878 100644 --- a/src/api/tests/unit/extraction/application/test_schema_authoring_guide.py +++ b/src/api/tests/unit/extraction/application/test_schema_authoring_guide.py @@ -18,7 +18,8 @@ def test_authoring_guide_documents_bootstrap_and_modeling_guidance() -> None: assert "approved_at" in SCHEMA_AUTHORING_GUIDE assert "500/503" in SCHEMA_AUTHORING_GUIDE assert "smoke-test" in SCHEMA_AUTHORING_GUIDE.lower() or "smoke test" in SCHEMA_AUTHORING_GUIDE.lower() - assert "run_scanner.py" in SCHEMA_AUTHORING_GUIDE + assert "## One-off mutations" in SCHEMA_AUTHORING_GUIDE + assert "mutation-examples.jsonl" in SCHEMA_AUTHORING_GUIDE assert "next_action" in SCHEMA_AUTHORING_GUIDE assert "## Relationship types" in SCHEMA_AUTHORING_GUIDE assert "one row per primary relationship label" in SCHEMA_AUTHORING_GUIDE diff --git a/src/api/tests/unit/extraction/application/test_skill_resolution_service.py b/src/api/tests/unit/extraction/application/test_skill_resolution_service.py index 6c1474159..dddcffa9f 100644 --- a/src/api/tests/unit/extraction/application/test_skill_resolution_service.py +++ b/src/api/tests/unit/extraction/application/test_skill_resolution_service.py @@ -119,3 +119,22 @@ async def test_override_merge_is_deterministic(self): ) assert first.skills == second.skills + + async def test_one_off_mutations_ui_mode_overlay(self) -> None: + from extraction.domain.value_objects import GraphManagementUiMode + + service = ExtractionSkillResolutionService( + 
override_repository=_InMemorySkillOverrideRepository() + ) + + resolved = await service.resolve_for_graph_management_turn( + knowledge_graph_id="kg-1", + mode=ExtractionSessionMode.EXTRACTION_OPERATIONS, + ui_mode=GraphManagementUiMode.ONE_OFF_MUTATIONS, + ) + + assert "One-off Mutations mode" in resolved.system_prompt + assert "instance_edit_workflow" in resolved.skills + assert "schema_edit_workflow" in resolved.skills + assert "confirmation_policy" in resolved.skills + assert any("DELETE on nodes" in item for item in resolved.guardrails) diff --git a/src/api/tests/unit/extraction/domain/test_mutation_jsonl_metrics.py b/src/api/tests/unit/extraction/domain/test_mutation_jsonl_metrics.py new file mode 100644 index 000000000..9d6e9e45a --- /dev/null +++ b/src/api/tests/unit/extraction/domain/test_mutation_jsonl_metrics.py @@ -0,0 +1,29 @@ +"""Unit tests for mutation JSONL metrics parsing.""" + +from __future__ import annotations + +from extraction.domain.mutation_jsonl_metrics import metrics_from_mutation_jsonl + + +def test_metrics_from_mutation_jsonl_counts_instance_operations() -> None: + jsonl = "\n".join( + [ + '{"op":"DEFINE","type":"node","label":"service"}', + '{"op":"CREATE","type":"node","id":"service:abc","label":"service"}', + '{"op":"UPDATE","type":"node","id":"service:abc","set_properties":{"name":"api"}}', + '{"op":"DELETE","type":"node","id":"service:old"}', + '{"op":"CREATE","type":"edge","id":"edge:1","label":"calls"}', + '{"op":"UPDATE","type":"edge","id":"edge:1","set_properties":{"weight":2}}', + '{"op":"DELETE","type":"edge","id":"edge:old"}', + ] + ) + + metrics = metrics_from_mutation_jsonl(jsonl) + + assert metrics["entities_created"] == 1 + assert metrics["entities_modified"] == 1 + assert metrics["entities_deleted"] == 1 + assert metrics["relationships_created"] == 1 + assert metrics["relationships_modified"] == 1 + assert metrics["relationships_deleted"] == 1 + assert metrics["write_ops"] == 6 diff --git 
a/src/api/tests/unit/extraction/infrastructure/test_sticky_session_workdir_materializer.py b/src/api/tests/unit/extraction/infrastructure/test_sticky_session_workdir_materializer.py index 5f402db71..207179a9f 100644 --- a/src/api/tests/unit/extraction/infrastructure/test_sticky_session_workdir_materializer.py +++ b/src/api/tests/unit/extraction/infrastructure/test_sticky_session_workdir_materializer.py @@ -184,3 +184,16 @@ def test_materializer_copies_instance_generator_templates(tmp_path: Path) -> Non readme = (generators_dir / "README.md").read_text(encoding="utf-8") assert "repository-files" in readme assert (generators_dir / "out").is_dir() + + +def test_materializer_copies_mutation_helper_examples(tmp_path: Path) -> None: + materializer = StickySessionWorkdirMaterializer(job_package_work_dir=tmp_path) + + session_root = materializer.prepare( + session_id="session-helpers", + knowledge_graph_id="kg-1", + job_packages=(), + ) + + assert (session_root / "helpers" / "mutation-examples.jsonl").is_file() + assert (session_root / "helpers" / "workload-mutations.sh").is_file() diff --git a/src/dev-ui/app/components/graph-management/GraphExtractionArchivedHistory.vue b/src/dev-ui/app/components/graph-management/GraphExtractionArchivedHistory.vue index 42a278b05..d425938d3 100644 --- a/src/dev-ui/app/components/graph-management/GraphExtractionArchivedHistory.vue +++ b/src/dev-ui/app/components/graph-management/GraphExtractionArchivedHistory.vue @@ -16,6 +16,7 @@ interface ArchivedJob { jobId: string jobSet: string status: string + strategy: string workerId: string | null startedAt: string | null completedAt: string | null @@ -82,7 +83,7 @@ async function loadHistory() { selectedJobId.value = payload.value?.runs[0]?.jobSets[0]?.jobs[0]?.jobId ?? null await loadSelectedMutations() } catch (e: unknown) { - error.value = e instanceof Error ? e.message : 'Failed to load archived extraction history' + error.value = e instanceof Error ? 
e.message : 'Failed to load graph writes history' payload.value = null } finally { loading.value = false @@ -128,8 +129,19 @@ function formatWhen(value: string | null | undefined): string { return new Date(value).toLocaleString() } -function formatCompactNumber(value: number): string { - return new Intl.NumberFormat(undefined, { notation: 'compact', maximumFractionDigits: 1 }).format(value) +function formatCost(value: number | null | undefined): string { + const amount = Number(value ?? 0) + if (!Number.isFinite(amount) || amount <= 0) return '$0.00' + if (amount < 0.01) return `$${amount.toFixed(4)}` + return `$${amount.toFixed(2)}` +} + +function jobKindLabel(job: ArchivedJob): string { + return job.strategy === 'graph_management_session' ? 'GMA session' : 'Extraction job' +} + +function jobKindVariant(job: ArchivedJob): 'secondary' | 'outline' { + return job.strategy === 'graph_management_session' ? 'secondary' : 'outline' } watch( @@ -144,23 +156,25 @@ watch( <CardHeader> <CardTitle class="flex items-center gap-2 text-base"> <Archive class="size-4" /> - Extraction archive + Graph Writes History </CardTitle> <CardDescription> - Permanent history of extraction jobs that applied graph writes, grouped by run and job set. + Permanent history of graph writes from Graph Management Assistant sessions and extraction + worker jobs, grouped by run and job set. </CardDescription> </CardHeader> <CardContent class="grid gap-4 xl:grid-cols-[260px_220px_minmax(0,1fr)]"> <div v-if="loading" class="col-span-full flex items-center gap-2 text-sm text-muted-foreground"> <Loader2 class="size-4 animate-spin" /> - Loading archived extraction jobs... + Loading graph writes history... 
</div> <div v-else-if="error" class="col-span-full text-sm text-destructive"> {{ error }} <Button class="mt-2" size="sm" variant="outline" @click="loadHistory">Retry</Button> </div> <div v-else-if="!payload?.runs.length" class="col-span-full text-sm text-muted-foreground"> - No archived extraction jobs yet. Jobs that apply graph writes are archived automatically. + No archived graph writes yet. GMA sessions and extraction jobs that apply writes or incur + assistant cost are archived automatically when sessions end or jobs complete. </div> <template v-else> <div class="rounded border"> @@ -179,7 +193,7 @@ watch( > <p class="font-medium">{{ formatWhen(run.runStartedAt) }}</p> <p class="text-[10px] text-muted-foreground"> - {{ run.jobCount }} jobs · {{ run.writeOps }} writes · ${{ run.costUsd.toFixed(4) }} + {{ run.jobCount }} entries · {{ run.writeOps }} writes · {{ formatCost(run.costUsd) }} </p> </button> </div> @@ -198,8 +212,8 @@ watch( :class="index === selectedJobSetIndex ? 'border-primary bg-primary/5' : 'border-transparent'" @click="selectJobSet(index)" > - <span class="font-medium">{{ set.jobSet }}</span> - <ChevronRight class="size-3 text-muted-foreground" /> + <span class="font-medium leading-snug">{{ set.jobSet }}</span> + <ChevronRight class="size-3 shrink-0 text-muted-foreground" /> </button> </div> </div> @@ -218,9 +232,14 @@ watch( :class="job.jobId === selectedJobId ? 
'border-primary bg-primary/5' : 'border-transparent'" @click="selectJob(job.jobId)" > - <p class="font-mono">{{ job.jobId }}</p> - <p class="text-[10px] text-muted-foreground"> - {{ job.writeOps }} writes · {{ formatCompactNumber(job.inputTokens) }}/{{ formatCompactNumber(job.outputTokens) }} tokens + <div class="flex items-center gap-2"> + <Badge :variant="jobKindVariant(job)" class="text-[9px] px-1 py-0"> + {{ jobKindLabel(job) }} + </Badge> + <p class="truncate font-mono">{{ job.jobId }}</p> + </div> + <p class="mt-1 text-[10px] text-muted-foreground"> + {{ job.writeOps }} writes · {{ formatCost(job.costUsd) }} </p> </button> </div> @@ -229,6 +248,7 @@ watch( <div v-if="selectedJob" class="rounded border p-3 text-xs"> <div class="flex flex-wrap items-center gap-2"> <Badge variant="outline">{{ selectedJob.status }}</Badge> + <Badge :variant="jobKindVariant(selectedJob)">{{ jobKindLabel(selectedJob) }}</Badge> <span v-if="selectedJob.workerId" class="font-mono text-muted-foreground">{{ selectedJob.workerId }}</span> </div> <Separator class="my-2" /> @@ -237,7 +257,9 @@ watch( <p>{{ selectedJob.entitiesModified }} entities modified</p> <p>{{ selectedJob.relationshipsCreated }} relationships created</p> <p>{{ selectedJob.relationshipsModified }} relationships modified</p> - <p class="font-medium text-foreground sm:col-span-2">{{ selectedJob.writeOps }} total write ops</p> + <p class="font-medium text-foreground sm:col-span-2"> + {{ selectedJob.writeOps }} total write ops · {{ formatCost(selectedJob.costUsd) }} + </p> </div> </div> @@ -254,7 +276,7 @@ watch( class="max-h-64 overflow-auto p-3 font-mono text-[10px] leading-relaxed whitespace-pre-wrap break-all" >{{ mutationJsonl }}</pre> <p v-else class="px-3 py-4 text-xs text-muted-foreground"> - No stored mutation JSONL for this job. + No stored mutation JSONL for this entry (token-only GMA session or no graph writes). 
</p> </div> </div> diff --git a/src/dev-ui/app/components/graph-management/GraphManagementMutationAuthoringPanel.vue b/src/dev-ui/app/components/graph-management/GraphManagementMutationAuthoringPanel.vue new file mode 100644 index 000000000..fa98309f6 --- /dev/null +++ b/src/dev-ui/app/components/graph-management/GraphManagementMutationAuthoringPanel.vue @@ -0,0 +1,187 @@ +<script setup lang="ts"> +import { ref } from 'vue' +import { toast } from 'vue-sonner' +import { + BookOpen, + GitBranch, + Loader2, + PencilRuler, + Play, + Plus, + RefreshCw, + Trash2, +} from 'lucide-vue-next' +import { Button } from '@/components/ui/button' +import { Card, CardContent, CardDescription, CardHeader, CardTitle } from '@/components/ui/card' +import { Separator } from '@/components/ui/separator' +import MutationTemplates from '@/components/graph/MutationTemplates.vue' +import { useGraphApi } from '@/composables/api/useGraphApi' +import { generateHexId } from '@/utils/mutationParser' +import { getMergedEditorContent } from '@/utils/mutationConsole' + +const props = defineProps<{ + kgId: string +}>() + +const emit = defineEmits<{ + applied: [] +}>() + +const graphApi = useGraphApi() +const jsonlContent = ref('') +const applying = ref(false) +const applyError = ref<string | null>(null) + +const quickStartTemplates = [ + { + name: 'Create a Node', + description: 'Define a type and create a node', + icon: Plus, + content: () => [ + '{"op": "DEFINE", "type": "node", "label": "person", "description": "A person entity", "required_properties": ["name"]}', + `{"op": "CREATE", "type": "node", "label": "person", "id": "person:${generateHexId()}", "set_properties": {"name": "Alice", "slug": "alice", "data_source_id": "dev-ui", "source_path": "manual"}}`, + ].join('\n'), + }, + { + name: 'Create an Edge', + description: 'Define a relationship and connect nodes', + icon: GitBranch, + content: () => [ + '{"op": "DEFINE", "type": "edge", "label": "knows", "description": "Two entities know each 
other", "required_properties": []}', + `{"op": "CREATE", "type": "edge", "label": "knows", "id": "knows:${generateHexId()}", "start_id": "person:a1b2c3d4e5f67890", "end_id": "person:f6e5d4c3b2a10987", "set_properties": {"data_source_id": "dev-ui", "source_path": "manual"}}`, + ].join('\n'), + }, + { + name: 'Update Properties', + description: 'Modify properties on an existing entity', + icon: RefreshCw, + content: () => + '{"op": "UPDATE", "type": "node", "id": "person:a1b2c3d4e5f67890", "set_properties": {"email": "alice@example.com"}}', + }, + { + name: 'Delete an Entity', + description: 'Remove a node or edge from the graph', + icon: Trash2, + content: () => '{"op": "DELETE", "type": "node", "id": "person:a1b2c3d4e5f67890"}', + }, +] as const + +function insertTemplate(content: string) { + jsonlContent.value = getMergedEditorContent(jsonlContent.value, content) + applyError.value = null +} + +function clearEditor() { + jsonlContent.value = '' + applyError.value = null +} + +async function applyMutations() { + const body = jsonlContent.value.trim() + if (!body) { + applyError.value = 'Add one or more JSONL mutation operations first.' + return + } + + applying.value = true + applyError.value = null + try { + await graphApi.applyMutations(props.kgId, body) + toast.success('Mutations applied') + jsonlContent.value = '' + emit('applied') + } catch (err) { + applyError.value = err instanceof Error ? err.message : 'Failed to apply mutations' + toast.error('Failed to apply mutations', { description: applyError.value }) + } finally { + applying.value = false + } +} +</script> + +<template> + <Card> + <CardHeader> + <CardTitle class="flex items-center gap-2 text-base"> + <PencilRuler class="size-4" /> + Mutation Authoring + </CardTitle> + <CardDescription> + Compose JSONL mutations yourself — independent from the assistant chat above. Use templates + to populate the editor, then apply directly to this knowledge graph. 
+ </CardDescription> + </CardHeader> + <CardContent class="space-y-4"> + <div class="grid gap-3 sm:grid-cols-2 xl:grid-cols-4"> + <button + v-for="template in quickStartTemplates" + :key="template.name" + type="button" + class="rounded-lg border bg-card p-3 text-left transition-colors hover:bg-muted/50 focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring" + @click="insertTemplate(template.content())" + > + <div class="flex items-center gap-2"> + <component :is="template.icon" class="size-4 shrink-0 text-primary" /> + <span class="text-sm font-medium">{{ template.name }}</span> + </div> + <p class="mt-1 text-xs text-muted-foreground">{{ template.description }}</p> + </button> + </div> + + <div class="grid gap-4 xl:grid-cols-[minmax(0,1fr)_17.5rem]"> + <div class="space-y-3 rounded-lg border bg-muted/20 p-3"> + <div class="flex items-center justify-between gap-2"> + <p class="text-xs font-medium uppercase tracking-wide text-muted-foreground"> + JSONL editor + </p> + <Button + size="sm" + variant="ghost" + class="h-7 px-2 text-xs" + :disabled="!jsonlContent.trim() || applying" + @click="clearEditor" + > + <Trash2 class="mr-1 size-3" /> + Clear + </Button> + </div> + <textarea + v-model="jsonlContent" + class="min-h-56 w-full rounded-md border border-input bg-background px-3 py-2 font-mono text-xs leading-relaxed shadow-sm placeholder:text-muted-foreground focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring" + placeholder='{"op":"UPDATE","type":"node","id":"adapter:abc123","set_properties":{"transport":"maestro"}}' + /> + <div class="flex flex-wrap items-center gap-2"> + <Button size="sm" :disabled="applying || !jsonlContent.trim()" @click="applyMutations"> + <Loader2 v-if="applying" class="mr-1.5 size-3.5 animate-spin" /> + <Play v-else class="mr-1.5 size-3.5" /> + Apply mutations + </Button> + <span class="text-xs text-muted-foreground"> + Applies immediately — not tracked in the assistant session journal. 
+ </span> + </div> + <p v-if="applyError" class="text-xs text-destructive"> + {{ applyError }} + </p> + </div> + + <div class="rounded-lg border bg-card p-3"> + <p class="mb-3 flex items-center gap-2 text-xs font-medium uppercase tracking-wide text-muted-foreground"> + <BookOpen class="size-3.5" /> + Templates + </p> + <div class="max-h-[min(28rem,60dvh)] overflow-y-auto overscroll-contain pr-1"> + <MutationTemplates @insert="insertTemplate" /> + </div> + </div> + </div> + + <Separator /> + + <p class="text-xs text-muted-foreground"> + Tip: click any template to append its JSONL into the editor. Fresh IDs are generated for + create operations each time you insert. + </p> + </CardContent> + </Card> +</template> diff --git a/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue b/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue index e5988b0ec..7f9ec96e2 100644 --- a/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue +++ b/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue @@ -48,6 +48,7 @@ import GraphDesignEntitiesPanel from '@/components/graph-management/GraphDesignE import GraphDesignRelationshipsPanel from '@/components/graph-management/GraphDesignRelationshipsPanel.vue' import GraphExtractionJobsWorkspace from '@/components/graph-management/GraphExtractionJobsWorkspace.vue' import GraphExtractionArchivedHistory from '@/components/graph-management/GraphExtractionArchivedHistory.vue' +import GraphManagementMutationAuthoringPanel from '@/components/graph-management/GraphManagementMutationAuthoringPanel.vue' import { GRAPH_MANAGEMENT_INPUT_PLACEHOLDERS, GRAPH_MANAGEMENT_MODE_LABELS, @@ -108,7 +109,6 @@ import { } from '@/utils/kgMutationLogs' import { streamExtractionChatTurn, streamRuntimeWarmup } from '@/utils/kgExtractionChat' import { applyThinkingRecentUpdate } from '@/utils/thinkingActivityLines' -import { useGraphApi } from '@/composables/api/useGraphApi' import type { DesignArtifactsResponse } from '@/utils/kgDesignArtifacts' const 
runtimeConfig = useRuntimeConfig() @@ -198,7 +198,6 @@ const route = useRoute() const { hasTenant, tenantVersion } = useTenant() const { extractErrorMessage } = useErrorHandler() const { apiFetch, currentTenantId } = useApiClient() -const graphApi = useGraphApi() const kgId = computed(() => String(route.params.kgId ?? '')) const kgIdentity = ref<KnowledgeGraphIdentity | null>(null) const dataSourceCount = ref(0) @@ -251,9 +250,6 @@ const selectedInlineRunId = ref<string | null>(null) const inlineRunLogs = ref<string[]>([]) const inlineRunLogsLoading = ref(false) const inlineRunLogsError = ref<string | null>(null) -const inlineMutationJsonl = ref('') -const inlineMutationApplying = ref(false) -const inlineMutationApplyError = ref<string | null>(null) const designArtifactsReloadNonce = ref(0) const designArtifactsRefreshing = ref(false) @@ -403,7 +399,7 @@ const graphManagementChatDescription = computed(() => { return 'Define extraction job sets with per-instance descriptions, review ontology schema, and run parallel extraction workers for this knowledge graph.' } if (graphManagementMode.value === 'one-off-mutations') { - return 'Author and apply one-off graph mutations scoped to this knowledge graph. Use the assistant below for mutation guidance and workspace context.' + return 'Ask the assistant to change schema types or specific instances — it validates and applies mutations directly. Use manual JSONL below only for power-user overrides.' } return 'Design and refine schema readiness, validation, and bootstrap transition for this knowledge graph. Use the assistant below to prepare workspace artifacts.' }) @@ -744,26 +740,6 @@ async function loadInlineRunLogs(runId: string) { } } -async function applyInlineMutations() { - if (!kgId.value || inlineMutationJsonl.value.trim().length === 0) { - inlineMutationApplyError.value = 'Add one or more JSONL mutation operations first.' 
- return - } - inlineMutationApplying.value = true - inlineMutationApplyError.value = null - try { - await graphApi.applyMutations(kgId.value, inlineMutationJsonl.value.trim()) - toast.success('Mutations applied') - inlineMutationJsonl.value = '' - await loadMutationLogRuns() - } catch (err) { - inlineMutationApplyError.value = extractErrorMessage(err) - toast.error('Failed to apply mutations', { description: inlineMutationApplyError.value }) - } finally { - inlineMutationApplying.value = false - } -} - function returnToWorkspaceOverview() { navigateTo(buildManageStepUrl(kgId.value)) } @@ -844,11 +820,11 @@ async function loadMutationLogRuns() { if (isForbiddenHttpError(err)) { mutationLogLoadError.value = resolveForbiddenReason( err, - 'You do not have permission to view mutation logs for this graph.', + 'You do not have permission to view graph writes history for this graph.', ) } else { mutationLogLoadError.value = extractErrorMessage(err) - toast.error('Failed to load mutation log runs', { + toast.error('Failed to load archived write history', { description: mutationLogLoadError.value, }) } @@ -888,7 +864,7 @@ async function loadMutationLogEntryPreviews(offset = 0) { preview_available: false, } mutationLogEntryPreviewOffset.value = offset - toast.error('Failed to load mutation log entry previews', { + toast.error('Failed to load graph write entry previews', { description: extractErrorMessage(err), }) } finally { @@ -1605,7 +1581,7 @@ watch( </div> <div> <div class="text-2xl font-bold">{{ mutationLogRuns.length }}</div> - <p class="text-xs text-muted-foreground">Mutation Runs</p> + <p class="text-xs text-muted-foreground">Archived writes</p> </div> </CardContent> </Card> @@ -2096,39 +2072,11 @@ watch( </CardContent> </Card> - <Card v-else-if="selectedRailItemId === 'mutation-authoring'"> - <CardHeader> - <CardTitle class="text-base flex items-center gap-2"> - <PencilRuler class="size-4" /> - Mutation authoring - </CardTitle> - <CardDescription> - Author and 
apply one-off JSONL mutations directly in this workspace. - </CardDescription> - </CardHeader> - <CardContent class="space-y-3 text-sm"> - <div class="space-y-3 rounded-lg border bg-muted/30 p-3"> - <p class="text-xs font-medium text-muted-foreground">Mutation payload (JSONL)</p> - <textarea - v-model="inlineMutationJsonl" - class="min-h-44 w-full rounded-md border border-input bg-background px-3 py-2 font-mono text-xs leading-relaxed shadow-sm placeholder:text-muted-foreground focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring" - placeholder='{"op":"CREATE","type":"node","label":"repo","id":"repo:example","set_properties":{"name":"example"}}' - /> - <div class="flex flex-wrap items-center gap-2"> - <Button size="sm" :disabled="inlineMutationApplying" @click="applyInlineMutations"> - <Loader2 v-if="inlineMutationApplying" class="mr-1.5 size-3.5 animate-spin" /> - Apply Mutations - </Button> - <span class="text-xs text-muted-foreground"> - Applies directly to this knowledge graph without page navigation. 
- </span> - </div> - <p v-if="inlineMutationApplyError" class="text-xs text-destructive"> - {{ inlineMutationApplyError }} - </p> - </div> - </CardContent> - </Card> + <GraphManagementMutationAuthoringPanel + v-else-if="selectedRailItemId === 'mutation-authoring'" + :kg-id="kgId" + @applied="refreshDesignArtifacts" + /> <Card v-else> <CardHeader> diff --git a/src/dev-ui/app/tests/kg-graph-management-artifacts.test.ts b/src/dev-ui/app/tests/kg-graph-management-artifacts.test.ts index e89b76a4e..12fe9dfed 100644 --- a/src/dev-ui/app/tests/kg-graph-management-artifacts.test.ts +++ b/src/dev-ui/app/tests/kg-graph-management-artifacts.test.ts @@ -34,6 +34,22 @@ describe('kgGraphManagementArtifacts', () => { ).toBe('extraction-jobs-setup') }) + it('defaults one-off mutations mode to schema entities', () => { + const operationsItems = buildGraphManagementRailItems({ + workspaceMode: 'extraction_operations', + transitionEligible: true, + blockingReasonCount: 0, + prepopulatedGapCount: 0, + hasMinimumEntityTypes: true, + hasMinimumRelationshipTypes: true, + sessionUpdatedAt: null, + hasActiveSession: true, + }) + expect( + resolveSchemaRailSelection(null, 'one-off-mutations', operationsItems), + ).toBe('schema-entities') + }) + it('maps ready status to done artifact rows', () => { expect(graphManagementRailItemDone('ready')).toBe(true) expect(graphManagementArtifactRowClass(true, true)).toContain('ring-primary') diff --git a/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts b/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts index 4791f2d80..d89e008d7 100644 --- a/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts +++ b/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts @@ -25,6 +25,7 @@ import { resolveRailSelectionForMode, resolveSharedSessionMode, } from '../utils/kgGraphManagement' +import { filterSchemaRailItems } from '../utils/kgGraphManagementArtifacts' const manageWorkspaceVue = readFileSync( resolve(__dirname, 
'../pages/knowledge-graphs/[kgId]/manage.vue'), @@ -42,6 +43,14 @@ const sharedConversationPanelVue = readFileSync( resolve(__dirname, '../components/extraction/SharedConversationPanel.vue'), 'utf-8', ) +const graphExtractionArchivedHistoryVue = readFileSync( + resolve(__dirname, '../components/graph-management/GraphExtractionArchivedHistory.vue'), + 'utf-8', +) +const graphManagementMutationAuthoringVue = readFileSync( + resolve(__dirname, '../components/graph-management/GraphManagementMutationAuthoringPanel.vue'), + 'utf-8', +) const manageWorkspaceHubTs = readFileSync( resolve(__dirname, '../utils/kgManageWorkspaceHub.ts'), 'utf-8', @@ -85,72 +94,69 @@ describe('Knowledge Graph Manage Workspace - graph management controls', () => { }) }) -describe('Knowledge Graph Manage Workspace - mutation log browser', () => { - it('renders mutation log step with scoped run listing', () => { - expect(manageWorkspaceVue).toContain('MutationLogs') - expect(manageWorkspaceVue).toContain('loadMutationLogRuns') - expect(manageWorkspaceVue).toContain('/management/knowledge-graphs/${kgId.value}/data-sources') +describe('Knowledge Graph Manage Workspace - graph writes history', () => { + it('renders graph writes history step with archived extraction component', () => { + expect(manageWorkspaceVue).toContain('GraphExtractionArchivedHistory') + expect(manageWorkspaceVue).toContain("activeStep === 'mutation-logs'") }) - it('loads sync runs per data source and filters to mutation-log runs', () => { - expect(manageWorkspaceVue).toContain('/management/data-sources/${ds.id}/sync-runs') - expect(manageWorkspaceVue).toContain('collectScopedMutationLogRuns') + it('loads archived history from management API', () => { + expect(graphExtractionArchivedHistoryVue).toContain('/extraction-jobs/archived-history') + expect(graphExtractionArchivedHistoryVue).toContain('loadHistory') }) - it('renders run detail summary with token and cost metrics', () => { - expect(manageWorkspaceVue).toContain('Token 
usage') - expect(manageWorkspaceVue).toContain('Cost (USD)') - expect(manageWorkspaceVue).toContain('token_usage_total') - expect(manageWorkspaceVue).toContain('cost_total_usd') + it('shows write count and cost instead of token fractions in job rows', () => { + expect(graphExtractionArchivedHistoryVue).toContain('formatCost(job.costUsd)') + expect(graphExtractionArchivedHistoryVue).toContain('writeOps') + expect(graphExtractionArchivedHistoryVue).not.toContain('inputTokens }}/{{') }) - it('separates operation class counts from per-entry previews', () => { - expect(manageWorkspaceVue).toContain('Operation class counts') - expect(manageWorkspaceVue).toContain('Per-entry operation previews') - expect(manageWorkspaceVue).toContain('Object.entries(selectedMutationLogRun.operation_counts)') - expect(manageWorkspaceVue).toContain('loadMutationLogEntryPreviews') + it('distinguishes GMA sessions from extraction worker jobs', () => { + expect(graphExtractionArchivedHistoryVue).toContain('graph_management_session') + expect(graphExtractionArchivedHistoryVue).toContain('GMA session') + expect(graphExtractionArchivedHistoryVue).toContain('Extraction job') }) }) -describe('KG-MANAGE-012 - graph-scoped mutation run list', () => { - it('loads runs only from graph-scoped data sources with KG metadata filtering', () => { - expect(manageWorkspaceVue).toContain('collectScopedMutationLogRuns') - expect(manageWorkspaceVue).toContain('knowledge_graph_id') +describe('KG-MANAGE-012 - archived graph writes grouping', () => { + it('groups archived jobs by run and job set', () => { + expect(graphExtractionArchivedHistoryVue).toContain('payload.runs') + expect(graphExtractionArchivedHistoryVue).toContain('jobSets') + expect(graphExtractionArchivedHistoryVue).toContain('set.jobSet') }) - it('defaults run list ordering to newest-first', () => { - expect(manageWorkspaceVue).toContain('collectScopedMutationLogRuns') - expect(manageWorkspaceVue).toContain('resolveDefaultSelectedMutationLogRunId') + 
it('defaults selection to first run and job set', () => { + expect(graphExtractionArchivedHistoryVue).toContain('selectedRunIndex') + expect(graphExtractionArchivedHistoryVue).toContain('selectedJobSetIndex') + expect(graphExtractionArchivedHistoryVue).toContain('selectedJobId') }) - it('shows status, timestamp, source, and run identifier in run list items', () => { - expect(manageWorkspaceVue).toContain('run.data_source_name') - expect(manageWorkspaceVue).toContain('run.started_at') - expect(manageWorkspaceVue).toContain('run.status') - expect(manageWorkspaceVue).toContain('run.mutation_log_id') + it('shows job identifier, write ops, and cost in job list items', () => { + expect(graphExtractionArchivedHistoryVue).toContain('job.jobId') + expect(graphExtractionArchivedHistoryVue).toContain('job.writeOps') + expect(graphExtractionArchivedHistoryVue).toContain('formatCost(job.costUsd)') }) }) -describe('KG-MANAGE-013 - run detail richness', () => { - it('renders run summary, session reference, token/cost metrics, and operation counts', () => { - expect(manageWorkspaceVue).toContain('Run summary') - expect(manageWorkspaceVue).toContain('Session') - expect(manageWorkspaceVue).toContain('Token usage') - expect(manageWorkspaceVue).toContain('Cost (USD)') - expect(manageWorkspaceVue).toContain('Operation class counts') +describe('KG-MANAGE-013 - archived job detail richness', () => { + it('renders entity and relationship mutation metrics for selected job', () => { + expect(graphExtractionArchivedHistoryVue).toContain('entitiesCreated') + expect(graphExtractionArchivedHistoryVue).toContain('relationshipsModified') + expect(graphExtractionArchivedHistoryVue).toContain('formatCost(selectedJob.costUsd)') }) - it('loads paginated per-entry previews from mutation-log-entries API', () => { - expect(manageWorkspaceVue).toContain('buildMutationLogEntryPreviewUrl') - expect(manageWorkspaceVue).toContain('loadMutationLogEntryPreviews') - 
expect(manageWorkspaceVue).toContain('mutationLogEntryPreviewPage') + it('loads applied mutation JSONL for selected archived job', () => { + expect(graphExtractionArchivedHistoryVue).toContain('/archived-mutations') + expect(graphExtractionArchivedHistoryVue).toContain('loadSelectedMutations') + expect(graphExtractionArchivedHistoryVue).toContain('mutationJsonl') }) }) describe('KG-MANAGE-014 - no-preview fallback state', () => { - it('shows explicit fallback when entry previews are unavailable', () => { - expect(manageWorkspaceVue).toContain('MUTATION_LOG_NO_PREVIEW_MESSAGE') - expect(manageWorkspaceVue).toContain('hasMutationLogEntryPreviewPage') + it('shows explicit fallback when archived mutation JSONL is unavailable', () => { + expect(graphExtractionArchivedHistoryVue).toContain( + 'No stored mutation JSONL for this entry', + ) }) }) @@ -195,10 +201,10 @@ describe('KG-MANAGE-002 - workspace hub tile set', () => { expect(manageWorkspaceVue).toContain('workspaceHubTileClasses') expect(manageWorkspaceVue).toContain('Entity Types') expect(manageWorkspaceVue).toContain('Relationship Types') - expect(manageWorkspaceVue).toContain('Mutation Runs') + expect(manageWorkspaceVue).toContain('Archived writes') expect(manageWorkspaceHubTs).toContain('Data sources') expect(manageWorkspaceHubTs).toContain('Graph Management') - expect(manageWorkspaceHubTs).toContain('Mutation logs') + expect(manageWorkspaceHubTs).toContain('Graph Writes History') expect(manageWorkspaceHubTs).toContain('Maintain') }) @@ -214,7 +220,7 @@ describe('KG-MANAGE-002 - workspace hub tile set', () => { expect(cards.map((card) => card.title)).toEqual([ 'Data Sources', 'Graph Management', - 'MutationLogs', + 'Graph Writes History', 'Maintain', ]) }) @@ -375,7 +381,7 @@ describe('Shared conversation panel - extraction UX contract', () => { expect(sharedConversationPanelVue).toContain('showInitialConversationLoading') expect(sharedConversationPanelVue).toContain('showConversationRefreshIndicator') 
expect(sharedConversationPanelVue).toContain('scrollToBottom') - expect(sharedConversationPanelVue).toContain('el.scrollTop = el.scrollHeight') + expect(sharedConversationPanelVue).toContain('target.scrollTop = target.scrollHeight') }) it('accepts mode-aware input placeholder and session status props', () => { @@ -472,6 +478,7 @@ describe('KG-MANAGE-009 - hybrid lower panel mode-specific detail', () => { expect(manageWorkspaceVue).toContain('GraphExtractionJobsWorkspace') expect(manageWorkspaceVue).toContain("graphManagementMode === 'extraction-jobs'") expect(manageWorkspaceVue).toContain("selectedRailItemId === 'mutation-authoring'") + expect(manageWorkspaceVue).toContain('GraphManagementMutationAuthoringPanel') }) it('filters rail items to the active mode', () => { @@ -489,9 +496,13 @@ describe('KG-MANAGE-009 - hybrid lower panel mode-specific detail', () => { expect(filterRailItemsForMode(items, 'extraction-jobs').map((item) => item.id)).toContain( 'extraction-jobs-setup', ) - expect(filterRailItemsForMode(items, 'one-off-mutations').map((item) => item.id)).toContain( + expect( + filterSchemaRailItems(filterRailItemsForMode(items, 'one-off-mutations')).map((item) => item.id), + ).toEqual([ + 'schema-entities', + 'schema-relationships', 'mutation-authoring', - ) + ]) }) }) @@ -610,19 +621,18 @@ describe('KG-MANAGE-018 - keyboard operable step and rail actions', () => { }) describe('KG-MANAGE-019 - section-specific loading, empty, and error states', () => { - it('uses section state contracts for workspace, graph management, and mutation logs', () => { + it('uses section state contracts for workspace and graph management', () => { expect(manageWorkspaceVue).toContain('resolveSectionState') expect(manageWorkspaceVue).toContain('workspaceOverviewState') expect(manageWorkspaceVue).toContain('graphManagementSectionState') - expect(manageWorkspaceVue).toContain('mutationLogsSectionState') expect(manageWorkspaceVue).toContain('Retry workspace load') - 
expect(manageWorkspaceVue).toContain('Retry mutation log load') expect(manageWorkspaceVue).toContain('Retry session load') }) - it('renders actionable empty states for mutation log runs', () => { - expect(manageWorkspaceVue).toContain('mutationLogsSectionState.actionLabel') - expect(manageWorkspaceVue).toContain('Refresh runs') + it('renders loading and empty states inside graph writes history component', () => { + expect(graphExtractionArchivedHistoryVue).toContain('Loading graph writes history') + expect(graphExtractionArchivedHistoryVue).toContain('No archived graph writes yet') + expect(graphExtractionArchivedHistoryVue).toContain('@click="loadHistory"') }) }) @@ -653,9 +663,10 @@ describe('KG-MANAGE-021 - unified in-place graph operations', () => { }) it('applies one-off mutations directly in graph-management without mutations-console redirect', () => { - expect(manageWorkspaceVue).toContain('inlineMutationJsonl') - expect(manageWorkspaceVue).toContain('applyInlineMutations') - expect(manageWorkspaceVue).toContain('graphApi.applyMutations') + expect(manageWorkspaceVue).toContain('GraphManagementMutationAuthoringPanel') + expect(graphManagementMutationAuthoringVue).toContain('MutationTemplates') + expect(graphManagementMutationAuthoringVue).toContain('applyMutations') + expect(graphManagementMutationAuthoringVue).toContain('getMergedEditorContent') expect(manageWorkspaceVue).not.toContain('navigateTo(`/graph/mutations?kg_id=${kgId}&view=editor`)') }) }) diff --git a/src/dev-ui/app/utils/kgGraphManagement.ts b/src/dev-ui/app/utils/kgGraphManagement.ts index edd110e29..7cb5b79b9 100644 --- a/src/dev-ui/app/utils/kgGraphManagement.ts +++ b/src/dev-ui/app/utils/kgGraphManagement.ts @@ -32,7 +32,7 @@ export const GRAPH_MANAGEMENT_INPUT_PLACEHOLDERS: Record<GraphManagementMode, st 'extraction-jobs': 'Ask about extraction job sets, per-instance descriptions, or running extraction workers…', 'one-off-mutations': - 'Author or preview one-off graph mutations scoped to 
this knowledge graph…', + 'Ask for a schema or instance change — the assistant will validate and apply it…', } export interface GraphManagementRailItem { @@ -94,16 +94,16 @@ export function buildGraphManagementRailItems( label: 'Schema: Entities', status: input.hasMinimumEntityTypes ? 'ready' : 'in_progress', lastUpdated: sessionStamp, - detailHint: 'Entity type definitions and coverage snapshot.', - modes: ['initial-schema-design'], + detailHint: 'Entity type definitions and instance inventory.', + modes: ['initial-schema-design', 'one-off-mutations'], }, { id: 'schema-relationships', label: 'Schema: Relationships', status: input.hasMinimumRelationshipTypes ? 'ready' : 'in_progress', lastUpdated: sessionStamp, - detailHint: 'Relationship type definitions and edge coverage snapshot.', - modes: ['initial-schema-design'], + detailHint: 'Relationship type definitions and edge inventory.', + modes: ['initial-schema-design', 'one-off-mutations'], }, { id: 'schema-readiness', @@ -141,10 +141,10 @@ export function buildGraphManagementRailItems( }, { id: 'mutation-authoring', - label: 'Mutation authoring', + label: 'Mutation Authoring', status: input.workspaceMode === 'extraction_operations' ? 'ready' : 'blocked', lastUpdated: sessionStamp, - detailHint: 'One-off mutation preview and submit context.', + detailHint: 'Manual JSONL editor with templates — independent from the assistant.', modes: ['one-off-mutations'], }, ] diff --git a/src/dev-ui/app/utils/kgGraphManagementArtifacts.ts b/src/dev-ui/app/utils/kgGraphManagementArtifacts.ts index 2e339da08..d73d8784d 100644 --- a/src/dev-ui/app/utils/kgGraphManagementArtifacts.ts +++ b/src/dev-ui/app/utils/kgGraphManagementArtifacts.ts @@ -55,7 +55,7 @@ export function graphManagementArtifactHint(item: GraphManagementRailItem): stri return item.status === 'ready' ? 'Operations mode' : 'Complete schema first' } if (item.id === 'mutation-authoring') { - return item.status === 'ready' ? 
'JSONL mutations' : 'Complete schema first' + return item.status === 'ready' ? 'Templates + editor' : 'Complete schema first' } return item.detailHint } diff --git a/src/dev-ui/app/utils/kgManageState.ts b/src/dev-ui/app/utils/kgManageState.ts index c27d7fd12..98354abc2 100644 --- a/src/dev-ui/app/utils/kgManageState.ts +++ b/src/dev-ui/app/utils/kgManageState.ts @@ -31,10 +31,10 @@ export const SECTION_STATE_MESSAGES: Record< forbidden: 'You do not have permission to manage this knowledge graph.', }, 'mutation-logs': { - loading: 'Loading archived extraction history…', - empty: 'No archived extraction jobs with graph writes yet.', - error: 'Could not load archived extraction history.', - forbidden: 'You do not have permission to view extraction archive for this graph.', + loading: 'Loading graph writes history…', + empty: 'No archived graph writes yet.', + error: 'Could not load graph writes history.', + forbidden: 'You do not have permission to view graph writes history for this graph.', }, 'data-sources': { loading: 'Loading data source readiness for this knowledge graph…', diff --git a/src/dev-ui/app/utils/kgManageWorkspace.ts b/src/dev-ui/app/utils/kgManageWorkspace.ts index 4964bba9e..b0dd569e8 100644 --- a/src/dev-ui/app/utils/kgManageWorkspace.ts +++ b/src/dev-ui/app/utils/kgManageWorkspace.ts @@ -16,7 +16,7 @@ export type StepActionLabel = 'Open' | 'Revisit' | 'Run' export const WORKSPACE_STEP_TITLES: Record<WorkspaceStepId, string> = { 'data-sources': 'Data Sources', 'graph-management': 'Graph Management', - 'mutation-logs': 'Extraction Archive', + 'mutation-logs': 'Graph Writes History', maintain: 'Maintain', } @@ -196,7 +196,7 @@ function buildMutationLogsCard(input: WorkspaceOverviewInputs): WorkspaceStepCar status: input.workspaceStatus?.workspace_mode === 'extraction_operations' ? 
'needs_attention' : 'ready', - statusDetail: 'No mutation log runs recorded for this graph yet.', + statusDetail: 'No archived graph writes recorded for this graph yet.', actionLabel: 'Open', } } @@ -205,7 +205,7 @@ function buildMutationLogsCard(input: WorkspaceOverviewInputs): WorkspaceStepCar id: 'mutation-logs', title: WORKSPACE_STEP_TITLES['mutation-logs'], status: 'ready', - statusDetail: `${input.mutationLogRunCount} mutation run${input.mutationLogRunCount === 1 ? '' : 's'} available.`, + statusDetail: `${input.mutationLogRunCount} archived write entr${input.mutationLogRunCount === 1 ? 'y' : 'ies'} available.`, actionLabel: 'Revisit', } } diff --git a/src/dev-ui/app/utils/kgManageWorkspaceHub.ts b/src/dev-ui/app/utils/kgManageWorkspaceHub.ts index 5a696a132..2e39dbc69 100644 --- a/src/dev-ui/app/utils/kgManageWorkspaceHub.ts +++ b/src/dev-ui/app/utils/kgManageWorkspaceHub.ts @@ -146,10 +146,10 @@ export function buildWorkspaceHubTiles(input: WorkspaceHubOverview): WorkspaceHu { step: 3, key: 'mutation-logs', - title: 'Mutation logs', + title: 'Graph Writes History', subtitle: input.mutationLogRunCount > 0 - ? `${input.mutationLogRunCount} run${input.mutationLogRunCount === 1 ? '' : 's'} recorded` - : 'Review extraction and apply runs', + ? `${input.mutationLogRunCount} archived run${input.mutationLogRunCount === 1 ? '' : 's'} recorded` + : 'Review GMA sessions and extraction job writes', to: resolveStepDestination(input.kgId, 'mutation-logs'), enabled: input.dataSourceCount > 0, lockedReason: input.dataSourceCount > 0 ? null : 'Connect a data source before reviewing runs.', @@ -259,7 +259,7 @@ export function workspaceHubDescription(input: WorkspaceHubOverview): string { if (!designPhaseComplete(input)) { return 'Use Graph Management for the assistant and schema bootstrap. Green tiles use Revisit; the highlighted tile is your suggested next step.' } - return 'Continue with mutation logs or maintenance, or Revisit any completed step below.' 
+ return 'Continue with graph writes history or maintenance, or Revisit any completed step below.' } export function buildManageOverviewUrl(kgId: string): string { From 2897324e697f7b522cba8a97fbedffe17979ab24 Mon Sep 17 00:00:00 2001 From: aredenba-rh <aredenba@redhat.com> Date: Sun, 14 Jun 2026 15:40:41 -0400 Subject: [PATCH 134/153] refactor(dev-ui): remove session pointers from graph management Drop the session pointers rail item and detail panel from all GMA modes; session history now lives in Graph Writes History when chat is cleared. Co-authored-by: Cursor <cursoragent@cursor.com> --- .../extraction/SharedConversationPanel.vue | 2 +- .../pages/knowledge-graphs/[kgId]/manage.vue | 166 ++---------------- .../kg-graph-management-artifacts.test.ts | 2 +- .../knowledge-graph-manage-workspace.test.ts | 20 +-- src/dev-ui/app/utils/kgGraphManagement.ts | 9 - .../app/utils/kgGraphManagementArtifacts.ts | 2 +- 6 files changed, 18 insertions(+), 183 deletions(-) diff --git a/src/dev-ui/app/components/extraction/SharedConversationPanel.vue b/src/dev-ui/app/components/extraction/SharedConversationPanel.vue index 91dd28a7d..8f032984b 100644 --- a/src/dev-ui/app/components/extraction/SharedConversationPanel.vue +++ b/src/dev-ui/app/components/extraction/SharedConversationPanel.vue @@ -71,7 +71,7 @@ const props = withDefaults(defineProps<{ description: 'Design and refine schema readiness, validation, and extraction operations for this knowledge graph. 
Use the assistant below to drive workspace changes.', footerHint: - 'Use Schema & artifacts and Session pointers below to inspect workspace state; send notes or questions here.', + 'Use Design Artifacts below to inspect schema and instances; send notes or questions here.', }) const emit = defineEmits<{ diff --git a/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue b/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue index 7f9ec96e2..fcc44ca7e 100644 --- a/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue +++ b/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue @@ -173,27 +173,6 @@ interface ExtractionSessionResponse { updated_at: string } -interface SessionRunMetricView { - sync_run_id: string - mutation_log_id: string | null - status: string - started_at: string - completed_at: string | null - token_usage_total: number | null - cost_total_usd: number | null - operation_counts: Record<string, number> -} - -interface ExtractionSessionHistoryItem { - id: string - created_at: string - updated_at: string - archived_at: string | null - is_active: boolean - message_count: number - run_metrics: SessionRunMetricView[] -} - const route = useRoute() const { hasTenant, tenantVersion } = useTenant() const { extractErrorMessage } = useErrorHandler() @@ -215,7 +194,6 @@ const workspaceForbiddenReason = ref<string | null>(null) const validating = ref(false) const transitioning = ref(false) const sessionLoading = ref(false) -const sessionHistoryLoading = ref(false) const sessionLoadError = ref<string | null>(null) const sessionForbidden = ref(false) const sessionForbiddenReason = ref<string | null>(null) @@ -226,7 +204,6 @@ const runtimeReady = ref(false) const runtimeWarmupError = ref<string | null>(null) let runtimeWarmupGeneration = 0 const extractionSession = ref<ExtractionSessionResponse | null>(null) -const sessionHistory = ref<ExtractionSessionHistoryItem[]>([]) const draftMessage = ref('') const statusProjection = ref<WorkspaceStatusResponse | 
null>(null) const mutationLogLoading = ref(false) @@ -918,21 +895,22 @@ async function loadExtractionSession() { } } -async function loadSessionHistory() { - if (!kgId.value) return - sessionHistoryLoading.value = true +async function clearChat() { + // Clear chat resets the active extraction session for this knowledge graph. + if (!kgId.value || sessionForbidden.value) return + clearingChat.value = true try { - const response = await apiFetch<{ sessions: ExtractionSessionHistoryItem[] }>( - `/extraction/knowledge-graphs/${kgId.value}/sessions/${sharedSessionMode.value}/history`, + extractionSession.value = await apiFetch<ExtractionSessionResponse>( + `/extraction/knowledge-graphs/${kgId.value}/sessions/${sharedSessionMode.value}/clear-chat`, + { method: 'POST' }, ) - sessionHistory.value = response.sessions + toast.success('Extraction chat cleared') } catch (err) { - sessionHistory.value = [] - toast.error('Failed to load session history', { + toast.error('Failed to clear chat', { description: extractErrorMessage(err), }) } finally { - sessionHistoryLoading.value = false + clearingChat.value = false } } @@ -1215,26 +1193,6 @@ async function transitionToExtraction() { } } -async function clearChat() { - // Clear chat resets the active extraction session for this knowledge graph. 
- if (!kgId.value || sessionForbidden.value) return - clearingChat.value = true - try { - extractionSession.value = await apiFetch<ExtractionSessionResponse>( - `/extraction/knowledge-graphs/${kgId.value}/sessions/${sharedSessionMode.value}/clear-chat`, - { method: 'POST' }, - ) - toast.success('Extraction chat cleared') - await loadSessionHistory() - } catch (err) { - toast.error('Failed to clear chat', { - description: extractErrorMessage(err), - }) - } finally { - clearingChat.value = false - } -} - onMounted(() => { loadKgIdentity() loadWorkspaceStatus() @@ -1282,7 +1240,6 @@ watch( syncGraphManagementState() await Promise.all([ loadExtractionSession(), - loadSessionHistory(), loadGraphManagementDataSources(), refreshDesignArtifacts({ silent: true }), ]) @@ -2086,109 +2043,6 @@ watch( </CardDescription> </CardHeader> </Card> - - <Card id="graph-management-session-pointers" class="graph-management-session-pointers"> - <CardHeader> - <CardTitle class="text-base flex items-center gap-2"> - <ScrollText class="size-4" /> - Session pointers - </CardTitle> - <CardDescription> - Active bootstrap and extraction sessions, plus archived history for this knowledge graph. - </CardDescription> - </CardHeader> - <CardContent class="space-y-4 text-sm"> - <div class="grid gap-2 md:grid-cols-3 text-xs"> - <div class="rounded-lg border bg-muted/30 px-3 py-2"> - <p class="text-muted-foreground">Active schema bootstrap session</p> - <p class="mt-1 break-all font-mono"> - {{ statusProjection.session_pointers.active_schema_bootstrap_session_id ?? 'None' }} - </p> - </div> - <div class="rounded-lg border bg-muted/30 px-3 py-2"> - <p class="text-muted-foreground">Active extraction operations session</p> - <p class="mt-1 break-all font-mono"> - {{ statusProjection.session_pointers.active_extraction_operations_session_id ?? 
'None' }} - </p> - </div> - <div class="rounded-lg border bg-muted/30 px-3 py-2"> - <p class="text-muted-foreground">Most recent completed session</p> - <p class="mt-1 break-all font-mono"> - {{ statusProjection.session_pointers.most_recent_completed_session_id ?? 'None' }} - </p> - </div> - </div> - <div class="space-y-3 border-t pt-3"> - <div class="flex items-center justify-between"> - <p class="text-xs font-medium uppercase tracking-wide text-muted-foreground"> - Session history - </p> - <Button - size="sm" - variant="ghost" - class="h-6 px-2 text-[10px]" - :disabled="sessionHistoryLoading" - @click="loadSessionHistory" - > - Refresh - </Button> - </div> - <div - v-if="sessionHistoryLoading" - class="flex items-center gap-2 text-xs text-muted-foreground" - > - <Loader2 class="size-3.5 animate-spin" /> - Loading session history... - </div> - <div - v-else-if="sessionHistory.length === 0" - class="rounded-lg border border-dashed px-3 py-4 text-xs text-muted-foreground" - > - No archived or active sessions found for this scope yet. - </div> - <div v-else class="space-y-2"> - <div - v-for="entry in sessionHistory" - :key="entry.id" - class="rounded-lg border bg-card px-3 py-2 text-xs" - > - <div class="flex flex-wrap items-center justify-between gap-2"> - <p class="font-mono break-all">{{ entry.id }}</p> - <Badge :variant="entry.is_active ? 'default' : 'secondary'"> - {{ entry.is_active ? 
'Active' : 'Archived' }} - </Badge> - </div> - <p class="mt-1 text-muted-foreground"> - Updated {{ new Date(entry.updated_at).toLocaleString() }} - <span v-if="entry.archived_at"> - · Archived {{ new Date(entry.archived_at).toLocaleString() }} - </span> - </p> - <p class="mt-1 text-muted-foreground"> - {{ entry.message_count }} message(s) - · {{ entry.run_metrics.length }} linked run(s) - </p> - <div - v-if="entry.run_metrics.length > 0" - class="mt-2 space-y-1.5 rounded-lg border bg-muted/20 p-2" - > - <div - v-for="metric in entry.run_metrics" - :key="metric.sync_run_id" - class="flex flex-wrap items-center justify-between gap-2" - > - <span class="font-mono">{{ metric.mutation_log_id ?? metric.sync_run_id }}</span> - <span class="text-muted-foreground"> - {{ metric.token_usage_total ?? 0 }} tokens · - ${{ (metric.cost_total_usd ?? 0).toFixed(2) }} - </span> - </div> - </div> - </div> - </div> - </div> - </CardContent> - </Card> </div> </div> </section> diff --git a/src/dev-ui/app/tests/kg-graph-management-artifacts.test.ts b/src/dev-ui/app/tests/kg-graph-management-artifacts.test.ts index 12fe9dfed..45b7eccd3 100644 --- a/src/dev-ui/app/tests/kg-graph-management-artifacts.test.ts +++ b/src/dev-ui/app/tests/kg-graph-management-artifacts.test.ts @@ -30,7 +30,7 @@ describe('kgGraphManagementArtifacts', () => { resolveSchemaRailSelection(null, 'initial-schema-design', items), ).toBe('schema-entities') expect( - resolveSchemaRailSelection('session-pointers', 'extraction-jobs', items), + resolveSchemaRailSelection('schema-readiness', 'extraction-jobs', items), ).toBe('extraction-jobs-setup') }) diff --git a/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts b/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts index d89e008d7..1eff5a1f7 100644 --- a/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts +++ b/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts @@ -84,14 +84,6 @@ describe('Knowledge Graph Manage Workspace - graph 
management controls', () => { expect(manageWorkspaceVue).toContain('/workspace/transition-to-extraction') expect(manageWorkspaceVue).toContain('Go to Extraction/Mutations') }) - - it('loads scoped session history with run metrics after clear chat', () => { - expect(manageWorkspaceVue).toContain('loadSessionHistory') - expect(manageWorkspaceVue).toContain('/sessions/${sharedSessionMode.value}/history') - expect(manageWorkspaceVue).toContain('sessionHistory') - expect(manageWorkspaceVue).toContain('run_metrics') - expect(manageWorkspaceVue).toContain('Session history') - }) }) describe('Knowledge Graph Manage Workspace - graph writes history', () => { @@ -440,7 +432,7 @@ describe('KG-MANAGE-008 - hybrid lower panel shared rail', () => { expect(manageWorkspaceVue).toContain('GraphDesignEntitiesPanel') expect(manageWorkspaceVue).toContain('GraphDesignRelationshipsPanel') expect(manageWorkspaceVue).toContain('graph-management-artifact-detail') - expect(manageWorkspaceVue).toContain('graph-management-session-pointers') + expect(manageWorkspaceVue).not.toContain('graph-management-session-pointers') expect(manageWorkspaceVue).toContain('graphManagementArtifactRowClass') expect(manageWorkspaceVue).toContain('schemaRailItems') expect(manageWorkspaceVue).toContain('lg:grid-cols-[minmax(0,15.5rem)_minmax(0,1fr)]') @@ -463,9 +455,7 @@ describe('KG-MANAGE-008 - hybrid lower panel shared rail', () => { }) expect(items.every((item) => item.status && item.lastUpdated && item.label)).toBe(true) - expect(items.find((item) => item.id === 'session-pointers')?.modes).toEqual( - GRAPH_MANAGEMENT_MODE_ORDER, - ) + expect(items.map((item) => item.id)).not.toContain('session-pointers') }) }) @@ -568,14 +558,14 @@ describe('KG-MANAGE-016 - graph management top controls', () => { }) expect( - resolveRailSelectionForMode('session-pointers', 'extraction-jobs', items), - ).toBe('session-pointers') + resolveRailSelectionForMode('extraction-jobs-setup', 'extraction-jobs', items), + 
).toBe('extraction-jobs-setup') expect( isRailItemValidInMode('schema-readiness', 'extraction-jobs', items), ).toBe(false) expect( resolveRailSelectionForMode('schema-readiness', 'extraction-jobs', items), - ).toBe('session-pointers') + ).toBe('extraction-jobs-setup') }) it('builds graph management URLs with mode query for keyboard navigation', () => { diff --git a/src/dev-ui/app/utils/kgGraphManagement.ts b/src/dev-ui/app/utils/kgGraphManagement.ts index 7cb5b79b9..2a2918634 100644 --- a/src/dev-ui/app/utils/kgGraphManagement.ts +++ b/src/dev-ui/app/utils/kgGraphManagement.ts @@ -10,7 +10,6 @@ export type GraphManagementRailItemId = | 'schema-relationships' | 'schema-readiness' | 'validation-diagnostics' - | 'session-pointers' | 'extraction-jobs-setup' | 'mutation-authoring' @@ -123,14 +122,6 @@ export function buildGraphManagementRailItems( detailHint: 'Blocking reasons and prepopulated type gaps.', modes: ['initial-schema-design'], }, - { - id: 'session-pointers', - label: 'Session pointers', - status: input.hasActiveSession ? 
'ready' : 'in_progress', - lastUpdated: sessionStamp, - detailHint: 'Active bootstrap, extraction, and completed session references.', - modes: GRAPH_MANAGEMENT_MODE_ORDER, - }, { id: 'extraction-jobs-setup', label: 'Extraction jobs setup', diff --git a/src/dev-ui/app/utils/kgGraphManagementArtifacts.ts b/src/dev-ui/app/utils/kgGraphManagementArtifacts.ts index d73d8784d..e9dd57831 100644 --- a/src/dev-ui/app/utils/kgGraphManagementArtifacts.ts +++ b/src/dev-ui/app/utils/kgGraphManagementArtifacts.ts @@ -8,7 +8,7 @@ import { import type { StepStatusLabel } from './kgManageWorkspace' export function filterSchemaRailItems(items: GraphManagementRailItem[]): GraphManagementRailItem[] { - return items.filter((item) => item.id !== 'session-pointers') + return items } export function resolveSchemaRailSelection( From fd006a32d25706f27d6a074dcaeb83a97048e44a Mon Sep 17 00:00:00 2001 From: aredenba-rh <aredenba@redhat.com> Date: Sun, 14 Jun 2026 15:48:32 -0400 Subject: [PATCH 135/153] Dont show #RelationshipTypes x2.. 
show real count --- .../pages/knowledge-graphs/[kgId]/manage.vue | 9 +++- .../app/tests/kg-design-artifacts.test.ts | 43 +++++++++++-------- src/dev-ui/app/utils/kgDesignArtifacts.ts | 19 ++++++++ 3 files changed, 52 insertions(+), 19 deletions(-) diff --git a/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue b/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue index fcc44ca7e..1698c7712 100644 --- a/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue +++ b/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue @@ -110,6 +110,7 @@ import { import { streamExtractionChatTurn, streamRuntimeWarmup } from '@/utils/kgExtractionChat' import { applyThinkingRecentUpdate } from '@/utils/thinkingActivityLines' import type { DesignArtifactsResponse } from '@/utils/kgDesignArtifacts' +import { primaryRelationshipTypeLabels } from '@/utils/kgDesignArtifacts' const runtimeConfig = useRuntimeConfig() const { accessToken } = useAuth() @@ -605,10 +606,14 @@ async function loadOverviewMetrics() { try { const ontology = await apiFetch<{ node_types?: Array<{ label: string }> - edge_types?: Array<{ label: string }> + edge_types?: Array<{ + label: string + auto_generated?: boolean + inverse_of?: string | null + }> }>(`/management/knowledge-graphs/${kgId.value}/ontology`) entityTypeLabels.value = (ontology.node_types ?? []).map((t) => t.label) - relationshipTypeLabels.value = (ontology.edge_types ?? []).map((t) => t.label) + relationshipTypeLabels.value = primaryRelationshipTypeLabels(ontology.edge_types ?? []) } catch { entityTypeLabels.value = [] relationshipTypeLabels.value = [] diff --git a/src/dev-ui/app/tests/kg-design-artifacts.test.ts b/src/dev-ui/app/tests/kg-design-artifacts.test.ts index 7bed4be68..5b0d984af 100644 --- a/src/dev-ui/app/tests/kg-design-artifacts.test.ts +++ b/src/dev-ui/app/tests/kg-design-artifacts.test.ts @@ -1,25 +1,34 @@ -/** Tests for design artifact UI helpers. 
*/ - import { describe, expect, it } from 'vitest' import { - pageSlice, - prepopulationBadgeClass, - prepopulationLabel, - prepopulationMode, + isPrimaryRelationshipTypeForDisplay, + primaryRelationshipTypeCount, + primaryRelationshipTypeLabels, } from '../utils/kgDesignArtifacts' -describe('kgDesignArtifacts', () => { - it('maps prepopulation flags to k-extract-style labels', () => { - expect(prepopulationMode(true)).toBe('true') - expect(prepopulationMode(false)).toBe('false') - expect(prepopulationLabel(true)).toContain('prepopulated: true') - expect(prepopulationBadgeClass(true)).toContain('cyan') +describe('kgDesignArtifacts relationship type counting', () => { + const edgeTypes = [ + { label: 'contains', bidirectional: true }, + { label: 'contained_in', auto_generated: true, inverse_of: 'contains' }, + { label: 'calls', bidirectional: false }, + { label: 'calls_inverse', auto_generated: true, inverse_of: 'calls' }, + ] + + it('excludes auto-generated inverse edge types from primary counts', () => { + expect(primaryRelationshipTypeLabels(edgeTypes)).toEqual(['contains', 'calls']) + expect(primaryRelationshipTypeCount(edgeTypes)).toBe(2) + }) + + it('treats unidirectional edges as primary display types', () => { + expect(isPrimaryRelationshipTypeForDisplay({ label: 'depends_on' })).toBe(true) }) - it('pages instance lists consistently', () => { - const items = Array.from({ length: 25 }, (_, index) => index) - const slice = pageSlice({}, 'service', items) - expect(slice.items).toHaveLength(20) - expect(slice.totalPages).toBe(2) + it('treats inverse edge types as non-primary', () => { + expect( + isPrimaryRelationshipTypeForDisplay({ + label: 'contained_in', + auto_generated: true, + inverse_of: 'contains', + }), + ).toBe(false) }) }) diff --git a/src/dev-ui/app/utils/kgDesignArtifacts.ts b/src/dev-ui/app/utils/kgDesignArtifacts.ts index 57ad1cba6..6976c16c1 100644 --- a/src/dev-ui/app/utils/kgDesignArtifacts.ts +++ b/src/dev-ui/app/utils/kgDesignArtifacts.ts @@ 
-91,6 +91,25 @@ export interface DesignArtifactsResponse { export const DESIGN_ARTIFACTS_PAGE_SIZE = 20 +export interface OntologyEdgeTypeRef { + label: string + auto_generated?: boolean + inverse_of?: string | null +} + +/** Match backend `is_primary_relationship_for_display` — one row per logical relationship. */ +export function isPrimaryRelationshipTypeForDisplay(edge: OntologyEdgeTypeRef): boolean { + return !edge.auto_generated && !edge.inverse_of +} + +export function primaryRelationshipTypeLabels(edgeTypes: OntologyEdgeTypeRef[]): string[] { + return edgeTypes.filter(isPrimaryRelationshipTypeForDisplay).map((edge) => edge.label) +} + +export function primaryRelationshipTypeCount(edgeTypes: OntologyEdgeTypeRef[]): number { + return primaryRelationshipTypeLabels(edgeTypes).length +} + export function pageSlice<T>( pageByKey: Record<string, number>, key: string, From 5ca0f424d6a82763cbb86977356866ff7e7fe925 Mon Sep 17 00:00:00 2001 From: aredenba-rh <aredenba@redhat.com> Date: Sun, 14 Jun 2026 16:27:49 -0400 Subject: [PATCH 136/153] feat(extraction,dev-ui): speed bulk GMA mutations and improve manage schema explorer Add bulk instance edit workflow guidance, helpers/sync_instances.py for diff-and-generate JSONL, and clearer list-instances MCP tool docs so agents batch deletes instead of per-slug loops. Replace manage overview type badges with GraphSchemaExplorer and extract reusable entity/relationship type list components. 
Co-authored-by: Cursor <cursoragent@cursor.com> --- specs/extraction/one-off-mutations.spec.md | 7 + .../kartograph_agent_runtime/agent_prompt.py | 38 +-- .../kartograph_agent_runtime/schema_tools.py | 8 +- src/agent-runtime/tests/test_agent_prompt.py | 3 +- src/agent-runtime/tests/test_schema_tools.py | 7 + .../application/schema_authoring_guide.py | 21 +- .../application/skill_resolution_service.py | 42 +++- .../extraction_job_helpers/__init__.py | 2 +- .../extraction_job_helpers/sync_instances.py | 230 ++++++++++++++++++ .../test_schema_authoring_guide.py | 3 + .../test_skill_resolution_service.py | 5 +- ...est_extraction_job_workdir_materializer.py | 2 + ...est_sticky_session_workdir_materializer.py | 1 + .../infrastructure/test_sync_instances.py | 168 +++++++++++++ .../GraphDesignEntitiesPanel.vue | 147 +---------- .../GraphDesignEntityTypeList.vue | 157 ++++++++++++ .../GraphDesignRelationshipTypeList.vue | 161 ++++++++++++ .../GraphDesignRelationshipsPanel.vue | 157 +----------- .../graph-management/GraphSchemaExplorer.vue | 214 ++++++++++++++++ .../pages/knowledge-graphs/[kgId]/manage.vue | 55 ++--- .../knowledge-graph-manage-workspace.test.ts | 5 +- 21 files changed, 1071 insertions(+), 362 deletions(-) create mode 100644 src/api/extraction/infrastructure/extraction_job_helpers/sync_instances.py create mode 100644 src/api/tests/unit/extraction/infrastructure/test_sync_instances.py create mode 100644 src/dev-ui/app/components/graph-management/GraphDesignEntityTypeList.vue create mode 100644 src/dev-ui/app/components/graph-management/GraphDesignRelationshipTypeList.vue create mode 100644 src/dev-ui/app/components/graph-management/GraphSchemaExplorer.vue diff --git a/specs/extraction/one-off-mutations.spec.md b/specs/extraction/one-off-mutations.spec.md index 6899d4581..f93fdda91 100644 --- a/specs/extraction/one-off-mutations.spec.md +++ b/specs/extraction/one-off-mutations.spec.md @@ -23,6 +23,13 @@ The system SHALL implement requested schema and instance 
changes via Kartograph - THEN it validates and applies UPDATE JSONL mutations - AND reports write operation counts +#### Scenario: Bulk instance cleanup +- GIVEN an operator asks to delete many instances and keep or create a specific set +- WHEN the assistant completes the turn +- THEN it lists instances by type (not per-slug search loops) +- AND generates JSONL in batch to a workspace file or script +- AND validates once and applies once via file-based mutation tools + #### Scenario: Schema type change - GIVEN an operator asks to add an optional property to an entity type - WHEN the assistant completes the turn diff --git a/src/agent-runtime/kartograph_agent_runtime/agent_prompt.py b/src/agent-runtime/kartograph_agent_runtime/agent_prompt.py index e62254de6..cc276b3d7 100644 --- a/src/agent-runtime/kartograph_agent_runtime/agent_prompt.py +++ b/src/agent-runtime/kartograph_agent_runtime/agent_prompt.py @@ -26,7 +26,7 @@ | `kartograph_apply_graph_mutations` | Apply JSONL CREATE/UPDATE/DELETE (small batches) | | `kartograph_validate_graph_mutations_from_file` | Dry-run a workspace `.jsonl` file | | `kartograph_apply_graph_mutations_from_file` | Apply a workspace `.jsonl` file in one call | -| `kartograph_list_instances_by_type` | List/count entity instances for one type (verify prepopulation) | +| `kartograph_list_instances_by_type` | List instances with mutation-ready `id`, `slug`, `properties` (paginate for bulk) | | `kartograph_list_relationship_instances` | List relationship edges with source/target slugs and node IDs | | `kartograph_search_graph_by_slug` | Find existing nodes by slug to avoid duplicates | | `kartograph_check_graph_slugs` | Batch check which slugs already exist for one entity type | @@ -123,28 +123,35 @@ | Tool | Purpose | |------|---------| -| `kartograph_get_schema_authoring_guide` | JSONL shapes, schema rules, one-off workflow | +| `kartograph_get_schema_authoring_guide` | JSONL shapes, schema rules, one-off + bulk workflow | | 
`kartograph_get_schema_ontology` | **Always read before edits** | | `kartograph_save_schema_ontology` | Schema type/property changes (read → merge → save) | -| `kartograph_search_graph_by_slug` | Resolve existing node id for UPDATE/DELETE | +| `kartograph_search_graph_by_slug` | Resolve **one** slug when ambiguous (avoid in bulk loops) | | `kartograph_check_graph_slugs` | Batch slug existence before CREATE | -| `kartograph_list_instances_by_type` | Browse instances when picking targets | -| `kartograph_list_relationship_instances` | Inspect edges before create/update/delete | -| `kartograph_validate_graph_mutations` | Dry-run inline JSONL (primary for ≤20 lines) | -| `kartograph_apply_graph_mutations` | Apply inline JSONL after validate passes | -| `kartograph_validate_graph_mutations_from_file` | Dry-run workspace `.jsonl` file | +| `kartograph_list_instances_by_type` | **Primary bulk tool** — returns `id`, `slug`, `properties`; paginate with offset | +| `kartograph_list_relationship_instances` | Inspect edges before batch delete/create | +| `kartograph_validate_graph_mutations` | Dry-run inline JSONL (≤5 lines only) | +| `kartograph_apply_graph_mutations` | Apply inline JSONL after validate (small batches) | +| `kartograph_validate_graph_mutations_from_file` | Dry-run workspace `.jsonl` (**bulk default**) | | `kartograph_apply_graph_mutations_from_file` | Apply larger batches from workspace file | -Copy JSONL field names from `helpers/mutation-examples.jsonl` in the workspace. +Copy JSONL field names from `helpers/mutation-examples.jsonl`. For bulk work, Write to `helpers/bulk_<task>.jsonl`. -### Workflow +### Small edits (≤5 lines) -1. Classify request: schema edit, instance edit, mixed, or read-only -2. Read ontology; search/list targets -3. Validate → apply → verify -4. Report write op counts and affected slugs +1. Classify: schema vs instance vs read-only +2. List/search targets once +3. 
Validate inline → apply inline → verify -Confirm before DELETE nodes or schema removals. Route bulk enrichment to Extraction Jobs mode. +### Bulk edits (5+ instances or delete-and-recreate) + +1. **Classify** — what to delete vs create (operator intent in plain language) +2. **Query once per type** — `kartograph_list_instances_by_type` (includes mutation-ready `id`); paginate until `total` covered; filter in Bash/python — **no per-slug search** +3. **Generate JSONL in batch** — `helpers/sync_instances.py` (current vs desired JSON) or `helpers/bulk_<task>.jsonl`; never hand-type one line per instance +4. **Validate once → apply once** — `*_from_file` tools +5. **Verify** — one list call; report delete/create counts + +Target **2–4 tool rounds** for bulk cleanup. Confirm before schema removals only. """.strip() _TOOLS_COMPACT_REFERENCE = ( @@ -240,6 +247,7 @@ def _format_workspace_readiness(readiness: dict[str, Any]) -> str: _ONE_OFF_MUTATIONS_COMPACT_SKILL_KEYS = ( "instance_edit_workflow", + "bulk_instance_edit_workflow", "schema_edit_workflow", "confirmation_policy", "jsonl_shape_reference", diff --git a/src/agent-runtime/kartograph_agent_runtime/schema_tools.py b/src/agent-runtime/kartograph_agent_runtime/schema_tools.py index 69908f9e9..97dcb61bd 100644 --- a/src/agent-runtime/kartograph_agent_runtime/schema_tools.py +++ b/src/agent-runtime/kartograph_agent_runtime/schema_tools.py @@ -14,6 +14,12 @@ WORKSPACE_FILE_TOOL_NAMES = ("Read", "Write", "Edit", "Grep", "Glob", "Bash") +LIST_INSTANCES_BY_TYPE_TOOL_DESCRIPTION = ( + "List entity instances for one type with pagination. Returns mutation-ready " + "`id`, `slug`, and `properties` per node — paginate with offset until total. " + "Primary bulk query tool for DELETE JSONL; avoid per-slug search loops." 
+) + KARTOGRAPH_SCHEMA_TOOL_NAMES = ( "kartograph_get_schema_authoring_guide", "kartograph_get_workspace_readiness", @@ -208,7 +214,7 @@ async def apply_graph_mutations_from_file(args: dict[str, Any]) -> dict[str, Any @tool( "kartograph_list_instances_by_type", - "List entity instances for one type with pagination (use to verify prepopulation).", + LIST_INSTANCES_BY_TYPE_TOOL_DESCRIPTION, {"entity_type": str, "limit": int, "offset": int}, ) async def list_instances_by_type(args: dict[str, Any]) -> dict[str, Any]: diff --git a/src/agent-runtime/tests/test_agent_prompt.py b/src/agent-runtime/tests/test_agent_prompt.py index 3d57d2250..490f56e2d 100644 --- a/src/agent-runtime/tests/test_agent_prompt.py +++ b/src/agent-runtime/tests/test_agent_prompt.py @@ -155,4 +155,5 @@ def test_build_agent_system_prompt_one_off_mutations_includes_tools_reference() assert "One-off mutation tools" in prompt assert "mutation-examples.jsonl" in prompt - assert "kartograph_validate_graph_mutations" in prompt + assert "Bulk edits" in prompt + assert "kartograph_list_instances_by_type" in prompt diff --git a/src/agent-runtime/tests/test_schema_tools.py b/src/agent-runtime/tests/test_schema_tools.py index 65279c504..9ebc4fc93 100644 --- a/src/agent-runtime/tests/test_schema_tools.py +++ b/src/agent-runtime/tests/test_schema_tools.py @@ -4,6 +4,7 @@ from kartograph_agent_runtime.schema_tools import ( KARTOGRAPH_SCHEMA_TOOL_NAMES, + LIST_INSTANCES_BY_TYPE_TOOL_DESCRIPTION, build_kartograph_schema_mcp_server, ) from kartograph_agent_runtime.settings import AgentRuntimeSettings @@ -51,6 +52,12 @@ def test_gma_allowed_tools_include_bash() -> None: assert "Bash" in GMA_ALLOWED_TOOL_NAMES +def test_list_instances_by_type_tool_description_mentions_mutation_ready_ids() -> None: + assert "mutation-ready" in LIST_INSTANCES_BY_TYPE_TOOL_DESCRIPTION + assert "id" in LIST_INSTANCES_BY_TYPE_TOOL_DESCRIPTION + assert "prepopulation" not in LIST_INSTANCES_BY_TYPE_TOOL_DESCRIPTION.lower() + + def 
test_build_kartograph_schema_mcp_server_registers_tools() -> None: tooling = RuntimeTooling( settings=AgentRuntimeSettings( diff --git a/src/api/extraction/application/schema_authoring_guide.py b/src/api/extraction/application/schema_authoring_guide.py index 585319584..95b79296a 100644 --- a/src/api/extraction/application/schema_authoring_guide.py +++ b/src/api/extraction/application/schema_authoring_guide.py @@ -247,14 +247,27 @@ Rules: both `op` and `type` on every line; `set_properties` not `properties`; UPDATE/DELETE need top-level `id`. -### Workflow +### Workflow (small edits, ≤5 lines) 1. `kartograph_get_schema_ontology` — always before edits -2. Resolve targets: `kartograph_search_graph_by_slug`, `kartograph_list_instances_by_type` -3. `kartograph_validate_graph_mutations` → `kartograph_apply_graph_mutations` (≤20 lines) or apply-from-file +2. Resolve targets: one `kartograph_list_instances_by_type` or `kartograph_search_graph_by_slug` +3. `kartograph_validate_graph_mutations` → `kartograph_apply_graph_mutations` 4. Verify with list/search; report write op counts -Confirm before DELETE nodes or schema removals. Do not use prepopulation scanners unless the operator explicitly requests bulk import. +### Bulk instance operations (5+ deletes/creates/updates) + +Use when the operator asks to replace, prune, reconcile, or keep-only a set of instances. + +**Mental model:** classify delete vs create → query once per type → generate JSONL in batch → validate once → apply once → done. + +1. **List, don't loop search** — `kartograph_list_instances_by_type` returns `id`, `slug`, and `properties` (mutation-ready). Paginate with `offset` until you cover `total`. Filter by `data_source_id`, slug, or path in Bash/python. Do **not** call `search_by_slug` per instance. +2. 
**Generate JSONL programmatically** — save list output to `helpers/current_<Label>.json`, desired slugs to `helpers/desired_<Label>.json`, then run `python3 helpers/sync_instances.py --entity-type <Label> --current ... --desired ... --out helpers/bulk_<task>.jsonl` (optional `--filter-data-source-id`, `--create-missing`). Or Write `helpers/bulk_<task>.jsonl` directly. Example DELETE shape: `{"op":"DELETE","type":"node","id":"<id from list>"}`. Never hand-type dozens of lines in chat. +3. **Validate once, apply once** — `kartograph_validate_graph_mutations_from_file` then `kartograph_apply_graph_mutations_from_file`. +4. **Verify** — one list call; report counts. + +Target **2–4 tool rounds** for bulk cleanup. Explicit delete/replace requests do not need a second confirmation after validate passes. + +Confirm before schema type removals. Do not use prepopulation scanners unless the operator explicitly requests bulk import via scanner workflow. ## Readiness checklist diff --git a/src/api/extraction/application/skill_resolution_service.py b/src/api/extraction/application/skill_resolution_service.py index d5f89747d..41f6bd5a8 100644 --- a/src/api/extraction/application/skill_resolution_service.py +++ b/src/api/extraction/application/skill_resolution_service.py @@ -288,12 +288,24 @@ class ResolvedExtractionSkillPack: "Unique edge_types labels; one primary direction per label." ), "instance_edit_workflow": ( - "Instance edits: (1) kartograph_get_schema_ontology; " - "(2) kartograph_search_graph_by_slug or kartograph_list_instances_by_type; " - "(3) prefer UPDATE for existing slugs, CREATE only for new; " - "(4) kartograph_validate_graph_mutations then kartograph_apply_graph_mutations " - "for small batches (≤20 lines), or apply-from-file for larger; " - "(5) verify affected slugs. Copy JSONL shapes from helpers/mutation-examples.jsonl." 
+ "Instance edits (small, ≤5 lines): (1) kartograph_get_schema_ontology; " + "(2) kartograph_list_instances_by_type or kartograph_search_graph_by_slug; " + "(3) prefer UPDATE for existing slugs; " + "(4) kartograph_validate_graph_mutations then kartograph_apply_graph_mutations; " + "(5) verify affected slugs. For 5+ instances use bulk_instance_edit_workflow instead." + ), + "bulk_instance_edit_workflow": ( + "Bulk instance ops (5+ creates/updates/deletes): mental model — classify what to " + "delete vs create → query once per entity type → generate JSONL in batch → validate " + "once → apply once → report. (1) kartograph_list_instances_by_type per affected " + "type (returns mutation-ready id + slug; paginate with offset until total covered); " + "filter by data_source_id, slug, or properties in code — never kartograph_search_graph_by_slug " + "per instance. (2) Generate ALL DELETE/CREATE/UPDATE lines via helpers/sync_instances.py " + "(current vs desired JSON snapshots) or Bash/python Write to helpers/bulk_<task>.jsonl " + "— never hand-type JSONL line-by-line. " + "(3) kartograph_validate_graph_mutations_from_file once → " + "kartograph_apply_graph_mutations_from_file once. (4) One list call to verify counts. " + "Target 2–4 tool rounds total for cleanup/replace tasks." ), "jsonl_shape_reference": ( "Every JSONL line needs op (CREATE|UPDATE|DELETE) and type (node|edge). " @@ -301,9 +313,10 @@ class ResolvedExtractionSkillPack: "CREATE nodes need label, id, set_properties with slug, name, data_source_id." ), "confirmation_policy": ( - "Auto-apply after validate when operator said apply/fix/update or change is a single " - "non-destructive UPDATE. Confirm before DELETE nodes, bulk CREATE (>5 lines), or " - "schema type removal. Never apply without validate passing." + "Auto-apply after validate when operator said apply/fix/update/delete/replace/cleanup " + "or change is a single non-destructive UPDATE. 
Confirm before schema type removal " + "or DELETE when operator intent is ambiguous. Bulk DELETE/Cleanup explicitly requested " + "does not need a second confirm — validate once, apply once. Never apply without validate passing." ), "session_reporting": ( "After successful apply, report operation counts, affected slugs/labels, and any " @@ -317,17 +330,22 @@ class ResolvedExtractionSkillPack: "You are the Graph Management Assistant in One-off Mutations mode. " "The operator requests specific schema or instance changes; you implement them yourself " "using Kartograph schema tools with validate-then-apply mutation workflow. " - "Every write must be auditable via mutation tools — do not instruct manual JSONL entry." + "Default mental model for instance work: classify what to delete vs create → " + "list/query once per type for ids → generate JSONL in batch (script or file) → " + "validate once → apply once → report. Avoid per-instance search/validate/apply loops." ) _ONE_OFF_MUTATIONS_GUARDRAILS: tuple[str, ...] 
= ( "Implement requested edits in-session via kartograph_* tools; do not ask the operator to paste JSONL manually.", "Validate before every apply; report validation errors verbatim.", "Prefer UPDATE over CREATE for existing slugs; strict CREATE rejects duplicate ids/slugs.", + "For 5+ instance changes: batch with list_instances_by_type + file-based validate/apply — never iterate per-instance tool calls.", + "Use kartograph_list_instances_by_type for DELETE targets (returns id); do not call search_by_slug once per slug.", + "Generate JSONL programmatically (Bash/python or Write helpers/bulk_*.jsonl); validate once, apply once.", "Do not start prepopulation scanners or run_scanner.py unless the operator explicitly requests bulk import.", - "DELETE on nodes requires explicit operator confirmation (cascades connected edges).", + "DELETE on nodes cascades edges — still use batch JSONL when operator asked to remove many instances.", "Schema saves require confirmation unless the operator explicitly approved.", - "Route bulk enrichment requests to Extraction Jobs mode; route greenfield ontology design to Initial Schema Design.", + "Route ongoing enrichment pipelines to Extraction Jobs mode; route greenfield ontology design to Initial Schema Design.", ) diff --git a/src/api/extraction/infrastructure/extraction_job_helpers/__init__.py b/src/api/extraction/infrastructure/extraction_job_helpers/__init__.py index f80a15b48..732ade2c0 100644 --- a/src/api/extraction/infrastructure/extraction_job_helpers/__init__.py +++ b/src/api/extraction/infrastructure/extraction_job_helpers/__init__.py @@ -4,6 +4,6 @@ HELPERS_DIR = Path(__file__).resolve().parent HELPERS_CONTAINER_DIR = "helpers" -HELPER_SCRIPT_NAMES = ("workload-mutations.sh",) +HELPER_SCRIPT_NAMES = ("workload-mutations.sh", "sync_instances.py") HELPER_RESOURCE_NAMES = ("mutation-examples.jsonl",) HELPER_BUNDLE_NAMES = HELPER_SCRIPT_NAMES + HELPER_RESOURCE_NAMES diff --git 
a/src/api/extraction/infrastructure/extraction_job_helpers/sync_instances.py b/src/api/extraction/infrastructure/extraction_job_helpers/sync_instances.py new file mode 100644 index 000000000..3bfb4b183 --- /dev/null +++ b/src/api/extraction/infrastructure/extraction_job_helpers/sync_instances.py @@ -0,0 +1,230 @@ +#!/usr/bin/env python3 +"""Diff current graph instances against a desired set and emit mutation JSONL. + +Typical bulk cleanup workflow: + + 1. Paginate ``kartograph_list_instances_by_type`` and save merged output to + ``helpers/current_<Label>.json`` (object with ``nodes`` array). + 2. Save desired slugs or scanner output to ``helpers/desired_<Label>.json``. + 3. Generate JSONL:: + + python3 helpers/sync_instances.py --entity-type Adapter \\ + --current helpers/current_Adapter.json \\ + --desired helpers/desired_Adapter.json \\ + --filter-data-source-id hyperfleet-e2e \\ + --out helpers/bulk_sync_Adapter.jsonl + + 4. ``kartograph_validate_graph_mutations_from_file`` → ``apply-from-file`` once. + +Use ``--create-missing`` with ``--data-source-id`` to emit CREATE lines for desired +slugs that are not already present in the current snapshot. 
+""" + +from __future__ import annotations + +import argparse +import hashlib +import json +import sys +from pathlib import Path +from typing import Any + + +def parse_current_nodes(payload: Any) -> list[dict[str, Any]]: + """Accept a list API response, a ``nodes`` array, or a bare node list.""" + if isinstance(payload, dict): + nodes = payload.get("nodes") + if isinstance(nodes, list): + return [_normalize_current_node(node, index) for index, node in enumerate(nodes)] + raise ValueError("Current snapshot object must include a nodes array") + if isinstance(payload, list): + return [_normalize_current_node(node, index) for index, node in enumerate(payload)] + raise ValueError("Current snapshot must be a JSON object or array") + + +def _normalize_current_node(node: Any, index: int) -> dict[str, Any]: + if not isinstance(node, dict): + raise ValueError(f"Current node at index {index} must be an object") + node_id = node.get("id") + slug = node.get("slug") + if not node_id or not str(node_id).strip(): + raise ValueError(f"Current node at index {index} is missing id") + if not slug or not str(slug).strip(): + raise ValueError(f"Current node at index {index} is missing slug") + properties = node.get("properties") or {} + if not isinstance(properties, dict): + raise ValueError(f"Current node at index {index} properties must be an object") + return { + "id": str(node_id).strip(), + "slug": str(slug).strip(), + "properties": properties, + } + + +def parse_desired_instances(payload: Any) -> dict[str, dict[str, Any]]: + """Return slug → {properties} for desired instances.""" + if isinstance(payload, list): + desired: dict[str, dict[str, Any]] = {} + for index, row in enumerate(payload): + if isinstance(row, str): + slug = row.strip() + if not slug: + raise ValueError(f"Desired slug at index {index} must not be empty") + desired[slug] = {"properties": {}} + continue + if not isinstance(row, dict): + raise ValueError(f"Desired entry at index {index} must be a slug string or 
object") + slug = row.get("slug") + if not slug or not str(slug).strip(): + raise ValueError(f"Desired entry at index {index} is missing slug") + properties = row.get("properties") or {} + if not isinstance(properties, dict): + raise ValueError(f"Desired entry at index {index} properties must be an object") + desired[str(slug).strip()] = {"properties": dict(properties)} + return desired + raise ValueError("Desired snapshot must be a JSON array") + + +def deterministic_node_id(*, entity_label: str, slug: str, tenant_id: str = "") -> str: + normalized_type = entity_label.strip().lower() + combined = f"{tenant_id}:{normalized_type}:{slug.strip()}" + digest = hashlib.sha256(combined.encode()).hexdigest()[:16] + return f"{normalized_type}:{digest}" + + +def instance_to_create_line( + *, + entity_label: str, + slug: str, + properties: dict[str, Any], + data_source_id: str, + tenant_id: str, +) -> dict[str, Any]: + set_properties = dict(properties) + set_properties.setdefault("slug", slug) + set_properties.setdefault("name", slug) + set_properties["data_source_id"] = data_source_id + return { + "op": "CREATE", + "type": "node", + "id": deterministic_node_id( + entity_label=entity_label, + slug=slug, + tenant_id=tenant_id, + ), + "label": entity_label.strip(), + "set_properties": set_properties, + } + + +def build_sync_mutations( + *, + entity_type: str, + current_nodes: list[dict[str, Any]], + desired_by_slug: dict[str, dict[str, Any]], + filter_data_source_id: str | None = None, + create_missing: bool = False, + data_source_id: str = "", + tenant_id: str = "", +) -> list[dict[str, Any]]: + """Return DELETE (and optional CREATE) mutation lines for one entity type.""" + entity_type = entity_type.strip() + if not entity_type: + raise ValueError("entity_type must not be empty") + + desired_slugs = set(desired_by_slug) + current_slugs = {node["slug"] for node in current_nodes} + lines: list[dict[str, Any]] = [] + + for node in sorted(current_nodes, key=lambda item: 
item["slug"]): + if filter_data_source_id: + node_ds = str(node["properties"].get("data_source_id") or "").strip() + if node_ds != filter_data_source_id.strip(): + continue + if node["slug"] not in desired_slugs: + lines.append({"op": "DELETE", "type": "node", "id": node["id"]}) + + if create_missing: + if not data_source_id.strip(): + raise ValueError("--data-source-id is required when --create-missing is set") + for slug in sorted(desired_slugs - current_slugs): + desired = desired_by_slug[slug] + lines.append( + instance_to_create_line( + entity_label=entity_type, + slug=slug, + properties=desired["properties"], + data_source_id=data_source_id.strip(), + tenant_id=tenant_id, + ) + ) + + return lines + + +def _load_json(path: Path) -> Any: + return json.loads(path.read_text(encoding="utf-8")) + + +def main(argv: list[str] | None = None) -> int: + parser = argparse.ArgumentParser( + description="Diff current graph instances against desired slugs and emit mutation JSONL.", + ) + parser.add_argument( + "--entity-type", + required=True, + help="Entity type label matching ontology exactly (case-sensitive).", + ) + parser.add_argument( + "--current", + required=True, + help="Path to JSON snapshot from kartograph_list_instances_by_type (nodes array).", + ) + parser.add_argument( + "--desired", + required=True, + help="Path to JSON array of desired slugs or scanner instances.", + ) + parser.add_argument( + "--out", + help="Write JSONL to this path; omit to write stdout.", + ) + parser.add_argument( + "--filter-data-source-id", + help="Only DELETE current nodes with this data_source_id property.", + ) + parser.add_argument( + "--create-missing", + action="store_true", + help="Emit CREATE lines for desired slugs absent from the current snapshot.", + ) + parser.add_argument( + "--data-source-id", + default="", + help="data_source_id for CREATE lines when --create-missing is set.", + ) + parser.add_argument("--tenant-id", default="", help="Tenant id for deterministic CREATE 
ids.") + args = parser.parse_args(argv) + + current_nodes = parse_current_nodes(_load_json(Path(args.current))) + desired_by_slug = parse_desired_instances(_load_json(Path(args.desired))) + lines = build_sync_mutations( + entity_type=args.entity_type, + current_nodes=current_nodes, + desired_by_slug=desired_by_slug, + filter_data_source_id=args.filter_data_source_id, + create_missing=args.create_missing, + data_source_id=args.data_source_id, + tenant_id=args.tenant_id, + ) + + rendered = "".join(json.dumps(line, separators=(",", ":")) + "\n" for line in lines) + if args.out: + Path(args.out).write_text(rendered, encoding="utf-8") + else: + sys.stdout.write(rendered) + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/src/api/tests/unit/extraction/application/test_schema_authoring_guide.py b/src/api/tests/unit/extraction/application/test_schema_authoring_guide.py index ea8809878..623a238c9 100644 --- a/src/api/tests/unit/extraction/application/test_schema_authoring_guide.py +++ b/src/api/tests/unit/extraction/application/test_schema_authoring_guide.py @@ -20,6 +20,9 @@ def test_authoring_guide_documents_bootstrap_and_modeling_guidance() -> None: assert "smoke-test" in SCHEMA_AUTHORING_GUIDE.lower() or "smoke test" in SCHEMA_AUTHORING_GUIDE.lower() assert "## One-off mutations" in SCHEMA_AUTHORING_GUIDE assert "mutation-examples.jsonl" in SCHEMA_AUTHORING_GUIDE + assert "Bulk instance operations" in SCHEMA_AUTHORING_GUIDE + assert "sync_instances.py" in SCHEMA_AUTHORING_GUIDE + assert "validate once" in SCHEMA_AUTHORING_GUIDE.lower() assert "next_action" in SCHEMA_AUTHORING_GUIDE assert "## Relationship types" in SCHEMA_AUTHORING_GUIDE assert "one row per primary relationship label" in SCHEMA_AUTHORING_GUIDE diff --git a/src/api/tests/unit/extraction/application/test_skill_resolution_service.py b/src/api/tests/unit/extraction/application/test_skill_resolution_service.py index dddcffa9f..ead0877e2 100644 --- 
a/src/api/tests/unit/extraction/application/test_skill_resolution_service.py +++ b/src/api/tests/unit/extraction/application/test_skill_resolution_service.py @@ -135,6 +135,9 @@ async def test_one_off_mutations_ui_mode_overlay(self) -> None: assert "One-off Mutations mode" in resolved.system_prompt assert "instance_edit_workflow" in resolved.skills + assert "bulk_instance_edit_workflow" in resolved.skills assert "schema_edit_workflow" in resolved.skills assert "confirmation_policy" in resolved.skills - assert any("DELETE on nodes" in item for item in resolved.guardrails) + assert any("5+ instance changes" in item for item in resolved.guardrails) + assert "validate once" in resolved.skills["bulk_instance_edit_workflow"] + assert "sync_instances.py" in resolved.skills["bulk_instance_edit_workflow"] diff --git a/src/api/tests/unit/extraction/infrastructure/test_extraction_job_workdir_materializer.py b/src/api/tests/unit/extraction/infrastructure/test_extraction_job_workdir_materializer.py index afc4ef1ba..57ecfb553 100644 --- a/src/api/tests/unit/extraction/infrastructure/test_extraction_job_workdir_materializer.py +++ b/src/api/tests/unit/extraction/infrastructure/test_extraction_job_workdir_materializer.py @@ -134,6 +134,7 @@ async def test_prepare_materializes_instance_referenced_paths_and_workspace_layo assert (job_root / "mutations").is_dir() assert (job_root / "helpers" / "workload-mutations.sh").is_file() assert (job_root / "helpers" / "mutation-examples.jsonl").is_file() + assert (job_root / "helpers" / "sync_instances.py").is_file() context = json.loads((job_root / "job-context.json").read_text(encoding="utf-8")) assert context["repository_files"]["files_written"] == 1 assert probe.observations[0].files_written == 1 @@ -204,3 +205,4 @@ async def test_prepare_enriches_target_instances_with_graph_id_and_missing_prope assert target["graph_id"] == "adapter:abc123def4567890" assert target["properties_missing"] == ["resource_types", "transport"] assert (job_root / 
"helpers" / "mutation-examples.jsonl").is_file() + assert (job_root / "helpers" / "sync_instances.py").is_file() diff --git a/src/api/tests/unit/extraction/infrastructure/test_sticky_session_workdir_materializer.py b/src/api/tests/unit/extraction/infrastructure/test_sticky_session_workdir_materializer.py index 207179a9f..c4ef30265 100644 --- a/src/api/tests/unit/extraction/infrastructure/test_sticky_session_workdir_materializer.py +++ b/src/api/tests/unit/extraction/infrastructure/test_sticky_session_workdir_materializer.py @@ -197,3 +197,4 @@ def test_materializer_copies_mutation_helper_examples(tmp_path: Path) -> None: assert (session_root / "helpers" / "mutation-examples.jsonl").is_file() assert (session_root / "helpers" / "workload-mutations.sh").is_file() + assert (session_root / "helpers" / "sync_instances.py").is_file() diff --git a/src/api/tests/unit/extraction/infrastructure/test_sync_instances.py b/src/api/tests/unit/extraction/infrastructure/test_sync_instances.py new file mode 100644 index 000000000..f0c2d382d --- /dev/null +++ b/src/api/tests/unit/extraction/infrastructure/test_sync_instances.py @@ -0,0 +1,168 @@ +"""Unit tests for helpers/sync_instances.py bulk diff JSONL generator.""" + +from __future__ import annotations + +import json +from pathlib import Path + +import pytest + +from extraction.infrastructure.extraction_job_helpers import sync_instances + + +def test_parse_current_nodes_accepts_list_api_response() -> None: + payload = { + "entity_type": "Adapter", + "nodes": [ + {"id": "adapter:aaa", "slug": "keep-me", "properties": {"data_source_id": "ds-1"}}, + {"id": "adapter:bbb", "slug": "drop-me", "properties": {"data_source_id": "ds-1"}}, + ], + "total": 2, + } + + nodes = sync_instances.parse_current_nodes(payload) + + assert len(nodes) == 2 + assert nodes[0]["id"] == "adapter:aaa" + + +def test_parse_desired_slugs_accepts_slug_array() -> None: + desired = sync_instances.parse_desired_instances(["keep-me", "new-one"]) + + assert 
set(desired) == {"keep-me", "new-one"} + assert desired["keep-me"]["properties"] == {} + + +def test_parse_desired_slugs_accepts_scanner_instances_array() -> None: + payload = [ + {"slug": "keep-me", "properties": {"name": "Keep Me"}}, + {"slug": "new-one", "properties": {"name": "New One"}}, + ] + + desired = sync_instances.parse_desired_instances(payload) + + assert desired["keep-me"]["properties"]["name"] == "Keep Me" + + +def test_build_sync_mutations_emits_delete_for_extra_current_nodes() -> None: + current = [ + {"id": "adapter:aaa", "slug": "keep-me", "properties": {}}, + {"id": "adapter:bbb", "slug": "drop-me", "properties": {}}, + ] + desired = sync_instances.parse_desired_instances(["keep-me"]) + + lines = sync_instances.build_sync_mutations( + entity_type="Adapter", + current_nodes=current, + desired_by_slug=desired, + ) + + assert len(lines) == 1 + assert lines[0] == {"op": "DELETE", "type": "node", "id": "adapter:bbb"} + + +def test_build_sync_mutations_respects_data_source_filter() -> None: + current = [ + { + "id": "adapter:aaa", + "slug": "drop-me", + "properties": {"data_source_id": "hyperfleet-e2e"}, + }, + { + "id": "adapter:bbb", + "slug": "other-ds", + "properties": {"data_source_id": "other"}, + }, + ] + desired = sync_instances.parse_desired_instances([]) + + lines = sync_instances.build_sync_mutations( + entity_type="Adapter", + current_nodes=current, + desired_by_slug=desired, + filter_data_source_id="hyperfleet-e2e", + ) + + assert lines == [{"op": "DELETE", "type": "node", "id": "adapter:aaa"}] + + +def test_build_sync_mutations_create_missing_adds_create_lines() -> None: + current = [{"id": "adapter:aaa", "slug": "keep-me", "properties": {}}] + desired = sync_instances.parse_desired_instances( + [{"slug": "keep-me", "properties": {"name": "Keep Me"}}] + ) + desired["new-one"] = {"properties": {"name": "New One", "transport": "maestro"}} + + lines = sync_instances.build_sync_mutations( + entity_type="Adapter", + current_nodes=current, + 
desired_by_slug=desired, + create_missing=True, + data_source_id="hyperfleet-e2e", + ) + + assert len(lines) == 1 + create_line = lines[0] + assert create_line["op"] == "CREATE" + assert create_line["type"] == "node" + assert create_line["label"] == "Adapter" + assert create_line["set_properties"]["slug"] == "new-one" + assert create_line["set_properties"]["data_source_id"] == "hyperfleet-e2e" + + +def test_build_sync_mutations_skips_create_when_slug_already_exists() -> None: + current = [{"id": "adapter:aaa", "slug": "existing", "properties": {}}] + desired = sync_instances.parse_desired_instances(["existing", "brand-new"]) + + lines = sync_instances.build_sync_mutations( + entity_type="Adapter", + current_nodes=current, + desired_by_slug=desired, + create_missing=True, + data_source_id="hyperfleet-e2e", + ) + + assert len(lines) == 1 + assert lines[0]["set_properties"]["slug"] == "brand-new" + + +def test_main_writes_jsonl_file(tmp_path: Path) -> None: + current_path = tmp_path / "current.json" + desired_path = tmp_path / "desired.json" + out_path = tmp_path / "bulk.jsonl" + current_path.write_text( + json.dumps( + { + "nodes": [ + {"id": "adapter:aaa", "slug": "keep", "properties": {}}, + {"id": "adapter:bbb", "slug": "drop", "properties": {}}, + ] + } + ), + encoding="utf-8", + ) + desired_path.write_text(json.dumps(["keep"]), encoding="utf-8") + + exit_code = sync_instances.main( + [ + "--entity-type", + "Adapter", + "--current", + str(current_path), + "--desired", + str(desired_path), + "--out", + str(out_path), + ] + ) + + assert exit_code == 0 + lines = out_path.read_text(encoding="utf-8").strip().splitlines() + assert len(lines) == 1 + assert json.loads(lines[0])["id"] == "adapter:bbb" + + +def test_main_requires_entity_type() -> None: + with pytest.raises(SystemExit) as exc_info: + sync_instances.main(["--current", "a.json", "--desired", "b.json"]) + assert exc_info.value.code != 0 diff --git 
a/src/dev-ui/app/components/graph-management/GraphDesignEntitiesPanel.vue b/src/dev-ui/app/components/graph-management/GraphDesignEntitiesPanel.vue index 3e6f266fb..2292279f8 100644 --- a/src/dev-ui/app/components/graph-management/GraphDesignEntitiesPanel.vue +++ b/src/dev-ui/app/components/graph-management/GraphDesignEntitiesPanel.vue @@ -1,17 +1,14 @@ <script setup lang="ts"> import { computed, ref, watch } from 'vue' import { toast } from 'vue-sonner' -import { ChevronDown, Loader2, RefreshCw } from 'lucide-vue-next' -import { Card, CardContent, CardDescription, CardHeader, CardTitle } from '@/components/ui/card' +import { Loader2, RefreshCw } from 'lucide-vue-next' +import { Card, CardContent, CardHeader, CardTitle } from '@/components/ui/card' import { Badge } from '@/components/ui/badge' import { Button } from '@/components/ui/button' +import GraphDesignEntityTypeList from '@/components/graph-management/GraphDesignEntityTypeList.vue' import { type DesignArtifactEntityType, type DesignArtifactsResponse, - pageSlice, - prepopulationBadgeClass, - prepopulationCardClass, - prepopulationLabel, } from '@/utils/kgDesignArtifacts' const props = withDefaults( @@ -140,139 +137,11 @@ defineExpose({ refresh: fetchEntities }) </Badge> </div> - <div class="space-y-1.5"> - <Card - v-for="row in entityRows" - :key="row.type" - :class="['overflow-hidden', prepopulationCardClass(row.prepopulated_instances)]" - > - <details class="group"> - <summary - class="flex cursor-pointer list-none items-center gap-2 px-2.5 py-2 [&::-webkit-details-marker]:hidden" - > - <ChevronDown - class="size-3.5 shrink-0 text-muted-foreground transition-transform group-open:rotate-180" - /> - <div class="flex min-w-0 flex-1 flex-wrap items-center gap-1.5"> - <span class="text-sm font-semibold leading-tight">{{ row.type }}</span> - <Badge - variant="outline" - class="h-5 px-1.5 text-[10px]" - :class="prepopulationBadgeClass(row.prepopulated_instances)" - > - {{ 
prepopulationLabel(row.prepopulated_instances) }} - </Badge> - <Badge variant="secondary" class="h-5 px-1.5 text-[10px]"> - {{ row.instance_count }} instance{{ row.instance_count === 1 ? '' : 's' }} - </Badge> - </div> - </summary> - <div class="space-y-2 border-t px-2.5 pb-2.5 pt-0"> - <p v-if="row.description" class="pt-2 text-xs leading-snug text-muted-foreground"> - {{ row.description }} - </p> - <div v-else class="pt-1.5 text-xs italic text-muted-foreground">No description</div> - - <div class="space-y-1"> - <p class="text-[10px] font-medium uppercase tracking-wide text-muted-foreground"> - Properties - </p> - <div - v-if="row.property_definitions && Object.keys(row.property_definitions).length > 0" - class="divide-y rounded-md border text-xs" - > - <div - v-for="(label, key) in row.property_definitions" - :key="key" - class="flex flex-wrap items-center gap-x-1.5 gap-y-0.5 px-2 py-1" - > - <code class="rounded bg-muted px-1 py-0.5 font-mono text-[10px]">{{ key }}</code> - <span class="text-muted-foreground">{{ label }}</span> - <Badge - v-if="row.required_properties?.includes(String(key))" - variant="outline" - class="h-4 px-1 text-[9px]" - > - required - </Badge> - <Badge - v-else-if="row.optional_properties?.includes(String(key))" - variant="outline" - class="h-4 px-1 text-[9px] opacity-70" - > - optional - </Badge> - </div> - </div> - <p v-else class="text-xs text-muted-foreground">No property definitions</p> - </div> - - <details v-if="(row.instances?.length ?? 0) > 0" class="group/inst rounded-md border"> - <summary - class="flex cursor-pointer list-none items-center gap-1.5 px-2 py-1.5 text-xs font-medium hover:bg-muted/50 [&::-webkit-details-marker]:hidden" - > - <ChevronDown - class="size-3.5 shrink-0 text-muted-foreground transition-transform group-open/inst:rotate-180" - /> - Instances - <span v-if="row.instances_truncated" class="font-normal text-muted-foreground"> - (showing {{ row.instances_returned ?? row.instances?.length ?? 
0 }} of - {{ row.instance_count }}) - </span> - </summary> - <div class="space-y-2 border-t p-2"> - <ul class="space-y-1 text-xs"> - <li - v-for="(inst, idx) in pageSlice(instancePage, row.type, row.instances || []).items" - :key="inst.slug ?? idx" - class="rounded-md bg-muted/40 px-2 py-1" - > - <div class="mb-0.5 font-mono text-[10px] text-muted-foreground"> - {{ inst.slug ?? '—' }} - </div> - <pre class="max-h-24 overflow-y-auto whitespace-pre-wrap break-all text-[10px] leading-snug">{{ - JSON.stringify(inst.properties ?? {}, null, 2) - }}</pre> - </li> - </ul> - <div - v-if="pageSlice(instancePage, row.type, row.instances || []).total > 20" - class="flex flex-wrap items-center gap-1.5" - @click.stop - > - <Button - variant="outline" - size="sm" - class="h-7 px-2 text-xs" - :disabled="pageSlice(instancePage, row.type, row.instances || []).page <= 0" - @click.stop.prevent="setInstancePage(row.type, pageSlice(instancePage, row.type, row.instances || []).page - 1)" - > - Previous - </Button> - <span class="text-xs text-muted-foreground"> - Page {{ pageSlice(instancePage, row.type, row.instances || []).page + 1 }} / - {{ pageSlice(instancePage, row.type, row.instances || []).totalPages }} - ({{ row.instances_truncated ? `${row.instances_returned ?? row.instances?.length ?? 
0} loaded of ${row.instance_count}` : pageSlice(instancePage, row.type, row.instances || []).total }} total) - </span> - <Button - variant="outline" - size="sm" - class="h-7 px-2 text-xs" - :disabled=" - pageSlice(instancePage, row.type, row.instances || []).page - >= pageSlice(instancePage, row.type, row.instances || []).totalPages - 1 - " - @click.stop.prevent="setInstancePage(row.type, pageSlice(instancePage, row.type, row.instances || []).page + 1)" - > - Next - </Button> - </div> - </div> - </details> - </div> - </details> - </Card> - </div> + <GraphDesignEntityTypeList + :rows="entityRows" + :instance-page="instancePage" + @update:instance-page="setInstancePage" + /> <p v-if="data.limits.entity_instances_truncated" diff --git a/src/dev-ui/app/components/graph-management/GraphDesignEntityTypeList.vue b/src/dev-ui/app/components/graph-management/GraphDesignEntityTypeList.vue new file mode 100644 index 000000000..a2723a6e7 --- /dev/null +++ b/src/dev-ui/app/components/graph-management/GraphDesignEntityTypeList.vue @@ -0,0 +1,157 @@ +<script setup lang="ts"> +import { ChevronDown } from 'lucide-vue-next' +import { Card } from '@/components/ui/card' +import { Badge } from '@/components/ui/badge' +import { Button } from '@/components/ui/button' +import { + type DesignArtifactEntityType, + pageSlice, + prepopulationBadgeClass, + prepopulationCardClass, + prepopulationLabel, +} from '@/utils/kgDesignArtifacts' + +defineProps<{ + rows: DesignArtifactEntityType[] + instancePage: Record<string, number> +}>() + +const emit = defineEmits<{ + 'update:instancePage': [key: string, page: number] +}>() +</script> + +<template> + <div class="space-y-1.5"> + <Card + v-for="row in rows" + :key="row.type" + :class="['overflow-hidden', prepopulationCardClass(row.prepopulated_instances)]" + > + <details class="group"> + <summary + class="flex cursor-pointer list-none items-center gap-2 px-2.5 py-2 [&::-webkit-details-marker]:hidden" + > + <ChevronDown + class="size-3.5 shrink-0 
text-muted-foreground transition-transform group-open:rotate-180" + /> + <div class="flex min-w-0 flex-1 flex-wrap items-center gap-1.5"> + <span class="text-sm font-semibold leading-tight">{{ row.type }}</span> + <Badge + variant="outline" + class="h-5 px-1.5 text-[10px]" + :class="prepopulationBadgeClass(row.prepopulated_instances)" + > + {{ prepopulationLabel(row.prepopulated_instances) }} + </Badge> + <Badge variant="secondary" class="h-5 px-1.5 text-[10px]"> + {{ row.instance_count }} instance{{ row.instance_count === 1 ? '' : 's' }} + </Badge> + </div> + </summary> + <div class="space-y-2 border-t px-2.5 pb-2.5 pt-0"> + <p v-if="row.description" class="pt-2 text-xs leading-snug text-muted-foreground"> + {{ row.description }} + </p> + <div v-else class="pt-1.5 text-xs italic text-muted-foreground">No description</div> + + <div class="space-y-1"> + <p class="text-[10px] font-medium uppercase tracking-wide text-muted-foreground"> + Properties + </p> + <div + v-if="row.property_definitions && Object.keys(row.property_definitions).length > 0" + class="divide-y rounded-md border text-xs" + > + <div + v-for="(label, key) in row.property_definitions" + :key="key" + class="flex flex-wrap items-center gap-x-1.5 gap-y-0.5 px-2 py-1" + > + <code class="rounded bg-muted px-1 py-0.5 font-mono text-[10px]">{{ key }}</code> + <span class="text-muted-foreground">{{ label }}</span> + <Badge + v-if="row.required_properties?.includes(String(key))" + variant="outline" + class="h-4 px-1 text-[9px]" + > + required + </Badge> + <Badge + v-else-if="row.optional_properties?.includes(String(key))" + variant="outline" + class="h-4 px-1 text-[9px] opacity-70" + > + optional + </Badge> + </div> + </div> + <p v-else class="text-xs text-muted-foreground">No property definitions</p> + </div> + + <details v-if="(row.instances?.length ?? 
0) > 0" class="group/inst rounded-md border"> + <summary + class="flex cursor-pointer list-none items-center gap-1.5 px-2 py-1.5 text-xs font-medium hover:bg-muted/50 [&::-webkit-details-marker]:hidden" + > + <ChevronDown + class="size-3.5 shrink-0 text-muted-foreground transition-transform group-open/inst:rotate-180" + /> + Instances + <span v-if="row.instances_truncated" class="font-normal text-muted-foreground"> + (showing {{ row.instances_returned ?? row.instances?.length ?? 0 }} of + {{ row.instance_count }}) + </span> + </summary> + <div class="space-y-2 border-t p-2"> + <ul class="space-y-1 text-xs"> + <li + v-for="(inst, idx) in pageSlice(instancePage, row.type, row.instances || []).items" + :key="inst.slug ?? idx" + class="rounded-md bg-muted/40 px-2 py-1" + > + <div class="mb-0.5 font-mono text-[10px] text-muted-foreground"> + {{ inst.slug ?? '—' }} + </div> + <pre class="max-h-24 overflow-y-auto whitespace-pre-wrap break-all text-[10px] leading-snug">{{ + JSON.stringify(inst.properties ?? 
{}, null, 2) + }}</pre> + </li> + </ul> + <div + v-if="pageSlice(instancePage, row.type, row.instances || []).total > 20" + class="flex flex-wrap items-center gap-1.5" + @click.stop + > + <Button + variant="outline" + size="sm" + class="h-7 px-2 text-xs" + :disabled="pageSlice(instancePage, row.type, row.instances || []).page <= 0" + @click.stop.prevent="emit('update:instancePage', row.type, pageSlice(instancePage, row.type, row.instances || []).page - 1)" + > + Previous + </Button> + <span class="text-xs text-muted-foreground"> + Page {{ pageSlice(instancePage, row.type, row.instances || []).page + 1 }} / + {{ pageSlice(instancePage, row.type, row.instances || []).totalPages }} + </span> + <Button + variant="outline" + size="sm" + class="h-7 px-2 text-xs" + :disabled=" + pageSlice(instancePage, row.type, row.instances || []).page + >= pageSlice(instancePage, row.type, row.instances || []).totalPages - 1 + " + @click.stop.prevent="emit('update:instancePage', row.type, pageSlice(instancePage, row.type, row.instances || []).page + 1)" + > + Next + </Button> + </div> + </div> + </details> + </div> + </details> + </Card> + </div> +</template> diff --git a/src/dev-ui/app/components/graph-management/GraphDesignRelationshipTypeList.vue b/src/dev-ui/app/components/graph-management/GraphDesignRelationshipTypeList.vue new file mode 100644 index 000000000..bbc557fe2 --- /dev/null +++ b/src/dev-ui/app/components/graph-management/GraphDesignRelationshipTypeList.vue @@ -0,0 +1,161 @@ +<script setup lang="ts"> +import { ChevronDown } from 'lucide-vue-next' +import { Card } from '@/components/ui/card' +import { Badge } from '@/components/ui/badge' +import { Button } from '@/components/ui/button' +import { + type DesignArtifactRelationshipType, + pageSlice, + prepopulationBadgeClass, + prepopulationCardClass, + prepopulationLabel, +} from '@/utils/kgDesignArtifacts' + +defineProps<{ + rows: DesignArtifactRelationshipType[] + instancePage: Record<string, number> +}>() + +const emit 
= defineEmits<{ + 'update:instancePage': [key: string, page: number] +}>() +</script> + +<template> + <div class="space-y-1.5"> + <Card + v-for="rel in rows" + :key="rel.key" + :class="['overflow-hidden', prepopulationCardClass(rel.prepopulated_instances)]" + > + <details class="group"> + <summary + class="flex cursor-pointer list-none items-start gap-2 px-2.5 py-2 [&::-webkit-details-marker]:hidden" + > + <ChevronDown + class="mt-0.5 size-3.5 shrink-0 text-muted-foreground transition-transform group-open:rotate-180" + /> + <div class="min-w-0 flex-1 space-y-0.5"> + <div class="flex min-w-0 flex-wrap items-center gap-1.5"> + <span class="text-sm font-semibold leading-tight text-foreground">{{ + rel.source_entity_type + }}</span> + <Badge variant="secondary" class="h-5 px-1.5 font-mono text-[10px]">{{ + rel.relationship_type + }}</Badge> + <template v-if="rel.reverse_relationship_type"> + <span class="text-[10px] text-muted-foreground">/</span> + <Badge variant="outline" class="h-5 px-1.5 font-mono text-[10px]">{{ + rel.reverse_relationship_type + }}</Badge> + </template> + <Badge + variant="outline" + class="h-5 px-1.5 text-[10px]" + :class="prepopulationBadgeClass(rel.prepopulated_instances)" + > + {{ prepopulationLabel(rel.prepopulated_instances) }} + </Badge> + <span class="text-xs text-muted-foreground">→</span> + <span class="text-sm font-semibold leading-tight text-foreground">{{ + rel.target_entity_type + }}</span> + <Badge variant="outline" class="h-5 px-1.5 text-[10px]"> + {{ rel.instance_count }} instance{{ rel.instance_count === 1 ? 
'' : 's' }} + </Badge> + </div> + <p class="truncate font-mono text-[10px] text-muted-foreground">{{ rel.key }}</p> + </div> + </summary> + <div class="space-y-2 border-t px-2.5 pb-2.5 pt-0"> + <p v-if="rel.description" class="pt-2 text-xs leading-snug text-muted-foreground"> + {{ rel.description }} + </p> + <div v-else class="pt-1.5 text-xs italic text-muted-foreground">No description</div> + + <div class="space-y-1"> + <p class="text-[10px] font-medium uppercase tracking-wide text-muted-foreground"> + Parameters + </p> + <div + v-if="rel.parameter_definitions && Object.keys(rel.parameter_definitions).length > 0" + class="divide-y rounded-md border text-xs" + > + <div + v-for="(label, key) in rel.parameter_definitions" + :key="key" + class="flex flex-wrap items-center gap-x-1.5 gap-y-0.5 px-2 py-1" + > + <code class="rounded bg-muted px-1 py-0.5 font-mono text-[10px]">{{ key }}</code> + <span class="text-muted-foreground">{{ label }}</span> + </div> + </div> + <p v-else class="text-xs text-muted-foreground">No parameter definitions</p> + </div> + + <details v-if="rel.instances.length > 0" class="group/inst rounded-md border"> + <summary + class="flex cursor-pointer list-none items-center gap-1.5 px-2 py-1.5 text-xs font-medium hover:bg-muted/50 [&::-webkit-details-marker]:hidden" + > + <ChevronDown + class="size-3.5 shrink-0 text-muted-foreground transition-transform group-open/inst:rotate-180" + /> + Instances + <span v-if="rel.instances_truncated" class="font-normal text-muted-foreground"> + (showing {{ rel.instances_returned ?? 
rel.instances.length }} of + {{ rel.instance_count }}) + </span> + </summary> + <div class="space-y-2 border-t p-2"> + <ul class="space-y-1 text-xs"> + <li + v-for="(inst, idx) in pageSlice(instancePage, rel.key, rel.instances).items" + :key="`${rel.key}-${idx}`" + class="rounded-md bg-muted/40 px-2 py-1" + > + <div class="mb-0.5 font-mono text-[10px] text-muted-foreground"> + {{ inst.source_slug }} --{{ rel.relationship_type }}--> {{ inst.target_slug }} + </div> + <pre class="max-h-24 overflow-y-auto whitespace-pre-wrap break-all text-[10px] leading-snug">{{ + JSON.stringify(inst.properties ?? {}, null, 2) + }}</pre> + </li> + </ul> + <div + v-if="pageSlice(instancePage, rel.key, rel.instances).total > 20" + class="flex flex-wrap items-center gap-1.5" + @click.stop + > + <Button + variant="outline" + size="sm" + class="h-7 px-2 text-xs" + :disabled="pageSlice(instancePage, rel.key, rel.instances).page <= 0" + @click.stop.prevent="emit('update:instancePage', rel.key, pageSlice(instancePage, rel.key, rel.instances).page - 1)" + > + Previous + </Button> + <span class="text-xs text-muted-foreground"> + Page {{ pageSlice(instancePage, rel.key, rel.instances).page + 1 }} / + {{ pageSlice(instancePage, rel.key, rel.instances).totalPages }} + </span> + <Button + variant="outline" + size="sm" + class="h-7 px-2 text-xs" + :disabled=" + pageSlice(instancePage, rel.key, rel.instances).page + >= pageSlice(instancePage, rel.key, rel.instances).totalPages - 1 + " + @click.stop.prevent="emit('update:instancePage', rel.key, pageSlice(instancePage, rel.key, rel.instances).page + 1)" + > + Next + </Button> + </div> + </div> + </details> + </div> + </details> + </Card> + </div> +</template> diff --git a/src/dev-ui/app/components/graph-management/GraphDesignRelationshipsPanel.vue b/src/dev-ui/app/components/graph-management/GraphDesignRelationshipsPanel.vue index 90ba6d09a..67bb8e39d 100644 --- a/src/dev-ui/app/components/graph-management/GraphDesignRelationshipsPanel.vue +++ 
b/src/dev-ui/app/components/graph-management/GraphDesignRelationshipsPanel.vue @@ -1,17 +1,12 @@ <script setup lang="ts"> import { computed, ref, watch } from 'vue' import { toast } from 'vue-sonner' -import { ChevronDown, Loader2, RefreshCw } from 'lucide-vue-next' +import { Loader2, RefreshCw } from 'lucide-vue-next' import { Card, CardContent, CardHeader, CardTitle } from '@/components/ui/card' import { Badge } from '@/components/ui/badge' import { Button } from '@/components/ui/button' -import { - type DesignArtifactsResponse, - pageSlice, - prepopulationBadgeClass, - prepopulationCardClass, - prepopulationLabel, -} from '@/utils/kgDesignArtifacts' +import GraphDesignRelationshipTypeList from '@/components/graph-management/GraphDesignRelationshipTypeList.vue' +import { type DesignArtifactsResponse } from '@/utils/kgDesignArtifacts' const props = withDefaults( defineProps<{ @@ -129,147 +124,11 @@ defineExpose({ refresh: fetchRelationships }) </Badge> </div> - <div class="space-y-1.5"> - <Card - v-for="rel in relationshipRows" - :key="rel.key" - :class="['overflow-hidden', prepopulationCardClass(rel.prepopulated_instances)]" - > - <details class="group"> - <summary - class="flex cursor-pointer list-none items-start gap-2 px-2.5 py-2 [&::-webkit-details-marker]:hidden" - > - <ChevronDown - class="mt-0.5 size-3.5 shrink-0 text-muted-foreground transition-transform group-open:rotate-180" - /> - <div class="min-w-0 flex-1 space-y-0.5"> - <div class="flex min-w-0 flex-wrap items-center gap-1.5"> - <span class="text-sm font-semibold leading-tight text-foreground">{{ - rel.source_entity_type - }}</span> - <Badge variant="secondary" class="h-5 px-1.5 font-mono text-[10px]">{{ - rel.relationship_type - }}</Badge> - <template v-if="rel.reverse_relationship_type"> - <span class="text-[10px] text-muted-foreground">/</span> - <Badge variant="outline" class="h-5 px-1.5 font-mono text-[10px]">{{ - rel.reverse_relationship_type - }}</Badge> - </template> - <Badge - 
variant="outline" - class="h-5 px-1.5 text-[10px]" - :class="prepopulationBadgeClass(rel.prepopulated_instances)" - > - {{ prepopulationLabel(rel.prepopulated_instances) }} - </Badge> - <span class="text-xs text-muted-foreground">→</span> - <span class="text-sm font-semibold leading-tight text-foreground">{{ - rel.target_entity_type - }}</span> - <Badge variant="outline" class="h-5 px-1.5 text-[10px]"> - {{ rel.instance_count }} instance{{ rel.instance_count === 1 ? '' : 's' }} - </Badge> - </div> - <p class="truncate font-mono text-[10px] text-muted-foreground">{{ rel.key }}</p> - </div> - </summary> - <div class="space-y-2 border-t px-2.5 pb-2.5 pt-0"> - <p v-if="rel.description" class="pt-2 text-xs leading-snug text-muted-foreground"> - {{ rel.description }} - </p> - <div v-else class="pt-1.5 text-xs italic text-muted-foreground">No description</div> - - <div class="space-y-1"> - <p class="text-[10px] font-medium uppercase tracking-wide text-muted-foreground"> - Parameters - </p> - <div - v-if="rel.parameter_definitions && Object.keys(rel.parameter_definitions).length > 0" - class="divide-y rounded-md border text-xs" - > - <div - v-for="(label, key) in rel.parameter_definitions" - :key="key" - class="flex flex-wrap items-center gap-x-1.5 gap-y-0.5 px-2 py-1" - > - <code class="rounded bg-muted px-1 py-0.5 font-mono text-[10px]">{{ key }}</code> - <span class="text-muted-foreground">{{ label }}</span> - </div> - </div> - <p v-else class="text-xs text-muted-foreground">No parameter definitions</p> - </div> - - <details v-if="rel.instances.length > 0" class="group/inst rounded-md border"> - <summary - class="flex cursor-pointer list-none items-center gap-1.5 px-2 py-1.5 text-xs font-medium hover:bg-muted/50 [&::-webkit-details-marker]:hidden" - > - <ChevronDown - class="size-3.5 shrink-0 text-muted-foreground transition-transform group-open/inst:rotate-180" - /> - Instances - <span v-if="rel.instances_truncated" class="font-normal text-muted-foreground"> - (showing 
{{ rel.instances_returned ?? rel.instances.length }} of - {{ rel.instance_count }}) - </span> - </summary> - <div class="space-y-2 border-t p-2"> - <ul class="space-y-1 text-xs"> - <li - v-for="(inst, idx) in pageSlice(instancePage, rel.key, rel.instances).items" - :key="`${rel.key}-${idx}`" - class="rounded-md bg-muted/40 px-2 py-1" - > - <div class="mb-0.5 font-mono text-[10px] text-muted-foreground"> - {{ inst.source_slug }} --{{ rel.relationship_type }}--> {{ inst.target_slug }} - </div> - <pre class="max-h-24 overflow-y-auto whitespace-pre-wrap break-all text-[10px] leading-snug">{{ - JSON.stringify(inst.properties ?? {}, null, 2) - }}</pre> - </li> - </ul> - <div - v-if="pageSlice(instancePage, rel.key, rel.instances).total > 20" - class="flex flex-wrap items-center gap-1.5" - @click.stop - > - <Button - variant="outline" - size="sm" - class="h-7 px-2 text-xs" - :disabled="pageSlice(instancePage, rel.key, rel.instances).page <= 0" - @click.stop.prevent=" - setInstancePage(rel.key, pageSlice(instancePage, rel.key, rel.instances).page - 1) - " - > - Previous - </Button> - <span class="text-xs text-muted-foreground"> - Page {{ pageSlice(instancePage, rel.key, rel.instances).page + 1 }} / - {{ pageSlice(instancePage, rel.key, rel.instances).totalPages }} - ({{ rel.instances_truncated ? `${rel.instances_returned ?? 
rel.instances.length} loaded of ${rel.instance_count}` : pageSlice(instancePage, rel.key, rel.instances).total }} total) - </span> - <Button - variant="outline" - size="sm" - class="h-7 px-2 text-xs" - :disabled=" - pageSlice(instancePage, rel.key, rel.instances).page - >= pageSlice(instancePage, rel.key, rel.instances).totalPages - 1 - " - @click.stop.prevent=" - setInstancePage(rel.key, pageSlice(instancePage, rel.key, rel.instances).page + 1) - " - > - Next - </Button> - </div> - </div> - </details> - </div> - </details> - </Card> - </div> + <GraphDesignRelationshipTypeList + :rows="relationshipRows" + :instance-page="instancePage" + @update:instance-page="setInstancePage" + /> <p v-if="data.limits.relationship_instances_truncated" diff --git a/src/dev-ui/app/components/graph-management/GraphSchemaExplorer.vue b/src/dev-ui/app/components/graph-management/GraphSchemaExplorer.vue new file mode 100644 index 000000000..201a71575 --- /dev/null +++ b/src/dev-ui/app/components/graph-management/GraphSchemaExplorer.vue @@ -0,0 +1,214 @@ +<script setup lang="ts"> +import { computed, ref, watch } from 'vue' +import { toast } from 'vue-sonner' +import { Box, Link2, Loader2, Network, RefreshCw } from 'lucide-vue-next' +import { Badge } from '@/components/ui/badge' +import { Button } from '@/components/ui/button' +import { Card, CardContent, CardDescription, CardHeader, CardTitle } from '@/components/ui/card' +import { Tabs, TabsContent, TabsList, TabsTrigger } from '@/components/ui/tabs' +import GraphDesignEntityTypeList from '@/components/graph-management/GraphDesignEntityTypeList.vue' +import GraphDesignRelationshipTypeList from '@/components/graph-management/GraphDesignRelationshipTypeList.vue' +import { + type DesignArtifactEntityType, + type DesignArtifactsResponse, +} from '@/utils/kgDesignArtifacts' + +const props = withDefaults( + defineProps<{ + kgId: string + reloadNonce?: number + }>(), + { reloadNonce: 0 }, +) + +const { apiFetch } = useApiClient() + +const 
loading = ref(true) +const data = ref<DesignArtifactsResponse | null>(null) +const activeTab = ref<'entities' | 'relationships'>('entities') +const entityInstancePage = ref<Record<string, number>>({}) +const relationshipInstancePage = ref<Record<string, number>>({}) + +async function fetchArtifacts(options: { preserveUiState?: boolean } = {}) { + if (!props.kgId) { + data.value = null + loading.value = false + return + } + const preserveUiState = options.preserveUiState === true && data.value !== null + if (!preserveUiState) { + loading.value = true + entityInstancePage.value = {} + relationshipInstancePage.value = {} + } + try { + data.value = await apiFetch<DesignArtifactsResponse>( + `/management/knowledge-graphs/${props.kgId}/design-artifacts`, + { query: { limit: 500 } }, + ) + } catch (err: unknown) { + toast.error('Failed to load graph schema', { + description: err instanceof Error ? err.message : 'Request failed', + }) + data.value = null + } finally { + loading.value = false + } +} + +const entityRows = computed((): DesignArtifactEntityType[] => { + if (!data.value?.entities) return [] + return Object.entries(data.value.entities).map(([type, def]) => ({ + type, + ...def, + })) +}) + +const relationshipRows = computed(() => data.value?.relationships ?? 
[]) + +const hasSchema = computed( + () => Boolean(data.value?.found && (entityRows.value.length > 0 || relationshipRows.value.length > 0)), +) + +watch( + () => [props.kgId, props.reloadNonce] as const, + ([, reloadNonce]) => { + void fetchArtifacts({ preserveUiState: reloadNonce > 0 }) + }, + { immediate: true }, +) + +defineExpose({ refresh: fetchArtifacts }) +</script> + +<template> + <Card class="overflow-hidden"> + <CardHeader class="gap-3 space-y-0 border-b bg-muted/20 pb-4"> + <div class="flex flex-wrap items-start justify-between gap-3"> + <div class="space-y-1"> + <CardTitle class="flex items-center gap-2 text-base"> + <Network class="size-4 text-primary" /> + Graph schema & instances + </CardTitle> + <CardDescription class="max-w-2xl"> + Unified view of entity and relationship types from the ontology, with live instance + inventory from the graph database. Expand any type to inspect properties and instances. + </CardDescription> + </div> + <Button variant="outline" size="sm" :disabled="loading" @click="fetchArtifacts"> + <Loader2 v-if="loading" class="mr-1.5 size-3.5 animate-spin" /> + <RefreshCw v-else class="mr-1.5 size-3.5" /> + Refresh + </Button> + </div> + <div v-if="data && hasSchema" class="flex flex-wrap gap-2 pt-1"> + <Badge variant="secondary" class="gap-1.5 px-2.5 py-1"> + <Box class="size-3.5" /> + {{ data.counts.entity_types }} entity type{{ data.counts.entity_types === 1 ? '' : 's' }} + · {{ data.counts.entity_instances }} instance{{ data.counts.entity_instances === 1 ? '' : 's' }} + </Badge> + <Badge variant="secondary" class="gap-1.5 px-2.5 py-1"> + <Link2 class="size-3.5" /> + {{ data.counts.relationship_types }} relationship type{{ + data.counts.relationship_types === 1 ? '' : 's' + }} + · {{ data.counts.relationship_instances }} edge instance{{ + data.counts.relationship_instances === 1 ? 
'' : 's' + }} + </Badge> + </div> + </CardHeader> + + <CardContent class="p-0"> + <div v-if="loading && !data" class="flex items-center justify-center py-20"> + <Loader2 class="size-8 animate-spin text-muted-foreground" /> + </div> + + <div v-else-if="!data || !hasSchema" class="space-y-3 px-6 py-10 text-center"> + <p class="text-sm font-medium"> + {{ !data?.found ? 'No ontology saved yet' : 'No schema types defined yet' }} + </p> + <p class="mx-auto max-w-md text-sm text-muted-foreground"> + Use Graph Management to design entity and relationship types. When changes are saved, + click Refresh to browse types and instances here. + </p> + </div> + + <Tabs v-else v-model="activeTab" class="w-full"> + <div class="border-b px-4 pt-3"> + <TabsList class="grid h-9 w-full max-w-md grid-cols-2"> + <TabsTrigger value="entities" class="gap-1.5 text-xs"> + <Box class="size-3.5" /> + Entities + <Badge variant="outline" class="ml-0.5 h-4 px-1 text-[10px]"> + {{ entityRows.length }} + </Badge> + </TabsTrigger> + <TabsTrigger value="relationships" class="gap-1.5 text-xs"> + <Link2 class="size-3.5" /> + Relationships + <Badge variant="outline" class="ml-0.5 h-4 px-1 text-[10px]"> + {{ relationshipRows.length }} + </Badge> + </TabsTrigger> + </TabsList> + </div> + + <div + class="flex flex-wrap items-center gap-2 border-b bg-muted/15 px-4 py-2 text-xs" + role="note" + aria-label="Prepopulation strategy color guide" + > + <span class="font-medium text-muted-foreground">Prepopulation:</span> + <Badge + variant="outline" + class="h-5 border-cyan-500/40 bg-cyan-500/10 px-1.5 text-[10px] text-cyan-700 dark:text-cyan-300" + > + true + </Badge> + <Badge + variant="outline" + class="h-5 border-emerald-500/40 bg-emerald-500/10 px-1.5 text-[10px] text-emerald-700 dark:text-emerald-300" + > + false + </Badge> + <span class="text-muted-foreground"> + · Bidirectional pairs show forward / inverse labels on one row + </span> + </div> + + <TabsContent value="entities" class="mt-0 space-y-3 px-4 
py-4"> + <GraphDesignEntityTypeList + :rows="entityRows" + :instance-page="entityInstancePage" + @update:instance-page="(key, page) => { entityInstancePage = { ...entityInstancePage, [key]: page } }" + /> + <p + v-if="data.limits.entity_instances_truncated" + class="text-xs text-muted-foreground" + > + Browsable entity instances capped at {{ data.limits.entity_instances_returned }} of + {{ data.counts.entity_instances }} total (API limit {{ data.limits.requested }}). Type + badges show full counts. + </p> + </TabsContent> + + <TabsContent value="relationships" class="mt-0 space-y-3 px-4 py-4"> + <GraphDesignRelationshipTypeList + :rows="relationshipRows" + :instance-page="relationshipInstancePage" + @update:instance-page="(key, page) => { relationshipInstancePage = { ...relationshipInstancePage, [key]: page } }" + /> + <p + v-if="data.limits.relationship_instances_truncated" + class="text-xs text-muted-foreground" + > + Browsable relationship instances capped at + {{ data.limits.relationship_instances_returned }} of + {{ data.counts.relationship_instances }} total (API limit {{ data.limits.requested }}). 
+ </p> + </TabsContent> + </Tabs> + </CardContent> + </Card> +</template> diff --git a/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue b/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue index 1698c7712..93a0bb741 100644 --- a/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue +++ b/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue @@ -49,6 +49,7 @@ import GraphDesignRelationshipsPanel from '@/components/graph-management/GraphDe import GraphExtractionJobsWorkspace from '@/components/graph-management/GraphExtractionJobsWorkspace.vue' import GraphExtractionArchivedHistory from '@/components/graph-management/GraphExtractionArchivedHistory.vue' import GraphManagementMutationAuthoringPanel from '@/components/graph-management/GraphManagementMutationAuthoringPanel.vue' +import GraphSchemaExplorer from '@/components/graph-management/GraphSchemaExplorer.vue' import { GRAPH_MANAGEMENT_INPUT_PLACEHOLDERS, GRAPH_MANAGEMENT_MODE_LABELS, @@ -73,6 +74,7 @@ import { resolveSchemaRailSelection, } from '@/utils/kgGraphManagementArtifacts' import { + buildDataSourcesStepUrl, buildManageStepUrl, parseManageStepQuery, } from '@/utils/kgManageWorkspace' @@ -234,6 +236,10 @@ const designArtifactsRefreshing = ref(false) const activeStep = computed(() => parseManageStepQuery(route.query.step)) const showOverview = computed(() => activeStep.value === null) +const dataSourcesDetailUrl = computed(() => + buildDataSourcesStepUrl(kgId.value, dataSourceCount.value), +) + const workspaceOverviewInput = computed(() => ({ kgId: kgId.value, dataSourceCount: dataSourceCount.value, @@ -1550,9 +1556,17 @@ watch( </div> <Card> - <CardHeader> - <CardTitle class="text-base">Data Sources</CardTitle> - <CardDescription>Configured repositories for this knowledge graph</CardDescription> + <CardHeader class="flex flex-row items-start justify-between gap-3 space-y-0"> + <div class="space-y-1"> + <CardTitle class="text-base">Data Sources</CardTitle> + <CardDescription>Configured 
repositories for this knowledge graph</CardDescription> + </div> + <Button as-child variant="outline" size="sm" class="shrink-0"> + <NuxtLink :to="dataSourcesDetailUrl" class="inline-flex items-center"> + More Detail + <ArrowRight class="ml-1.5 size-3.5" /> + </NuxtLink> + </Button> </CardHeader> <CardContent> <div v-if="overviewSourceRows.length === 0" class="text-sm text-muted-foreground"> @@ -1577,40 +1591,7 @@ watch( </CardContent> </Card> - <div class="grid gap-4 md:grid-cols-2"> - <Card> - <CardHeader> - <CardTitle class="text-base">Entity Types</CardTitle> - <CardDescription>Node types in the knowledge graph ontology</CardDescription> - </CardHeader> - <CardContent> - <div v-if="entityTypeLabels.length === 0" class="text-sm text-muted-foreground"> - No entity types defined yet. - </div> - <div v-else class="flex flex-wrap gap-2"> - <Badge v-for="label in entityTypeLabels" :key="label" variant="outline"> - {{ label }} - </Badge> - </div> - </CardContent> - </Card> - <Card> - <CardHeader> - <CardTitle class="text-base">Relationship Types</CardTitle> - <CardDescription>Edge types connecting entities</CardDescription> - </CardHeader> - <CardContent> - <div v-if="relationshipTypeLabels.length === 0" class="text-sm text-muted-foreground"> - No relationship types defined yet. 
- </div> - <div v-else class="flex flex-wrap gap-2"> - <Badge v-for="label in relationshipTypeLabels" :key="label" variant="outline"> - {{ label }} - </Badge> - </div> - </CardContent> - </Card> - </div> + <GraphSchemaExplorer :kg-id="kgId" :reload-nonce="designArtifactsReloadNonce" /> </section> <section v-else-if="activeStep === 'mutation-logs'" class="space-y-4"> diff --git a/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts b/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts index 1eff5a1f7..62d465ae1 100644 --- a/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts +++ b/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts @@ -191,8 +191,9 @@ describe('KG-MANAGE-002 - workspace hub tile set', () => { expect(manageWorkspaceVue).toContain('Project workspace') expect(manageWorkspaceVue).toContain('workspaceHubTiles') expect(manageWorkspaceVue).toContain('workspaceHubTileClasses') - expect(manageWorkspaceVue).toContain('Entity Types') - expect(manageWorkspaceVue).toContain('Relationship Types') + expect(manageWorkspaceVue).toContain('GraphSchemaExplorer') + expect(manageWorkspaceVue).toContain('More Detail') + expect(manageWorkspaceVue).toContain('dataSourcesDetailUrl') expect(manageWorkspaceVue).toContain('Archived writes') expect(manageWorkspaceHubTs).toContain('Data sources') expect(manageWorkspaceHubTs).toContain('Graph Management') From 53ab5cd258f8c5e6c342ccd220e8d9f6979166cf Mon Sep 17 00:00:00 2001 From: aredenba-rh <aredenba@redhat.com> Date: Sun, 14 Jun 2026 16:41:13 -0400 Subject: [PATCH 137/153] feat(dev-ui): improve schema entity/relationship browsing in graph management Load 100 instances per type instead of a global cap, merge observed properties into schema display, and add paginated instance APIs with property search plus load-more UI on entity and relationship panels. 
Co-authored-by: Cursor <cursoragent@cursor.com> --- .../services/graph_query_service.py | 8 + .../graph/infrastructure/graph_repository.py | 62 +++++ .../management/design_artifacts_service.py | 186 ++++++++++++- .../application/design_artifacts.py | 250 +++++++++++++----- .../presentation/knowledge_graphs/models.py | 29 ++ .../presentation/knowledge_graphs/routes.py | 75 +++++- .../application/test_design_artifacts.py | 84 +++++- .../GraphDesignEntitiesPanel.vue | 20 +- .../GraphDesignEntityTypeList.vue | 194 +++++++++++--- .../GraphDesignRelationshipTypeList.vue | 208 ++++++++++++--- .../GraphDesignRelationshipsPanel.vue | 21 +- .../graph-management/GraphSchemaExplorer.vue | 25 +- .../useDesignArtifactInstanceQuery.ts | 249 +++++++++++++++++ .../pages/knowledge-graphs/[kgId]/manage.vue | 4 +- .../app/tests/kg-design-artifacts.test.ts | 19 ++ src/dev-ui/app/utils/kgDesignArtifacts.ts | 19 ++ 16 files changed, 1273 insertions(+), 180 deletions(-) create mode 100644 src/dev-ui/app/composables/useDesignArtifactInstanceQuery.ts diff --git a/src/api/graph/application/services/graph_query_service.py b/src/api/graph/application/services/graph_query_service.py index 1e63cfc0c..9ba64d098 100644 --- a/src/api/graph/application/services/graph_query_service.py +++ b/src/api/graph/application/services/graph_query_service.py @@ -76,6 +76,8 @@ def list_by_label( knowledge_graph_id: str | None = None, limit: int = 100, offset: int = 0, + property_name: str | None = None, + property_value: str | None = None, ) -> list[NodeRecord]: """List nodes of one entity type.""" nodes = self._repository.find_nodes_by_label( @@ -83,6 +85,8 @@ def list_by_label( knowledge_graph_id=knowledge_graph_id, limit=limit, offset=offset, + property_name=property_name, + property_value=property_value, ) self._probe.slug_searched( slug=f"type:{node_type}", @@ -96,11 +100,15 @@ def count_by_label( node_type: str, *, knowledge_graph_id: str | None = None, + property_name: str | None = None, + property_value: 
str | None = None, ) -> int: """Count nodes of one entity type.""" return self._repository.count_nodes_by_label( node_type, knowledge_graph_id=knowledge_graph_id, + property_name=property_name, + property_value=property_value, ) def get_neighbors( diff --git a/src/api/graph/infrastructure/graph_repository.py b/src/api/graph/infrastructure/graph_repository.py index eb8bb1bbf..b8c9f74ad 100644 --- a/src/api/graph/infrastructure/graph_repository.py +++ b/src/api/graph/infrastructure/graph_repository.py @@ -28,6 +28,22 @@ def _escape_cypher_string(value: str) -> str: return value.replace("\\", "\\\\").replace("'", "\\'") +def _property_contains_filter( + alias: str, + *, + property_name: str | None, + property_value: str | None, +) -> str: + if not property_name or property_value is None: + return "" + validate_label_name(property_name) + escaped_value = _escape_cypher_string(property_value) + return ( + f" AND toLower(toString({alias}.{property_name})) " + f"CONTAINS toLower('{escaped_value}')" + ) + + class GraphExtractionReadOnlyRepository(IGraphReadOnlyRepository): """Read-only repository for the Extraction bounded context. 
@@ -149,8 +165,11 @@ def find_nodes_by_label( knowledge_graph_id: str | None = None, limit: int = 100, offset: int = 0, + property_name: str | None = None, + property_value: str | None = None, ) -> list[NodeRecord]: """List nodes of one entity type, optionally scoped to a knowledge graph.""" + validate_label_name(node_type) bounded_limit = max(1, min(limit, 500)) bounded_offset = max(0, offset) kg_filter = ( @@ -158,9 +177,16 @@ def find_nodes_by_label( if knowledge_graph_id else "" ) + prop_filter = _property_contains_filter( + "n", + property_name=property_name, + property_value=property_value, + ) query = f""" MATCH (n:{node_type} {{graph_id: '{self._graph_id}'{kg_filter}}}) + WHERE true{prop_filter} RETURN {{node: n}} + ORDER BY n.slug SKIP {bounded_offset} LIMIT {bounded_limit} """ @@ -257,15 +283,24 @@ def count_nodes_by_label( node_type: str, *, knowledge_graph_id: str | None = None, + property_name: str | None = None, + property_value: str | None = None, ) -> int: """Count nodes of one entity type within an optional knowledge graph scope.""" + validate_label_name(node_type) kg_filter = ( f", knowledge_graph_id: '{knowledge_graph_id}'" if knowledge_graph_id else "" ) + prop_filter = _property_contains_filter( + "n", + property_name=property_name, + property_value=property_value, + ) query = f""" MATCH (n:{node_type} {{graph_id: '{self._graph_id}'{kg_filter}}}) + WHERE true{prop_filter} RETURN count(n) AS total """ result = self._client.execute_cypher(query) @@ -288,8 +323,15 @@ def find_relationship_instances( target_entity_type: str | None = None, limit: int = 100, offset: int = 0, + property_name: str | None = None, + property_value: str | None = None, ) -> list[tuple[EdgeRecord, NodeRecord, NodeRecord]]: """List relationship instances with resolved source and target nodes.""" + validate_label_name(relationship_label) + if source_entity_type: + validate_label_name(source_entity_type) + if target_entity_type: + validate_label_name(target_entity_type) 
bounded_limit = max(1, min(limit, 500)) bounded_offset = max(0, offset) source_type = f":{source_entity_type}" if source_entity_type else "" @@ -299,11 +341,18 @@ def find_relationship_instances( if knowledge_graph_id else "" ) + prop_filter = _property_contains_filter( + "edge", + property_name=property_name, + property_value=property_value, + ) query = f""" MATCH (source{source_type})-[edge:{relationship_label} {{ graph_id: '{self._graph_id}'{kg_filter} }}]->(target{target_type}) + WHERE true{prop_filter} RETURN {{edge: edge, source: source, target: target}} + ORDER BY edge.id SKIP {bounded_offset} LIMIT {bounded_limit} """ @@ -335,8 +384,15 @@ def count_relationship_instances( knowledge_graph_id: str | None = None, source_entity_type: str | None = None, target_entity_type: str | None = None, + property_name: str | None = None, + property_value: str | None = None, ) -> int: """Count relationship instances matching optional endpoint type filters.""" + validate_label_name(relationship_label) + if source_entity_type: + validate_label_name(source_entity_type) + if target_entity_type: + validate_label_name(target_entity_type) source_type = f":{source_entity_type}" if source_entity_type else "" target_type = f":{target_entity_type}" if target_entity_type else "" kg_filter = ( @@ -344,10 +400,16 @@ def count_relationship_instances( if knowledge_graph_id else "" ) + prop_filter = _property_contains_filter( + "edge", + property_name=property_name, + property_value=property_value, + ) query = f""" MATCH (source{source_type})-[edge:{relationship_label} {{ graph_id: '{self._graph_id}'{kg_filter} }}]->(target{target_type}) + WHERE true{prop_filter} RETURN count(edge) AS total """ result = self._client.execute_cypher(query) diff --git a/src/api/infrastructure/management/design_artifacts_service.py b/src/api/infrastructure/management/design_artifacts_service.py index a92950a5c..4c3799e6d 100644 --- a/src/api/infrastructure/management/design_artifacts_service.py +++ 
b/src/api/infrastructure/management/design_artifacts_service.py @@ -2,11 +2,26 @@ from __future__ import annotations +import asyncio +from typing import Any + from starlette.concurrency import run_in_threadpool +from graph.application.observability import DefaultGraphServiceProbe +from graph.application.services import GraphQueryService +from graph.infrastructure.age_client import AgeGraphClient +from graph.infrastructure.graph_repository import GraphExtractionReadOnlyRepository +from graph.infrastructure.tenant_graph_handler import ensure_tenant_graph_operational from graph.infrastructure.bulk_data_reader import fetch_bulk_graph_data +from infrastructure.database.connection import ConnectionFactory from infrastructure.database.connection_pool import ConnectionPool -from management.application.design_artifacts import build_design_artifacts +from infrastructure.settings import DatabaseSettings +from management.application.design_artifacts import ( + DEFAULT_INSTANCES_PER_TYPE, + build_design_artifacts, + serialize_entity_instance, + serialize_relationship_instance, +) from management.application.services.knowledge_graph_service import KnowledgeGraphService @@ -19,17 +34,19 @@ def __init__( knowledge_graph_service: KnowledgeGraphService, connection_pool: ConnectionPool, tenant_id: str, + database_settings: DatabaseSettings | None = None, ) -> None: self._knowledge_graph_service = knowledge_graph_service self._connection_pool = connection_pool self._tenant_id = tenant_id + self._database_settings = database_settings or DatabaseSettings() async def get_design_artifacts( self, *, user_id: str, kg_id: str, - limit: int = 500, + limit: int = DEFAULT_INSTANCES_PER_TYPE, ) -> dict[str, object] | None: ontology = await self._knowledge_graph_service.get_ontology( user_id=user_id, @@ -46,10 +63,173 @@ async def get_design_artifacts( self._connection_pool, graph_name, ) - bounded_limit = max(1, min(limit, 3000)) + bounded_limit = max(1, min(limit, 500)) return 
build_design_artifacts( knowledge_graph_id=kg_id, ontology=ontology, graph_data=graph_data, limit=bounded_limit, + instances_per_type=bounded_limit, + ) + + async def list_entity_instances( + self, + *, + user_id: str, + kg_id: str, + entity_type: str, + limit: int = DEFAULT_INSTANCES_PER_TYPE, + offset: int = 0, + property_name: str | None = None, + property_value: str | None = None, + ) -> dict[str, Any] | None: + if not await self._ensure_view_access(user_id=user_id, kg_id=kg_id): + return None + + bounded_limit = max(1, min(limit, 500)) + bounded_offset = max(0, offset) + + def _query() -> dict[str, Any]: + client = self._connect_for_tenant() + try: + repository = GraphExtractionReadOnlyRepository( + client=client, + graph_id=client.graph_name, + ) + service = GraphQueryService( + repository=repository, + probe=DefaultGraphServiceProbe(), + ) + total = service.count_by_label( + entity_type, + knowledge_graph_id=kg_id, + property_name=property_name, + property_value=property_value, + ) + nodes = service.list_by_label( + entity_type, + knowledge_graph_id=kg_id, + limit=bounded_limit, + offset=bounded_offset, + property_name=property_name, + property_value=property_value, + ) + instances = [ + serialize_entity_instance( + { + "slug": node.properties.get("slug"), + "id": node.id, + **node.properties, + } + ) + for node in nodes + ] + return { + "entity_type": entity_type, + "instances": instances, + "count": len(instances), + "total": total, + "limit": bounded_limit, + "offset": bounded_offset, + "property_name": property_name, + "property_value": property_value, + } + finally: + client.disconnect() + + return await asyncio.to_thread(_query) + + async def list_relationship_instances( + self, + *, + user_id: str, + kg_id: str, + relationship_type: str, + source_entity_type: str | None = None, + target_entity_type: str | None = None, + limit: int = DEFAULT_INSTANCES_PER_TYPE, + offset: int = 0, + property_name: str | None = None, + property_value: str | None = None, 
+ ) -> dict[str, Any] | None: + if not await self._ensure_view_access(user_id=user_id, kg_id=kg_id): + return None + + bounded_limit = max(1, min(limit, 500)) + bounded_offset = max(0, offset) + + def _query() -> dict[str, Any]: + client = self._connect_for_tenant() + try: + repository = GraphExtractionReadOnlyRepository( + client=client, + graph_id=client.graph_name, + ) + total = repository.count_relationship_instances( + relationship_type, + knowledge_graph_id=kg_id, + source_entity_type=source_entity_type, + target_entity_type=target_entity_type, + property_name=property_name, + property_value=property_value, + ) + rows = repository.find_relationship_instances( + relationship_type, + knowledge_graph_id=kg_id, + source_entity_type=source_entity_type, + target_entity_type=target_entity_type, + limit=bounded_limit, + offset=bounded_offset, + property_name=property_name, + property_value=property_value, + ) + instances = [ + serialize_relationship_instance( + edge={ + "id": edge.id, + **edge.properties, + }, + source_node={ + "slug": source.properties.get("slug"), + "id": source.id, + **source.properties, + }, + target_node={ + "slug": target.properties.get("slug"), + "id": target.id, + **target.properties, + }, + ) + for edge, source, target in rows + ] + return { + "relationship_type": relationship_type, + "source_entity_type": source_entity_type, + "target_entity_type": target_entity_type, + "instances": instances, + "count": len(instances), + "total": total, + "limit": bounded_limit, + "offset": bounded_offset, + "property_name": property_name, + "property_value": property_value, + } + finally: + client.disconnect() + + return await asyncio.to_thread(_query) + + async def _ensure_view_access(self, *, user_id: str, kg_id: str) -> bool: + kg = await self._knowledge_graph_service.get(user_id=user_id, kg_id=kg_id) + return kg is not None + + def _connect_for_tenant(self) -> AgeGraphClient: + factory = ConnectionFactory(self._database_settings, 
pool=self._connection_pool) + graph_name = ensure_tenant_graph_operational(factory, self._tenant_id) + client = AgeGraphClient( + self._database_settings, + connection_factory=factory, + graph_name=graph_name, ) + client.connect() + return client diff --git a/src/api/management/application/design_artifacts.py b/src/api/management/application/design_artifacts.py index e561219d8..97059c6cf 100644 --- a/src/api/management/application/design_artifacts.py +++ b/src/api/management/application/design_artifacts.py @@ -24,6 +24,25 @@ } ) +_NODE_OBSERVATION_EXCLUDED_KEYS = _SYSTEM_NODE_PROPERTIES | frozenset( + {"type", "label", "domainId"} +) + +_EDGE_OBSERVATION_EXCLUDED_KEYS = frozenset( + { + "id", + "type", + "source", + "target", + "source_path", + "knowledge_graph_id", + "graph_id", + "data_source_id", + } +) + +DEFAULT_INSTANCES_PER_TYPE = 100 + def _instance_properties(raw: dict[str, Any]) -> dict[str, Any]: return { @@ -33,6 +52,93 @@ def _instance_properties(raw: dict[str, Any]) -> dict[str, Any]: } +def _observed_entity_property_keys(nodes: list[dict[str, Any]]) -> set[str]: + observed: set[str] = set() + for node in nodes: + for key in node: + if key in _NODE_OBSERVATION_EXCLUDED_KEYS or key.startswith("_"): + continue + observed.add(str(key)) + return observed + + +def _observed_edge_property_keys(edges: list[dict[str, Any]]) -> set[str]: + observed: set[str] = set() + for edge in edges: + for key, value in edge.items(): + if key in _EDGE_OBSERVATION_EXCLUDED_KEYS or key.startswith("_"): + continue + if isinstance(value, (dict, list)): + continue + observed.add(str(key)) + return observed + + +def _merge_entity_property_definitions( + *, + required: list[str], + optional: list[str], + observed: set[str], +) -> tuple[list[str], list[str], dict[str, str]]: + required_set = set(required) + optional_set = set(optional) + for prop in observed: + if prop not in required_set: + optional_set.add(prop) + ordered = sorted(required_set | optional_set) + 
property_definitions = { + prop: prop.replace("_", " ").strip() or prop for prop in ordered + } + return sorted(required_set), sorted(optional_set), property_definitions + + +def _merge_relationship_parameter_definitions( + *, + declared: list[str], + observed: set[str], +) -> tuple[list[str], list[str], dict[str, str]]: + required = list(declared) + optional = sorted(prop for prop in observed if prop not in declared) + ordered = sorted(set(required) | set(optional)) + parameter_definitions = { + prop: prop.replace("_", " ").strip() or prop for prop in ordered + } + return required, optional, parameter_definitions + + +def serialize_entity_instance(node: dict[str, Any]) -> dict[str, Any]: + """Serialize one graph node for design artifact instance browsing.""" + slug = str(node.get("slug") or node.get("domainId") or node.get("id") or "") + return { + "slug": slug, + "properties": _instance_properties(node), + } + + +def serialize_relationship_instance( + *, + edge: dict[str, Any], + source_node: dict[str, Any], + target_node: dict[str, Any], +) -> dict[str, Any]: + """Serialize one relationship instance for design artifact browsing.""" + return { + "source_slug": str( + source_node.get("slug") + or source_node.get("domainId") + or source_node.get("id") + or "" + ), + "target_slug": str( + target_node.get("slug") + or target_node.get("domainId") + or target_node.get("id") + or "" + ), + "properties": _instance_properties(edge), + } + + def _reverse_relationship_label(edge_type: EdgeTypeDefinition) -> str | None: if edge_type.auto_generated or edge_type.inverse_of: return None @@ -47,8 +153,10 @@ def build_design_artifacts( ontology: OntologyConfig | None, graph_data: dict[str, Any], limit: int, + instances_per_type: int | None = None, ) -> dict[str, Any]: """Merge canonical ontology with live AGE graph instances for the Dev UI.""" + per_type_limit = max(1, min(instances_per_type if instances_per_type is not None else limit, 500)) nodes = [ node for node in 
graph_data.get("nodes", []) @@ -62,14 +170,8 @@ def build_design_artifacts( node_by_age_id = {str(node.get("id")): node for node in nodes if node.get("id")} - def _node_instance(node: dict[str, Any]) -> dict[str, Any]: - slug = str(node.get("slug") or node.get("domainId") or node.get("id") or "") - return { - "slug": slug, - "properties": _instance_properties(node), - } - full_instances_by_type: dict[str, list[dict[str, Any]]] = defaultdict(list) + raw_nodes_by_type: dict[str, list[dict[str, Any]]] = defaultdict(list) for node in sorted( nodes, key=lambda item: ( @@ -78,31 +180,26 @@ def _node_instance(node: dict[str, Any]) -> dict[str, Any]: ), ): entity_type = str(node.get("type") or "unknown") - full_instances_by_type[entity_type].append(_node_instance(node)) + raw_nodes_by_type[entity_type].append(node) + full_instances_by_type[entity_type].append(serialize_entity_instance(node)) instances_by_type: dict[str, list[dict[str, Any]]] = defaultdict(list) - sorted_nodes = sorted( - nodes, - key=lambda node: ( - str(node.get("type") or ""), - str(node.get("slug") or node.get("domainId") or node.get("id") or ""), - ), - ) - truncated_nodes = sorted_nodes[:limit] - - for node in truncated_nodes: - entity_type = str(node.get("type") or "unknown") - instances_by_type[entity_type].append(_node_instance(node)) + for entity_type, type_nodes in raw_nodes_by_type.items(): + instances_by_type[entity_type] = [ + serialize_entity_instance(node) for node in type_nodes[:per_type_limit] + ] entities: dict[str, dict[str, Any]] = {} if ontology is not None: for node_type in ontology.node_types: required = list(node_type.required_properties) optional = list(node_type.optional_properties) - property_definitions = { - prop: prop.replace("_", " ").strip() or prop - for prop in (*required, *optional) - } + observed = _observed_entity_property_keys(raw_nodes_by_type.get(node_type.label, [])) + required, optional, property_definitions = _merge_entity_property_definitions( + 
required=required, + optional=optional, + observed=observed, + ) type_instances = instances_by_type.get(node_type.label, []) total_instances = len(full_instances_by_type.get(node_type.label, [])) entities[node_type.label] = { @@ -122,12 +219,18 @@ def _node_instance(node: dict[str, Any]) -> dict[str, Any]: if entity_type in entities: continue total_instances = len(full_instances_by_type.get(entity_type, [])) + observed = _observed_entity_property_keys(raw_nodes_by_type.get(entity_type, [])) + required, optional, property_definitions = _merge_entity_property_definitions( + required=[], + optional=[], + observed=observed, + ) entities[entity_type] = { "type": entity_type, "description": "", - "required_properties": [], - "optional_properties": [], - "property_definitions": {}, + "required_properties": required, + "optional_properties": optional, + "property_definitions": property_definitions, "prepopulated_instances": False, "instance_count": total_instances, "instances_returned": len(type_instances), @@ -137,6 +240,7 @@ def _node_instance(node: dict[str, Any]) -> dict[str, Any]: relationship_instances: dict[str, list[dict[str, Any]]] = defaultdict(list) full_relationship_instances: dict[str, list[dict[str, Any]]] = defaultdict(list) + raw_edges_by_key: dict[str, list[dict[str, Any]]] = defaultdict(list) def _edge_instance(edge: dict[str, Any]) -> dict[str, Any] | None: source_node = node_by_age_id.get(str(edge.get("source"))) @@ -149,21 +253,12 @@ def _edge_instance(edge: dict[str, Any]) -> dict[str, Any] | None: composite_key = f"{source_type}|{relationship_type}|{target_type}" return { "composite_key": composite_key, - "instance": { - "source_slug": str( - source_node.get("slug") - or source_node.get("domainId") - or source_node.get("id") - or "" - ), - "target_slug": str( - target_node.get("slug") - or target_node.get("domainId") - or target_node.get("id") - or "" - ), - "properties": _instance_properties(edge), - }, + "edge": edge, + "instance": 
serialize_relationship_instance( + edge=edge, + source_node=source_node, + target_node=target_node, + ), } sorted_edges = sorted( @@ -179,13 +274,23 @@ def _edge_instance(edge: dict[str, Any]) -> dict[str, Any] | None: if parsed is None: continue full_relationship_instances[parsed["composite_key"]].append(parsed["instance"]) + raw_edges_by_key[parsed["composite_key"]].append(parsed["edge"]) - truncated_edges = sorted_edges[:limit] - for edge in truncated_edges: - parsed = _edge_instance(edge) - if parsed is None: - continue - relationship_instances[parsed["composite_key"]].append(parsed["instance"]) + for composite_key, edge_rows in raw_edges_by_key.items(): + limited_instances: list[dict[str, Any]] = [] + for edge in edge_rows[:per_type_limit]: + source_node = node_by_age_id.get(str(edge.get("source"))) + target_node = node_by_age_id.get(str(edge.get("target"))) + if source_node is None or target_node is None: + continue + limited_instances.append( + serialize_relationship_instance( + edge=edge, + source_node=source_node, + target_node=target_node, + ) + ) + relationship_instances[composite_key] = limited_instances relationships: list[dict[str, Any]] = [] if ontology is not None: @@ -205,6 +310,14 @@ def _edge_instance(edge: dict[str, Any]) -> dict[str, Any] | None: break reverse_label = _reverse_relationship_label(edge_type) total_instances = len(full_relationship_instances.get(composite_key, [])) + declared = list(edge_type.properties) + observed = _observed_edge_property_keys(raw_edges_by_key.get(composite_key, [])) + required_parameters, optional_parameters, parameter_definitions = ( + _merge_relationship_parameter_definitions( + declared=declared, + observed=observed, + ) + ) relationships.append( { "key": composite_key, @@ -221,12 +334,9 @@ def _edge_instance(edge: dict[str, Any]) -> dict[str, Any] | None: "instances_returned": len(type_instances), "instances_truncated": total_instances > len(type_instances), "instances": type_instances, - 
"required_parameters": list(edge_type.properties), - "optional_parameters": [], - "parameter_definitions": { - prop: prop.replace("_", " ").strip() or prop - for prop in edge_type.properties - }, + "required_parameters": required_parameters, + "optional_parameters": optional_parameters, + "parameter_definitions": parameter_definitions, } ) @@ -245,6 +355,10 @@ def _edge_instance(edge: dict[str, Any]) -> dict[str, Any] | None: ): continue total_instances = len(full_relationship_instances.get(composite_key, [])) + observed = _observed_edge_property_keys(raw_edges_by_key.get(composite_key, [])) + required_parameters, optional_parameters, parameter_definitions = ( + _merge_relationship_parameter_definitions(declared=[], observed=observed) + ) relationships.append( { "key": composite_key, @@ -259,12 +373,23 @@ def _edge_instance(edge: dict[str, Any]) -> dict[str, Any] | None: "instances_returned": len(type_instances), "instances_truncated": total_instances > len(type_instances), "instances": type_instances, - "required_parameters": [], - "optional_parameters": [], - "parameter_definitions": {}, + "required_parameters": required_parameters, + "optional_parameters": optional_parameters, + "parameter_definitions": parameter_definitions, } ) + entity_instances_returned = sum(len(rows) for rows in instances_by_type.values()) + relationship_instances_returned = sum(len(rows) for rows in relationship_instances.values()) + entity_instances_truncated = any( + len(full_instances_by_type.get(entity_type, [])) > len(type_instances) + for entity_type, type_instances in instances_by_type.items() + ) + relationship_instances_truncated = any( + len(full_relationship_instances.get(key, [])) > len(type_instances) + for key, type_instances in relationship_instances.items() + ) + return { "found": ontology is not None or bool(entities) or bool(relationships), "knowledge_graph_id": knowledge_graph_id, @@ -277,10 +402,11 @@ def _edge_instance(edge: dict[str, Any]) -> dict[str, Any] | None: 
"relationship_instances": len(edges), }, "limits": { - "requested": limit, - "entity_instances_returned": len(truncated_nodes), - "relationship_instances_returned": len(truncated_edges), - "entity_instances_truncated": len(nodes) > len(truncated_nodes), - "relationship_instances_truncated": len(edges) > len(truncated_edges), + "requested": per_type_limit, + "instances_per_type": per_type_limit, + "entity_instances_returned": entity_instances_returned, + "relationship_instances_returned": relationship_instances_returned, + "entity_instances_truncated": entity_instances_truncated, + "relationship_instances_truncated": relationship_instances_truncated, }, } diff --git a/src/api/management/presentation/knowledge_graphs/models.py b/src/api/management/presentation/knowledge_graphs/models.py index 25c373795..1843c5f05 100644 --- a/src/api/management/presentation/knowledge_graphs/models.py +++ b/src/api/management/presentation/knowledge_graphs/models.py @@ -500,12 +500,41 @@ class DesignArtifactsLimitsModel(BaseModel): """Truncation metadata for instance payloads.""" requested: int + instances_per_type: int = 0 entity_instances_returned: int relationship_instances_returned: int entity_instances_truncated: bool relationship_instances_truncated: bool +class DesignArtifactInstanceListResponse(BaseModel): + """Paginated entity instances for one type.""" + + entity_type: str + instances: list[DesignArtifactInstanceModel] = Field(default_factory=list) + count: int = 0 + total: int = 0 + limit: int = 100 + offset: int = 0 + property_name: str | None = None + property_value: str | None = None + + +class DesignArtifactRelationshipInstanceListResponse(BaseModel): + """Paginated relationship instances for one type triple.""" + + relationship_type: str + source_entity_type: str | None = None + target_entity_type: str | None = None + instances: list[DesignArtifactInstanceModel] = Field(default_factory=list) + count: int = 0 + total: int = 0 + limit: int = 100 + offset: int = 0 + 
property_name: str | None = None + property_value: str | None = None + + class DesignArtifactsResponse(BaseModel): """Canonical schema plus live graph instances for Graph Management UI.""" diff --git a/src/api/management/presentation/knowledge_graphs/routes.py b/src/api/management/presentation/knowledge_graphs/routes.py index cf4ab596e..4d33a0b4b 100644 --- a/src/api/management/presentation/knowledge_graphs/routes.py +++ b/src/api/management/presentation/knowledge_graphs/routes.py @@ -30,8 +30,11 @@ OntologyConfigRequest, OntologyConfigResponse, DesignArtifactsResponse, + DesignArtifactInstanceListResponse, + DesignArtifactRelationshipInstanceListResponse, UpdateKnowledgeGraphRequest, ) +from management.application.design_artifacts import DEFAULT_INSTANCES_PER_TYPE from infrastructure.management.design_artifacts_service import DesignArtifactsService from management.dependencies.design_artifacts import get_design_artifacts_service from shared_kernel.authorization.types import Permission @@ -612,7 +615,7 @@ async def get_knowledge_graph_design_artifacts( kg_id: str, current_user: Annotated[CurrentUser, Depends(get_current_user)], service: Annotated[DesignArtifactsService, Depends(get_design_artifacts_service)], - limit: Annotated[int, Query(ge=1, le=3000)] = 500, + limit: Annotated[int, Query(ge=1, le=500)] = DEFAULT_INSTANCES_PER_TYPE, ) -> DesignArtifactsResponse: """Get merged ontology and graph instance artifacts for one knowledge graph.""" payload = await service.get_design_artifacts( @@ -628,6 +631,76 @@ async def get_knowledge_graph_design_artifacts( return DesignArtifactsResponse.model_validate(payload) +@router.get( + "/knowledge-graphs/{kg_id}/design-artifacts/entity-instances", + response_model=DesignArtifactInstanceListResponse, + summary="List entity instances for one type", +) +async def list_design_artifact_entity_instances( + kg_id: str, + entity_type: Annotated[str, Query(min_length=1)], + current_user: Annotated[CurrentUser, 
Depends(get_current_user)], + service: Annotated[DesignArtifactsService, Depends(get_design_artifacts_service)], + limit: Annotated[int, Query(ge=1, le=500)] = DEFAULT_INSTANCES_PER_TYPE, + offset: Annotated[int, Query(ge=0)] = 0, + property_name: Annotated[str | None, Query(min_length=1)] = None, + property_value: Annotated[str | None, Query()] = None, +) -> DesignArtifactInstanceListResponse: + """Paginated entity instance browsing with optional property search.""" + payload = await service.list_entity_instances( + user_id=current_user.user_id.value, + kg_id=kg_id, + entity_type=entity_type, + limit=limit, + offset=offset, + property_name=property_name, + property_value=property_value, + ) + if payload is None: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail=f"Knowledge graph {kg_id} not found or not accessible", + ) + return DesignArtifactInstanceListResponse.model_validate(payload) + + +@router.get( + "/knowledge-graphs/{kg_id}/design-artifacts/relationship-instances", + response_model=DesignArtifactRelationshipInstanceListResponse, + summary="List relationship instances for one type", +) +async def list_design_artifact_relationship_instances( + kg_id: str, + relationship_type: Annotated[str, Query(min_length=1)], + current_user: Annotated[CurrentUser, Depends(get_current_user)], + service: Annotated[DesignArtifactsService, Depends(get_design_artifacts_service)], + source_entity_type: Annotated[str | None, Query(min_length=1)] = None, + target_entity_type: Annotated[str | None, Query(min_length=1)] = None, + limit: Annotated[int, Query(ge=1, le=500)] = DEFAULT_INSTANCES_PER_TYPE, + offset: Annotated[int, Query(ge=0)] = 0, + property_name: Annotated[str | None, Query(min_length=1)] = None, + property_value: Annotated[str | None, Query()] = None, +) -> DesignArtifactRelationshipInstanceListResponse: + """Paginated relationship instance browsing with optional property search.""" + payload = await service.list_relationship_instances( + 
user_id=current_user.user_id.value, + kg_id=kg_id, + relationship_type=relationship_type, + source_entity_type=source_entity_type, + target_entity_type=target_entity_type, + limit=limit, + offset=offset, + property_name=property_name, + property_value=property_value, + ) + if payload is None: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail=f"Knowledge graph {kg_id} not found or not accessible", + ) + return DesignArtifactRelationshipInstanceListResponse.model_validate(payload) + + @router.get( "/knowledge-graphs/{kg_id}/ontology", response_model=OntologyConfigResponse, diff --git a/src/api/tests/unit/management/application/test_design_artifacts.py b/src/api/tests/unit/management/application/test_design_artifacts.py index 86da3482a..132304c5a 100644 --- a/src/api/tests/unit/management/application/test_design_artifacts.py +++ b/src/api/tests/unit/management/application/test_design_artifacts.py @@ -85,15 +85,93 @@ def test_build_design_artifacts_reports_true_instance_count_when_payload_truncat ), ), graph_data=graph_data, - limit=500, + limit=100, ) service = payload["entities"]["service"] assert service["instance_count"] == 600 - assert service["instances_returned"] == 500 + assert service["instances_returned"] == 100 assert service["instances_truncated"] is True - assert len(service["instances"]) == 500 + assert len(service["instances"]) == 100 assert payload["limits"]["entity_instances_truncated"] is True + assert payload["limits"]["instances_per_type"] == 100 + + +def test_build_design_artifacts_per_type_limit_applies_independently() -> None: + graph_data = { + "nodes": [ + *[ + { + "id": f"adapter-{index}", + "type": "Adapter", + "slug": f"adapter-{index:04d}", + "knowledge_graph_id": "kg-1", + } + for index in range(150) + ], + *[ + { + "id": f"endpoint-{index}", + "type": "APIEndpoint", + "slug": f"endpoint-{index:04d}", + "knowledge_graph_id": "kg-1", + } + for index in range(150) + ], + ], + "edges": [], + } + + payload = 
build_design_artifacts( + knowledge_graph_id="kg-1", + ontology=OntologyConfig( + node_types=( + NodeTypeDefinition(label="Adapter", description="Adapter"), + NodeTypeDefinition(label="APIEndpoint", description="Endpoint"), + ), + ), + graph_data=graph_data, + limit=100, + ) + + assert len(payload["entities"]["Adapter"]["instances"]) == 100 + assert len(payload["entities"]["APIEndpoint"]["instances"]) == 100 + assert payload["limits"]["entity_instances_returned"] == 200 + + +def test_build_design_artifacts_merges_observed_entity_properties() -> None: + payload = build_design_artifacts( + knowledge_graph_id="kg-1", + ontology=OntologyConfig( + node_types=( + NodeTypeDefinition( + label="Adapter", + description="Adapter", + required_properties=("slug", "data_source_id"), + ), + ), + ), + graph_data={ + "nodes": [ + { + "id": "age-1", + "type": "Adapter", + "slug": "my-adapter", + "knowledge_graph_id": "kg-1", + "data_source_id": "bootstrap", + "transport": "maestro", + "resource_types": "Cluster", + } + ], + "edges": [], + }, + limit=100, + ) + + adapter = payload["entities"]["Adapter"] + assert "transport" in adapter["property_definitions"] + assert "resource_types" in adapter["property_definitions"] + assert "transport" in adapter["optional_properties"] def test_build_design_artifacts_filters_other_knowledge_graphs() -> None: diff --git a/src/dev-ui/app/components/graph-management/GraphDesignEntitiesPanel.vue b/src/dev-ui/app/components/graph-management/GraphDesignEntitiesPanel.vue index 2292279f8..c150dc772 100644 --- a/src/dev-ui/app/components/graph-management/GraphDesignEntitiesPanel.vue +++ b/src/dev-ui/app/components/graph-management/GraphDesignEntitiesPanel.vue @@ -9,6 +9,7 @@ import GraphDesignEntityTypeList from '@/components/graph-management/GraphDesign import { type DesignArtifactEntityType, type DesignArtifactsResponse, + DEFAULT_DESIGN_ARTIFACTS_INSTANCES_PER_TYPE, } from '@/utils/kgDesignArtifacts' const props = withDefaults( @@ -24,7 +25,6 @@ const { 
apiFetch } = useApiClient() const loading = ref(true) const data = ref<DesignArtifactsResponse | null>(null) -const instancePage = ref<Record<string, number>>({}) async function fetchEntities(options: { preserveUiState?: boolean } = {}) { if (!props.kgId) { @@ -35,12 +35,11 @@ async function fetchEntities(options: { preserveUiState?: boolean } = {}) { const preserveUiState = options.preserveUiState === true && data.value !== null if (!preserveUiState) { loading.value = true - instancePage.value = {} } try { data.value = await apiFetch<DesignArtifactsResponse>( `/management/knowledge-graphs/${props.kgId}/design-artifacts`, - { query: { limit: 500 } }, + { query: { limit: DEFAULT_DESIGN_ARTIFACTS_INSTANCES_PER_TYPE } }, ) } catch (err: unknown) { toast.error('Failed to load entity design artifacts', { @@ -60,10 +59,6 @@ const entityRows = computed((): DesignArtifactEntityType[] => { })) }) -function setInstancePage(typeKey: string, page: number) { - instancePage.value = { ...instancePage.value, [typeKey]: page } -} - watch( () => [props.kgId, props.reloadNonce] as const, ([, reloadNonce]) => { @@ -138,19 +133,18 @@ defineExpose({ refresh: fetchEntities }) </div> <GraphDesignEntityTypeList + :kg-id="kgId" :rows="entityRows" - :instance-page="instancePage" - @update:instance-page="setInstancePage" + :reload-nonce="reloadNonce" /> <p v-if="data.limits.entity_instances_truncated" class="text-xs text-muted-foreground" > - Instance counts reflect the full graph. The browsable instance list is capped at - {{ data.limits.entity_instances_returned }} of {{ data.counts.entity_instances }} - total instances across all types (API limit {{ data.limits.requested }}). Per-type badges - still show true totals. + Each entity type loads the first + {{ data.limits.instances_per_type ?? data.limits.requested }} instances by default. + Expand a type to search or load the next batch. 
</p> </template> </template> diff --git a/src/dev-ui/app/components/graph-management/GraphDesignEntityTypeList.vue b/src/dev-ui/app/components/graph-management/GraphDesignEntityTypeList.vue index a2723a6e7..1407aedd8 100644 --- a/src/dev-ui/app/components/graph-management/GraphDesignEntityTypeList.vue +++ b/src/dev-ui/app/components/graph-management/GraphDesignEntityTypeList.vue @@ -1,30 +1,107 @@ <script setup lang="ts"> -import { ChevronDown } from 'lucide-vue-next' +import { computed, ref, toRef, watch } from 'vue' +import { ChevronDown, Loader2, Search, X } from 'lucide-vue-next' import { Card } from '@/components/ui/card' import { Badge } from '@/components/ui/badge' import { Button } from '@/components/ui/button' +import { Input } from '@/components/ui/input' +import { + Select, + SelectContent, + SelectItem, + SelectTrigger, + SelectValue, +} from '@/components/ui/select' +import { useDesignArtifactInstanceQuery } from '@/composables/useDesignArtifactInstanceQuery' import { type DesignArtifactEntityType, - pageSlice, + DESIGN_ARTIFACT_INSTANCE_PAGE_SIZE, prepopulationBadgeClass, prepopulationCardClass, prepopulationLabel, + searchableEntityProperties, } from '@/utils/kgDesignArtifacts' -defineProps<{ +const props = defineProps<{ + kgId: string rows: DesignArtifactEntityType[] - instancePage: Record<string, number> + reloadNonce?: number }>() -const emit = defineEmits<{ - 'update:instancePage': [key: string, page: number] -}>() +const kgId = toRef(props, 'kgId') +const reloadNonce = computed(() => props.reloadNonce ?? 0) + +const { + entityStates, + ensureEntityState, + searchEntityInstances, + clearEntitySearch, + loadMoreEntityInstances, +} = useDesignArtifactInstanceQuery(kgId, reloadNonce) + +const draftSearchProperty = ref<Record<string, string>>({}) +const draftSearchValue = ref<Record<string, string>>({}) + +watch( + () => props.rows, + (rows) => { + for (const row of rows) { + const state = ensureEntityState(row.type, { + instances: row.instances ?? 
[], + total: row.instance_count, + }) + if (!state.activeSearch) { + state.instances = [...(row.instances ?? [])] + state.total = row.instance_count + } + if (!draftSearchProperty.value[row.type]) { + draftSearchProperty.value[row.type] = searchableEntityProperties(row)[0] ?? 'slug' + } + } + }, + { immediate: true, deep: true }, +) + +function propertyOptions(row: DesignArtifactEntityType): string[] { + return searchableEntityProperties(row) +} + +function canLoadMore(row: DesignArtifactEntityType): boolean { + const state = ensureEntityState(row.type) + return state.instances.length < state.total +} + +async function runSearch(row: DesignArtifactEntityType) { + const propertyName = draftSearchProperty.value[row.type] + const propertyValue = draftSearchValue.value[row.type] ?? '' + if (!propertyName || !propertyValue.trim()) return + await searchEntityInstances(row.type, { + entityType: row.type, + propertyName, + propertyValue: propertyValue.trim(), + }) +} + +async function resetSearch(row: DesignArtifactEntityType) { + draftSearchValue.value[row.type] = '' + await clearEntitySearch(row.type, { + entityType: row.type, + seedInstances: row.instances ?? [], + total: row.instance_count, + }) +} + +async function loadMore(row: DesignArtifactEntityType) { + await loadMoreEntityInstances(row.type, { entityType: row.type }) +} + +const visibleRows = computed(() => props.rows) </script> <template> <div class="space-y-1.5"> <Card - v-for="row in rows" + v-for="row in visibleRows" :key="row.type" :class="['overflow-hidden', prepopulationCardClass(row.prepopulated_instances)]" > @@ -89,7 +166,7 @@ const emit = defineEmits<{ <p v-else class="text-xs text-muted-foreground">No property definitions</p> </div> - <details v-if="(row.instances?.length ?? 
0) > 0" class="group/inst rounded-md border"> + <details v-if="row.instance_count > 0" class="group/inst rounded-md border"> <summary class="flex cursor-pointer list-none items-center gap-1.5 px-2 py-1.5 text-xs font-medium hover:bg-muted/50 [&::-webkit-details-marker]:hidden" > @@ -97,15 +174,70 @@ const emit = defineEmits<{ class="size-3.5 shrink-0 text-muted-foreground transition-transform group-open/inst:rotate-180" /> Instances - <span v-if="row.instances_truncated" class="font-normal text-muted-foreground"> - (showing {{ row.instances_returned ?? row.instances?.length ?? 0 }} of - {{ row.instance_count }}) + <span class="font-normal text-muted-foreground"> + (showing {{ ensureEntityState(row.type).instances.length }} of + {{ ensureEntityState(row.type).total }}) </span> </summary> <div class="space-y-2 border-t p-2"> + <div class="flex flex-wrap items-end gap-2" @click.stop> + <div class="min-w-[8rem] flex-1 space-y-1"> + <label class="text-[10px] font-medium uppercase tracking-wide text-muted-foreground"> + Property + </label> + <Select v-model="draftSearchProperty[row.type]"> + <SelectTrigger class="h-8 text-xs"> + <SelectValue placeholder="Select property" /> + </SelectTrigger> + <SelectContent> + <SelectItem + v-for="prop in propertyOptions(row)" + :key="prop" + :value="prop" + class="text-xs" + > + {{ prop }} + </SelectItem> + </SelectContent> + </Select> + </div> + <div class="min-w-[10rem] flex-[2] space-y-1"> + <label class="text-[10px] font-medium uppercase tracking-wide text-muted-foreground"> + Search value + </label> + <Input + v-model="draftSearchValue[row.type]" + class="h-8 text-xs" + placeholder="Contains…" + @keydown.enter.prevent="runSearch(row)" + /> + </div> + <Button + variant="outline" + size="sm" + class="h-8 px-2 text-xs" + :disabled="entityStates[row.type]?.loading" + @click.stop.prevent="runSearch(row)" + > + <Loader2 v-if="entityStates[row.type]?.loading" class="mr-1 size-3.5 animate-spin" /> + <Search v-else class="mr-1 size-3.5" 
/> + Search + </Button> + <Button + v-if="entityStates[row.type]?.activeSearch" + variant="ghost" + size="sm" + class="h-8 px-2 text-xs" + @click.stop.prevent="resetSearch(row)" + > + <X class="mr-1 size-3.5" /> + Clear + </Button> + </div> + <ul class="space-y-1 text-xs"> <li - v-for="(inst, idx) in pageSlice(instancePage, row.type, row.instances || []).items" + v-for="(inst, idx) in ensureEntityState(row.type).instances" :key="inst.slug ?? idx" class="rounded-md bg-muted/40 px-2 py-1" > @@ -117,36 +249,30 @@ const emit = defineEmits<{ }}</pre> </li> </ul> - <div - v-if="pageSlice(instancePage, row.type, row.instances || []).total > 20" - class="flex flex-wrap items-center gap-1.5" - @click.stop + + <p + v-if="ensureEntityState(row.type).instances.length === 0" + class="text-xs text-muted-foreground" > + No instances match the current filter. + </p> + + <div v-if="canLoadMore(row)" class="flex flex-wrap items-center gap-1.5" @click.stop> <Button variant="outline" size="sm" class="h-7 px-2 text-xs" - :disabled="pageSlice(instancePage, row.type, row.instances || []).page <= 0" - @click.stop.prevent="emit('update:instancePage', row.type, pageSlice(instancePage, row.type, row.instances || []).page - 1)" + :disabled="entityStates[row.type]?.loading" + @click.stop.prevent="loadMore(row)" > - Previous + <Loader2 v-if="entityStates[row.type]?.loading" class="mr-1 size-3.5 animate-spin" /> + Load next {{ DESIGN_ARTIFACT_INSTANCE_PAGE_SIZE }} </Button> <span class="text-xs text-muted-foreground"> - Page {{ pageSlice(instancePage, row.type, row.instances || []).page + 1 }} / - {{ pageSlice(instancePage, row.type, row.instances || []).totalPages }} + {{ ensureEntityState(row.type).instances.length }} loaded · + {{ ensureEntityState(row.type).total - ensureEntityState(row.type).instances.length }} + remaining </span> - <Button - variant="outline" - size="sm" - class="h-7 px-2 text-xs" - :disabled=" - pageSlice(instancePage, row.type, row.instances || []).page - >= 
pageSlice(instancePage, row.type, row.instances || []).totalPages - 1 - " - @click.stop.prevent="emit('update:instancePage', row.type, pageSlice(instancePage, row.type, row.instances || []).page + 1)" - > - Next - </Button> </div> </div> </details> diff --git a/src/dev-ui/app/components/graph-management/GraphDesignRelationshipTypeList.vue b/src/dev-ui/app/components/graph-management/GraphDesignRelationshipTypeList.vue index bbc557fe2..e459fb453 100644 --- a/src/dev-ui/app/components/graph-management/GraphDesignRelationshipTypeList.vue +++ b/src/dev-ui/app/components/graph-management/GraphDesignRelationshipTypeList.vue @@ -1,24 +1,103 @@ <script setup lang="ts"> -import { ChevronDown } from 'lucide-vue-next' +import { computed, ref, toRef, watch } from 'vue' +import { ChevronDown, Loader2, Search, X } from 'lucide-vue-next' import { Card } from '@/components/ui/card' import { Badge } from '@/components/ui/badge' import { Button } from '@/components/ui/button' +import { Input } from '@/components/ui/input' +import { + Select, + SelectContent, + SelectItem, + SelectTrigger, + SelectValue, +} from '@/components/ui/select' +import { useDesignArtifactInstanceQuery } from '@/composables/useDesignArtifactInstanceQuery' import { type DesignArtifactRelationshipType, - pageSlice, + DESIGN_ARTIFACT_INSTANCE_PAGE_SIZE, prepopulationBadgeClass, prepopulationCardClass, prepopulationLabel, + searchableRelationshipProperties, } from '@/utils/kgDesignArtifacts' -defineProps<{ +const props = defineProps<{ + kgId: string rows: DesignArtifactRelationshipType[] - instancePage: Record<string, number> + reloadNonce?: number }>() -const emit = defineEmits<{ - 'update:instancePage': [key: string, page: number] -}>() +const kgId = toRef(props, 'kgId') +const reloadNonce = computed(() => props.reloadNonce ?? 
0) + +const { + relationshipStates, + ensureRelationshipState, + searchRelationshipInstances, + clearRelationshipSearch, + loadMoreRelationshipInstances, +} = useDesignArtifactInstanceQuery(kgId, reloadNonce) + +const draftSearchProperty = ref<Record<string, string>>({}) +const draftSearchValue = ref<Record<string, string>>({}) + +watch( + () => props.rows, + (rows) => { + for (const row of rows) { + const state = ensureRelationshipState(row.key, { + instances: row.instances ?? [], + total: row.instance_count, + }) + if (!state.activeSearch) { + state.instances = [...(row.instances ?? [])] + state.total = row.instance_count + } + if (!draftSearchProperty.value[row.key]) { + draftSearchProperty.value[row.key] = searchableRelationshipProperties(row)[0] ?? 'data_source_id' + } + } + }, + { immediate: true, deep: true }, +) + +function canLoadMore(row: DesignArtifactRelationshipType): boolean { + const state = ensureRelationshipState(row.key) + return state.instances.length < state.total +} + +async function runSearch(row: DesignArtifactRelationshipType) { + const propertyName = draftSearchProperty.value[row.key] + const propertyValue = draftSearchValue.value[row.key] ?? '' + if (!propertyName || !propertyValue.trim()) return + await searchRelationshipInstances(row.key, { + relationshipType: row.relationship_type, + sourceEntityType: row.source_entity_type, + targetEntityType: row.target_entity_type, + propertyName, + propertyValue: propertyValue.trim(), + }) +} + +async function resetSearch(row: DesignArtifactRelationshipType) { + draftSearchValue.value[row.key] = '' + await clearRelationshipSearch(row.key, { + relationshipType: row.relationship_type, + sourceEntityType: row.source_entity_type, + targetEntityType: row.target_entity_type, + seedInstances: row.instances ?? 
[], + total: row.instance_count, + }) +} + +async function loadMore(row: DesignArtifactRelationshipType) { + await loadMoreRelationshipInstances(row.key, { + relationshipType: row.relationship_type, + sourceEntityType: row.source_entity_type, + targetEntityType: row.target_entity_type, + }) +} </script> <template> @@ -88,12 +167,26 @@ const emit = defineEmits<{ > <code class="rounded bg-muted px-1 py-0.5 font-mono text-[10px]">{{ key }}</code> <span class="text-muted-foreground">{{ label }}</span> + <Badge + v-if="rel.required_parameters?.includes(String(key))" + variant="outline" + class="h-4 px-1 text-[9px]" + > + required + </Badge> + <Badge + v-else-if="rel.optional_parameters?.includes(String(key))" + variant="outline" + class="h-4 px-1 text-[9px] opacity-70" + > + optional + </Badge> </div> </div> <p v-else class="text-xs text-muted-foreground">No parameter definitions</p> </div> - <details v-if="rel.instances.length > 0" class="group/inst rounded-md border"> + <details v-if="rel.instance_count > 0" class="group/inst rounded-md border"> <summary class="flex cursor-pointer list-none items-center gap-1.5 px-2 py-1.5 text-xs font-medium hover:bg-muted/50 [&::-webkit-details-marker]:hidden" > @@ -101,15 +194,70 @@ const emit = defineEmits<{ class="size-3.5 shrink-0 text-muted-foreground transition-transform group-open/inst:rotate-180" /> Instances - <span v-if="rel.instances_truncated" class="font-normal text-muted-foreground"> - (showing {{ rel.instances_returned ?? 
rel.instances.length }} of - {{ rel.instance_count }}) + <span class="font-normal text-muted-foreground"> + (showing {{ ensureRelationshipState(rel.key).instances.length }} of + {{ ensureRelationshipState(rel.key).total }}) </span> </summary> <div class="space-y-2 border-t p-2"> + <div class="flex flex-wrap items-end gap-2" @click.stop> + <div class="min-w-[8rem] flex-1 space-y-1"> + <label class="text-[10px] font-medium uppercase tracking-wide text-muted-foreground"> + Property + </label> + <Select v-model="draftSearchProperty[rel.key]"> + <SelectTrigger class="h-8 text-xs"> + <SelectValue placeholder="Select property" /> + </SelectTrigger> + <SelectContent> + <SelectItem + v-for="prop in searchableRelationshipProperties(rel)" + :key="prop" + :value="prop" + class="text-xs" + > + {{ prop }} + </SelectItem> + </SelectContent> + </Select> + </div> + <div class="min-w-[10rem] flex-[2] space-y-1"> + <label class="text-[10px] font-medium uppercase tracking-wide text-muted-foreground"> + Search value + </label> + <Input + v-model="draftSearchValue[rel.key]" + class="h-8 text-xs" + placeholder="Contains…" + @keydown.enter.prevent="runSearch(rel)" + /> + </div> + <Button + variant="outline" + size="sm" + class="h-8 px-2 text-xs" + :disabled="relationshipStates[rel.key]?.loading" + @click.stop.prevent="runSearch(rel)" + > + <Loader2 v-if="relationshipStates[rel.key]?.loading" class="mr-1 size-3.5 animate-spin" /> + <Search v-else class="mr-1 size-3.5" /> + Search + </Button> + <Button + v-if="relationshipStates[rel.key]?.activeSearch" + variant="ghost" + size="sm" + class="h-8 px-2 text-xs" + @click.stop.prevent="resetSearch(rel)" + > + <X class="mr-1 size-3.5" /> + Clear + </Button> + </div> + <ul class="space-y-1 text-xs"> <li - v-for="(inst, idx) in pageSlice(instancePage, rel.key, rel.instances).items" + v-for="(inst, idx) in ensureRelationshipState(rel.key).instances" :key="`${rel.key}-${idx}`" class="rounded-md bg-muted/40 px-2 py-1" > @@ -121,36 +269,30 @@ const 
emit = defineEmits<{ }}</pre> </li> </ul> - <div - v-if="pageSlice(instancePage, rel.key, rel.instances).total > 20" - class="flex flex-wrap items-center gap-1.5" - @click.stop + + <p + v-if="ensureRelationshipState(rel.key).instances.length === 0" + class="text-xs text-muted-foreground" > + No instances match the current filter. + </p> + + <div v-if="canLoadMore(rel)" class="flex flex-wrap items-center gap-1.5" @click.stop> <Button variant="outline" size="sm" class="h-7 px-2 text-xs" - :disabled="pageSlice(instancePage, rel.key, rel.instances).page <= 0" - @click.stop.prevent="emit('update:instancePage', rel.key, pageSlice(instancePage, rel.key, rel.instances).page - 1)" + :disabled="relationshipStates[rel.key]?.loading" + @click.stop.prevent="loadMore(rel)" > - Previous + <Loader2 v-if="relationshipStates[rel.key]?.loading" class="mr-1 size-3.5 animate-spin" /> + Load next {{ DESIGN_ARTIFACT_INSTANCE_PAGE_SIZE }} </Button> <span class="text-xs text-muted-foreground"> - Page {{ pageSlice(instancePage, rel.key, rel.instances).page + 1 }} / - {{ pageSlice(instancePage, rel.key, rel.instances).totalPages }} + {{ ensureRelationshipState(rel.key).instances.length }} loaded · + {{ ensureRelationshipState(rel.key).total - ensureRelationshipState(rel.key).instances.length }} + remaining </span> - <Button - variant="outline" - size="sm" - class="h-7 px-2 text-xs" - :disabled=" - pageSlice(instancePage, rel.key, rel.instances).page - >= pageSlice(instancePage, rel.key, rel.instances).totalPages - 1 - " - @click.stop.prevent="emit('update:instancePage', rel.key, pageSlice(instancePage, rel.key, rel.instances).page + 1)" - > - Next - </Button> </div> </div> </details> diff --git a/src/dev-ui/app/components/graph-management/GraphDesignRelationshipsPanel.vue b/src/dev-ui/app/components/graph-management/GraphDesignRelationshipsPanel.vue index 67bb8e39d..15da455b7 100644 --- a/src/dev-ui/app/components/graph-management/GraphDesignRelationshipsPanel.vue +++ 
b/src/dev-ui/app/components/graph-management/GraphDesignRelationshipsPanel.vue @@ -6,7 +6,7 @@ import { Card, CardContent, CardHeader, CardTitle } from '@/components/ui/card' import { Badge } from '@/components/ui/badge' import { Button } from '@/components/ui/button' import GraphDesignRelationshipTypeList from '@/components/graph-management/GraphDesignRelationshipTypeList.vue' -import { type DesignArtifactsResponse } from '@/utils/kgDesignArtifacts' +import { type DesignArtifactsResponse, DEFAULT_DESIGN_ARTIFACTS_INSTANCES_PER_TYPE } from '@/utils/kgDesignArtifacts' const props = withDefaults( defineProps<{ @@ -21,7 +21,6 @@ const { apiFetch } = useApiClient() const loading = ref(true) const data = ref<DesignArtifactsResponse | null>(null) -const instancePage = ref<Record<string, number>>({}) async function fetchRelationships(options: { preserveUiState?: boolean } = {}) { if (!props.kgId) { @@ -32,12 +31,11 @@ async function fetchRelationships(options: { preserveUiState?: boolean } = {}) { const preserveUiState = options.preserveUiState === true && data.value !== null if (!preserveUiState) { loading.value = true - instancePage.value = {} } try { data.value = await apiFetch<DesignArtifactsResponse>( `/management/knowledge-graphs/${props.kgId}/design-artifacts`, - { query: { limit: 500 } }, + { query: { limit: DEFAULT_DESIGN_ARTIFACTS_INSTANCES_PER_TYPE } }, ) } catch (err: unknown) { toast.error('Failed to load relationship design artifacts', { @@ -51,10 +49,6 @@ async function fetchRelationships(options: { preserveUiState?: boolean } = {}) { const relationshipRows = computed(() => data.value?.relationships ?? 
[]) -function setInstancePage(key: string, page: number) { - instancePage.value = { ...instancePage.value, [key]: page } -} - watch( () => [props.kgId, props.reloadNonce] as const, ([, reloadNonce]) => { @@ -125,19 +119,18 @@ defineExpose({ refresh: fetchRelationships }) </div> <GraphDesignRelationshipTypeList + :kg-id="kgId" :rows="relationshipRows" - :instance-page="instancePage" - @update:instance-page="setInstancePage" + :reload-nonce="reloadNonce" /> <p v-if="data.limits.relationship_instances_truncated" class="text-xs text-muted-foreground" > - Relationship counts reflect the full graph. The browsable instance list is capped at - {{ data.limits.relationship_instances_returned }} of - {{ data.counts.relationship_instances }} total instances (API limit - {{ data.limits.requested }}). + Each relationship type loads the first + {{ data.limits.instances_per_type ?? data.limits.requested }} instances by default. + Expand a type to search or load the next batch. </p> </template> </template> diff --git a/src/dev-ui/app/components/graph-management/GraphSchemaExplorer.vue b/src/dev-ui/app/components/graph-management/GraphSchemaExplorer.vue index 201a71575..f3bcbd16f 100644 --- a/src/dev-ui/app/components/graph-management/GraphSchemaExplorer.vue +++ b/src/dev-ui/app/components/graph-management/GraphSchemaExplorer.vue @@ -11,6 +11,7 @@ import GraphDesignRelationshipTypeList from '@/components/graph-management/Graph import { type DesignArtifactEntityType, type DesignArtifactsResponse, + DEFAULT_DESIGN_ARTIFACTS_INSTANCES_PER_TYPE, } from '@/utils/kgDesignArtifacts' const props = withDefaults( @@ -26,8 +27,6 @@ const { apiFetch } = useApiClient() const loading = ref(true) const data = ref<DesignArtifactsResponse | null>(null) const activeTab = ref<'entities' | 'relationships'>('entities') -const entityInstancePage = ref<Record<string, number>>({}) -const relationshipInstancePage = ref<Record<string, number>>({}) async function fetchArtifacts(options: { preserveUiState?: 
boolean } = {}) { if (!props.kgId) { @@ -38,13 +37,11 @@ async function fetchArtifacts(options: { preserveUiState?: boolean } = {}) { const preserveUiState = options.preserveUiState === true && data.value !== null if (!preserveUiState) { loading.value = true - entityInstancePage.value = {} - relationshipInstancePage.value = {} } try { data.value = await apiFetch<DesignArtifactsResponse>( `/management/knowledge-graphs/${props.kgId}/design-artifacts`, - { query: { limit: 500 } }, + { query: { limit: DEFAULT_DESIGN_ARTIFACTS_INSTANCES_PER_TYPE } }, ) } catch (err: unknown) { toast.error('Failed to load graph schema', { @@ -179,33 +176,31 @@ defineExpose({ refresh: fetchArtifacts }) <TabsContent value="entities" class="mt-0 space-y-3 px-4 py-4"> <GraphDesignEntityTypeList + :kg-id="kgId" :rows="entityRows" - :instance-page="entityInstancePage" - @update:instance-page="(key, page) => { entityInstancePage = { ...entityInstancePage, [key]: page } }" + :reload-nonce="reloadNonce" /> <p v-if="data.limits.entity_instances_truncated" class="text-xs text-muted-foreground" > - Browsable entity instances capped at {{ data.limits.entity_instances_returned }} of - {{ data.counts.entity_instances }} total (API limit {{ data.limits.requested }}). Type - badges show full counts. + Each entity type loads the first + {{ data.limits.instances_per_type ?? data.limits.requested }} instances by default. 
</p> </TabsContent> <TabsContent value="relationships" class="mt-0 space-y-3 px-4 py-4"> <GraphDesignRelationshipTypeList + :kg-id="kgId" :rows="relationshipRows" - :instance-page="relationshipInstancePage" - @update:instance-page="(key, page) => { relationshipInstancePage = { ...relationshipInstancePage, [key]: page } }" + :reload-nonce="reloadNonce" /> <p v-if="data.limits.relationship_instances_truncated" class="text-xs text-muted-foreground" > - Browsable relationship instances capped at - {{ data.limits.relationship_instances_returned }} of - {{ data.counts.relationship_instances }} total (API limit {{ data.limits.requested }}). + Each relationship type loads the first + {{ data.limits.instances_per_type ?? data.limits.requested }} instances by default. </p> </TabsContent> </Tabs> diff --git a/src/dev-ui/app/composables/useDesignArtifactInstanceQuery.ts b/src/dev-ui/app/composables/useDesignArtifactInstanceQuery.ts new file mode 100644 index 000000000..efb3f36f6 --- /dev/null +++ b/src/dev-ui/app/composables/useDesignArtifactInstanceQuery.ts @@ -0,0 +1,249 @@ +import { reactive, watch, type Ref } from 'vue' +import { + type DesignArtifactInstance, + DESIGN_ARTIFACT_INSTANCE_PAGE_SIZE, +} from '@/utils/kgDesignArtifacts' + +export interface DesignArtifactInstanceQueryState { + instances: DesignArtifactInstance[] + total: number + loading: boolean + searchProperty: string + searchValue: string + activeSearch: boolean +} + +function emptyState(): DesignArtifactInstanceQueryState { + return { + instances: [], + total: 0, + loading: false, + searchProperty: '', + searchValue: '', + activeSearch: false, + } +} + +export function useDesignArtifactInstanceQuery(kgId: Ref<string>, resetNonce: Ref<number>) { + const { apiFetch } = useApiClient() + const entityStates = reactive<Record<string, DesignArtifactInstanceQueryState>>({}) + const relationshipStates = reactive<Record<string, DesignArtifactInstanceQueryState>>({}) + + watch([kgId, resetNonce], () => { + for (const 
key of Object.keys(entityStates)) delete entityStates[key] + for (const key of Object.keys(relationshipStates)) delete relationshipStates[key] + }) + + function ensureEntityState( + key: string, + seed?: { instances?: DesignArtifactInstance[]; total?: number }, + ): DesignArtifactInstanceQueryState { + if (!entityStates[key]) { + entityStates[key] = { + ...emptyState(), + instances: [...(seed?.instances ?? [])], + total: seed?.total ?? seed?.instances?.length ?? 0, + } + } + return entityStates[key] + } + + function ensureRelationshipState( + key: string, + seed?: { instances?: DesignArtifactInstance[]; total?: number }, + ): DesignArtifactInstanceQueryState { + if (!relationshipStates[key]) { + relationshipStates[key] = { + ...emptyState(), + instances: [...(seed?.instances ?? [])], + total: seed?.total ?? seed?.instances?.length ?? 0, + } + } + return relationshipStates[key] + } + + async function fetchEntityInstances( + key: string, + params: { + entityType: string + offset: number + propertyName?: string + propertyValue?: string + }, + ) { + const state = ensureEntityState(key) + state.loading = true + try { + const result = await apiFetch<{ + instances: DesignArtifactInstance[] + total: number + }>(`/management/knowledge-graphs/${kgId.value}/design-artifacts/entity-instances`, { + query: { + entity_type: params.entityType, + limit: DESIGN_ARTIFACT_INSTANCE_PAGE_SIZE, + offset: params.offset, + ...(params.propertyName && params.propertyValue !== undefined + ? 
{ property_name: params.propertyName, property_value: params.propertyValue } + : {}), + }, + }) + return result + } finally { + state.loading = false + } + } + + async function fetchRelationshipInstances( + key: string, + params: { + relationshipType: string + sourceEntityType: string + targetEntityType: string + offset: number + propertyName?: string + propertyValue?: string + }, + ) { + const state = ensureRelationshipState(key) + state.loading = true + try { + return await apiFetch<{ + instances: DesignArtifactInstance[] + total: number + }>(`/management/knowledge-graphs/${kgId.value}/design-artifacts/relationship-instances`, { + query: { + relationship_type: params.relationshipType, + source_entity_type: params.sourceEntityType, + target_entity_type: params.targetEntityType, + limit: DESIGN_ARTIFACT_INSTANCE_PAGE_SIZE, + offset: params.offset, + ...(params.propertyName && params.propertyValue !== undefined + ? { property_name: params.propertyName, property_value: params.propertyValue } + : {}), + }, + }) + } finally { + state.loading = false + } + } + + async function searchEntityInstances( + key: string, + params: { entityType: string; propertyName: string; propertyValue: string }, + ) { + const state = ensureEntityState(key) + state.searchProperty = params.propertyName + state.searchValue = params.propertyValue + state.activeSearch = true + const result = await fetchEntityInstances(key, { + entityType: params.entityType, + offset: 0, + propertyName: params.propertyName, + propertyValue: params.propertyValue, + }) + state.instances = result.instances + state.total = result.total + } + + async function clearEntitySearch( + key: string, + params: { entityType: string; seedInstances: DesignArtifactInstance[]; total: number }, + ) { + const state = ensureEntityState(key) + state.searchProperty = '' + state.searchValue = '' + state.activeSearch = false + state.instances = [...params.seedInstances] + state.total = params.total + } + + async function 
loadMoreEntityInstances(key: string, params: { entityType: string }) { + const state = ensureEntityState(key) + const result = await fetchEntityInstances(key, { + entityType: params.entityType, + offset: state.instances.length, + propertyName: state.activeSearch ? state.searchProperty : undefined, + propertyValue: state.activeSearch ? state.searchValue : undefined, + }) + state.instances = [...state.instances, ...result.instances] + state.total = result.total + } + + async function searchRelationshipInstances( + key: string, + params: { + relationshipType: string + sourceEntityType: string + targetEntityType: string + propertyName: string + propertyValue: string + }, + ) { + const state = ensureRelationshipState(key) + state.searchProperty = params.propertyName + state.searchValue = params.propertyValue + state.activeSearch = true + const result = await fetchRelationshipInstances(key, { + relationshipType: params.relationshipType, + sourceEntityType: params.sourceEntityType, + targetEntityType: params.targetEntityType, + offset: 0, + propertyName: params.propertyName, + propertyValue: params.propertyValue, + }) + state.instances = result.instances + state.total = result.total + } + + async function clearRelationshipSearch( + key: string, + params: { + relationshipType: string + sourceEntityType: string + targetEntityType: string + seedInstances: DesignArtifactInstance[] + total: number + }, + ) { + const state = ensureRelationshipState(key) + state.searchProperty = '' + state.searchValue = '' + state.activeSearch = false + state.instances = [...params.seedInstances] + state.total = params.total + } + + async function loadMoreRelationshipInstances( + key: string, + params: { + relationshipType: string + sourceEntityType: string + targetEntityType: string + }, + ) { + const state = ensureRelationshipState(key) + const result = await fetchRelationshipInstances(key, { + relationshipType: params.relationshipType, + sourceEntityType: params.sourceEntityType, + 
targetEntityType: params.targetEntityType, + offset: state.instances.length, + propertyName: state.activeSearch ? state.searchProperty : undefined, + propertyValue: state.activeSearch ? state.searchValue : undefined, + }) + state.instances = [...state.instances, ...result.instances] + state.total = result.total + } + + return { + entityStates, + relationshipStates, + ensureEntityState, + ensureRelationshipState, + searchEntityInstances, + clearEntitySearch, + loadMoreEntityInstances, + searchRelationshipInstances, + clearRelationshipSearch, + loadMoreRelationshipInstances, + } +} diff --git a/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue b/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue index 93a0bb741..924aae8cb 100644 --- a/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue +++ b/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue @@ -112,7 +112,7 @@ import { import { streamExtractionChatTurn, streamRuntimeWarmup } from '@/utils/kgExtractionChat' import { applyThinkingRecentUpdate } from '@/utils/thinkingActivityLines' import type { DesignArtifactsResponse } from '@/utils/kgDesignArtifacts' -import { primaryRelationshipTypeLabels } from '@/utils/kgDesignArtifacts' +import { primaryRelationshipTypeLabels, DEFAULT_DESIGN_ARTIFACTS_INSTANCES_PER_TYPE } from '@/utils/kgDesignArtifacts' const runtimeConfig = useRuntimeConfig() const { accessToken } = useAuth() @@ -517,7 +517,7 @@ async function refreshDesignArtifacts(options: { silent?: boolean } = {}) { try { const artifacts = await apiFetch<DesignArtifactsResponse>( `/management/knowledge-graphs/${kgId.value}/design-artifacts`, - { query: { limit: 500 } }, + { query: { limit: DEFAULT_DESIGN_ARTIFACTS_INSTANCES_PER_TYPE } }, ) const applyArtifactRefresh = () => { entityTypeLabels.value = Object.keys(artifacts.entities ?? 
{}).sort() diff --git a/src/dev-ui/app/tests/kg-design-artifacts.test.ts b/src/dev-ui/app/tests/kg-design-artifacts.test.ts index 5b0d984af..d8cf7cf5b 100644 --- a/src/dev-ui/app/tests/kg-design-artifacts.test.ts +++ b/src/dev-ui/app/tests/kg-design-artifacts.test.ts @@ -3,6 +3,7 @@ import { isPrimaryRelationshipTypeForDisplay, primaryRelationshipTypeCount, primaryRelationshipTypeLabels, + searchableEntityProperties, } from '../utils/kgDesignArtifacts' describe('kgDesignArtifacts relationship type counting', () => { @@ -32,3 +33,21 @@ describe('kgDesignArtifacts relationship type counting', () => { ).toBe(false) }) }) + +describe('searchable entity properties', () => { + it('includes ontology and searchable system properties', () => { + const props = searchableEntityProperties({ + type: 'Adapter', + instance_count: 1, + required_properties: ['slug', 'data_source_id'], + optional_properties: ['transport'], + property_definitions: { + slug: 'slug', + data_source_id: 'data source id', + transport: 'transport', + }, + }) + expect(props).toContain('slug') + expect(props).toContain('transport') + }) +}) diff --git a/src/dev-ui/app/utils/kgDesignArtifacts.ts b/src/dev-ui/app/utils/kgDesignArtifacts.ts index 6976c16c1..22e21016c 100644 --- a/src/dev-ui/app/utils/kgDesignArtifacts.ts +++ b/src/dev-ui/app/utils/kgDesignArtifacts.ts @@ -82,6 +82,7 @@ export interface DesignArtifactsResponse { } limits: { requested: number + instances_per_type?: number entity_instances_returned: number relationship_instances_returned: number entity_instances_truncated: boolean @@ -90,6 +91,8 @@ export interface DesignArtifactsResponse { } export const DESIGN_ARTIFACTS_PAGE_SIZE = 20 +export const DESIGN_ARTIFACT_INSTANCE_PAGE_SIZE = 100 +export const DEFAULT_DESIGN_ARTIFACTS_INSTANCES_PER_TYPE = 100 export interface OntologyEdgeTypeRef { label: string @@ -110,6 +113,22 @@ export function primaryRelationshipTypeCount(edgeTypes: OntologyEdgeTypeRef[]): return 
primaryRelationshipTypeLabels(edgeTypes).length } +export function searchableEntityProperties(row: DesignArtifactEntityType): string[] { + const keys = new Set<string>(['slug', 'data_source_id']) + for (const key of row.required_properties ?? []) keys.add(key) + for (const key of row.optional_properties ?? []) keys.add(key) + for (const key of Object.keys(row.property_definitions ?? {})) keys.add(key) + return [...keys].sort() +} + +export function searchableRelationshipProperties(row: DesignArtifactRelationshipType): string[] { + const keys = new Set<string>(['data_source_id']) + for (const key of row.required_parameters ?? []) keys.add(key) + for (const key of row.optional_parameters ?? []) keys.add(key) + for (const key of Object.keys(row.parameter_definitions ?? {})) keys.add(key) + return [...keys].sort() +} + export function pageSlice<T>( pageByKey: Record<string, number>, key: string, From e452e43f260e09e30b127147706ec0494f21b054 Mon Sep 17 00:00:00 2001 From: aredenba-rh <aredenba@redhat.com> Date: Sun, 14 Jun 2026 17:21:52 -0400 Subject: [PATCH 138/153] feat(gma): per-mode sticky sessions with explicit start/end lifecycle Scope GMA containers and conversations by graph-management UI mode (three parallel sessions per user/KG), add start/end/clear session APIs, terminate containers without auto-restart, expire idle sessions after 1 hour, and archive Graph Writes History only when a closed session has write_ops > 0. Update the manage UI with Start/End session controls and fix archived write count sourcing. 
Co-authored-by: Cursor <cursoragent@cursor.com> --- .../application/agent_session_service.py | 174 ++++++++-- .../application/chat_turn_service.py | 14 +- .../graph_management_session_journal.py | 5 +- .../sticky_session_runtime_service.py | 14 +- src/api/extraction/dependencies.py | 4 + .../domain/entities/agent_session.py | 5 +- .../domain/graph_management_session_scope.py | 12 + .../container_workload_runtime.py | 12 + .../infrastructure/models/agent_session.py | 8 + .../repositories/agent_session_repository.py | 58 +++- .../infrastructure/workload_runtime.py | 10 + .../workload_runtime_settings.py | 2 +- src/api/extraction/ports/repositories.py | 15 +- src/api/extraction/ports/runtime.py | 11 + src/api/extraction/presentation/models.py | 8 + src/api/extraction/presentation/routes.py | 94 +++++- ...dd_graph_management_ui_mode_to_sessions.py | 45 +++ .../test_session_history_retention.py | 8 +- .../application/test_agent_session_service.py | 250 ++++++++++++-- .../application/test_chat_turn_service.py | 70 +++- .../test_graph_management_session_journal.py | 15 +- .../test_session_history_service.py | 45 ++- .../test_sticky_session_runtime_service.py | 45 ++- .../infrastructure/test_workload_runtime.py | 25 ++ .../extraction/presentation/test_routes.py | 88 +++-- .../extraction/SharedConversationPanel.vue | 27 +- .../GraphExtractionArchivedHistory.vue | 2 +- .../pages/knowledge-graphs/[kgId]/manage.vue | 318 +++++++++--------- .../knowledge-graph-manage-workspace.test.ts | 26 +- src/dev-ui/app/utils/kgGraphManagement.ts | 6 + src/dev-ui/app/utils/kgManageWorkspaceHub.ts | 2 +- 31 files changed, 1106 insertions(+), 312 deletions(-) create mode 100644 src/api/extraction/domain/graph_management_session_scope.py create mode 100644 src/api/infrastructure/migrations/versions/k4l5m6n7o8p9_add_graph_management_ui_mode_to_sessions.py diff --git a/src/api/extraction/application/agent_session_service.py b/src/api/extraction/application/agent_session_service.py index 
18747cebc..f8135a9d0 100644 --- a/src/api/extraction/application/agent_session_service.py +++ b/src/api/extraction/application/agent_session_service.py @@ -3,7 +3,7 @@ from __future__ import annotations from dataclasses import dataclass -from datetime import UTC, datetime +from datetime import UTC, datetime, timedelta from ulid import ULID @@ -14,8 +14,13 @@ ExtractionSkillResolutionService, ) from extraction.domain.entities.agent_session import ExtractionAgentSession -from extraction.domain.value_objects import BootstrapIntakePath, ExtractionSessionMode -from extraction.domain.value_objects import ExtractionSessionRunMetric +from extraction.domain.graph_management_session_scope import resolve_backend_session_mode +from extraction.domain.value_objects import ( + BootstrapIntakePath, + ExtractionSessionMode, + ExtractionSessionRunMetric, + GraphManagementUiMode, +) from extraction.ports.repositories import ( IExtractionAgentSessionRepository, IExtractionSessionRunMetricsReader, @@ -41,12 +46,14 @@ def __init__( run_metrics_reader: IExtractionSessionRunMetricsReader | None = None, sticky_runtime_manager: IStickySessionRuntimeManager | None = None, session_journal_service: GraphManagementSessionJournalService | None = None, + idle_session_ttl: timedelta = timedelta(hours=1), ) -> None: self._repository = repository self._skill_resolution_service = skill_resolution_service self._run_metrics_reader = run_metrics_reader self._sticky_runtime_manager = sticky_runtime_manager self._session_journal_service = session_journal_service + self._idle_session_ttl = idle_session_ttl @staticmethod def _build_bootstrap_intake_prompt() -> str: @@ -57,30 +64,59 @@ def _build_bootstrap_intake_prompt() -> str: "(2) guided question-by-question co-design." 
) - async def get_or_create_active_session( - self, - user_id: str, - knowledge_graph_id: str, - mode: ExtractionSessionMode, - ) -> ExtractionAgentSession: - existing = await self._repository.find_active_by_scope( + async def _expire_idle_sessions(self, user_id: str, knowledge_graph_id: str) -> None: + now = datetime.now(UTC) + if self._sticky_runtime_manager is not None: + self._sticky_runtime_manager.cleanup_expired(now=now) + + active_sessions = await self._repository.list_active_by_user_and_kg( user_id=user_id, knowledge_graph_id=knowledge_graph_id, - mode=mode, ) - if existing is not None: - return existing + for session in active_sessions: + if session.updated_at + self._idle_session_ttl <= now: + await self._end_session_record(session) + + async def _terminate_sticky_runtime(self, session: ExtractionAgentSession) -> None: + if self._sticky_runtime_manager is None: + return + self._sticky_runtime_manager.terminate_runtime( + session_id=session.id, + user_id=session.user_id, + knowledge_graph_id=session.knowledge_graph_id, + mode=session.mode.value, + ) + async def _end_session_record(self, session: ExtractionAgentSession) -> None: + if not session.is_active: + return + await self._terminate_sticky_runtime(session) + if self._session_journal_service is not None: + await self._session_journal_service.archive_session_mutations(session) + session.archive() + await self._repository.save(session) + + async def _create_session( + self, + *, + user_id: str, + knowledge_graph_id: str, + ui_mode: GraphManagementUiMode, + ) -> ExtractionAgentSession: + mode = resolve_backend_session_mode(ui_mode) session = ExtractionAgentSession( id=str(ULID()), user_id=user_id, knowledge_graph_id=knowledge_graph_id, mode=mode, + graph_management_ui_mode=ui_mode, ) + session.runtime_context["graph_management_ui_mode"] = ui_mode.value if self._skill_resolution_service is not None: - resolved = await self._skill_resolution_service.resolve_for_session( + resolved = await 
self._skill_resolution_service.resolve_for_graph_management_turn( knowledge_graph_id=knowledge_graph_id, mode=mode, + ui_mode=ui_mode, ) session.runtime_context["agent_configuration"] = { "system_prompt": resolved.system_prompt, @@ -104,6 +140,84 @@ async def get_or_create_active_session( await self._repository.save(session) return session + async def get_active_session( + self, + user_id: str, + knowledge_graph_id: str, + ui_mode: GraphManagementUiMode, + ) -> ExtractionAgentSession | None: + await self._expire_idle_sessions(user_id, knowledge_graph_id) + return await self._repository.find_active_by_ui_mode( + user_id=user_id, + knowledge_graph_id=knowledge_graph_id, + ui_mode=ui_mode, + ) + + async def start_session( + self, + user_id: str, + knowledge_graph_id: str, + ui_mode: GraphManagementUiMode, + ) -> ExtractionAgentSession: + await self._expire_idle_sessions(user_id, knowledge_graph_id) + existing = await self._repository.find_active_by_ui_mode( + user_id=user_id, + knowledge_graph_id=knowledge_graph_id, + ui_mode=ui_mode, + ) + if existing is not None: + return existing + return await self._create_session( + user_id=user_id, + knowledge_graph_id=knowledge_graph_id, + ui_mode=ui_mode, + ) + + async def end_session( + self, + user_id: str, + knowledge_graph_id: str, + ui_mode: GraphManagementUiMode, + ) -> ExtractionAgentSession | None: + await self._expire_idle_sessions(user_id, knowledge_graph_id) + active = await self._repository.find_active_by_ui_mode( + user_id=user_id, + knowledge_graph_id=knowledge_graph_id, + ui_mode=ui_mode, + ) + if active is None: + return None + await self._end_session_record(active) + return active + + async def get_or_create_active_session( + self, + user_id: str, + knowledge_graph_id: str, + mode: ExtractionSessionMode, + ui_mode: GraphManagementUiMode | None = None, + ) -> ExtractionAgentSession: + """Return active session for UI mode or create one (legacy chat auto-start).""" + resolved_ui_mode = ui_mode or ( + 
GraphManagementUiMode.INITIAL_SCHEMA_DESIGN + if mode == ExtractionSessionMode.SCHEMA_BOOTSTRAP + else GraphManagementUiMode.EXTRACTION_JOBS + ) + if resolve_backend_session_mode(resolved_ui_mode) != mode: + raise ValueError("graph_management_ui_mode does not match session mode") + existing = await self.get_active_session( + user_id=user_id, + knowledge_graph_id=knowledge_graph_id, + ui_mode=resolved_ui_mode, + ) + if existing is not None: + return existing + return await self.start_session( + user_id=user_id, + knowledge_graph_id=knowledge_graph_id, + ui_mode=resolved_ui_mode, + ) + async def save_session(self, session: ExtractionAgentSession) -> ExtractionAgentSession: """Persist session mutations after a chat turn.""" session.updated_at = datetime.now(UTC) @@ -114,30 +228,17 @@ async def clear_chat( self, user_id: str, knowledge_graph_id: str, - mode: ExtractionSessionMode, + ui_mode: GraphManagementUiMode, ) -> ExtractionAgentSession: - active = await self._repository.find_active_by_scope( + await self.end_session( user_id=user_id, knowledge_graph_id=knowledge_graph_id, - mode=mode, + ui_mode=ui_mode, ) - if active is not None: - if self._sticky_runtime_manager is not None: - self._sticky_runtime_manager.reset_runtime( - session_id=active.id, - user_id=user_id, - knowledge_graph_id=knowledge_graph_id, - mode=mode.value, - ) - if self._session_journal_service is not None: - await self._session_journal_service.archive_session_mutations(active) - active.archive() - await self._repository.save(active) - - return await self.get_or_create_active_session( + return await self.start_session( user_id=user_id, knowledge_graph_id=knowledge_graph_id, - mode=mode, + ui_mode=ui_mode, ) async def list_sessions( @@ -186,8 +287,7 @@ async def archive_session(self, session_id: str) -> ExtractionAgentSession | Non if session is None: return None if session.is_active: - session.archive() - await self._repository.save(session) + await self._end_session_record(session) return session 
async def set_bootstrap_intake_path_for_active_session( @@ -198,11 +298,13 @@ async def set_bootstrap_intake_path_for_active_session( capabilities_goals: str | None, ) -> ExtractionAgentSession: """Persist bootstrap path selection for session continuity.""" - session = await self.get_or_create_active_session( + session = await self.get_active_session( user_id=user_id, knowledge_graph_id=knowledge_graph_id, - mode=ExtractionSessionMode.SCHEMA_BOOTSTRAP, + ui_mode=GraphManagementUiMode.INITIAL_SCHEMA_DESIGN, ) + if session is None: + raise ValueError("No active initial schema design session") intake = dict(session.runtime_context.get("bootstrap_intake", {})) intake["status"] = "path_selected" intake["selected_path"] = selected_path.value diff --git a/src/api/extraction/application/chat_turn_service.py b/src/api/extraction/application/chat_turn_service.py index 978f95aa5..c47ecd995 100644 --- a/src/api/extraction/application/chat_turn_service.py +++ b/src/api/extraction/application/chat_turn_service.py @@ -74,11 +74,21 @@ async def stream_chat_turn( } return - session = await self._session_service.get_or_create_active_session( + session = await self._session_service.get_active_session( user_id=user_id, knowledge_graph_id=knowledge_graph_id, - mode=mode, + ui_mode=ui_mode, ) + if session is None: + yield { + "type": "done", + "ok": False, + "error": { + "code": "SESSION_NOT_STARTED", + "message": "Start a Graph Management Assistant session before chatting.", + }, + } + return async for event in self._runtime_service.ensure_runtime_for_chat( tenant_id=tenant_id, diff --git a/src/api/extraction/application/graph_management_session_journal.py b/src/api/extraction/application/graph_management_session_journal.py index 599f12d2c..c3c986ae0 100644 --- a/src/api/extraction/application/graph_management_session_journal.py +++ b/src/api/extraction/application/graph_management_session_journal.py @@ -50,6 +50,8 @@ def _journal_token_total(journal: dict[str, object]) -> int: def 
_job_set_name_for_session(session: ExtractionAgentSession) -> str: + if session.graph_management_ui_mode is not None: + return _JOB_SET_BY_UI_MODE[session.graph_management_ui_mode.value] ui_mode = str(session.runtime_context.get("graph_management_ui_mode") or "") if ui_mode in _JOB_SET_BY_UI_MODE: return _JOB_SET_BY_UI_MODE[ui_mode] @@ -121,8 +123,7 @@ async def archive_session_mutations(self, session: ExtractionAgentSession) -> No jsonl = str(journal.get("jsonl") or "").strip() metrics = metrics_from_mutation_jsonl(jsonl) if jsonl else {} write_ops = int(metrics.get("write_ops") or 0) - token_total = _journal_token_total(journal) - if write_ops <= 0 and token_total <= 0: + if write_ops <= 0: return now = datetime.now(UTC) diff --git a/src/api/extraction/application/sticky_session_runtime_service.py b/src/api/extraction/application/sticky_session_runtime_service.py index 52d49f24f..c740b2015 100644 --- a/src/api/extraction/application/sticky_session_runtime_service.py +++ b/src/api/extraction/application/sticky_session_runtime_service.py @@ -65,11 +65,21 @@ async def stream_runtime_warmup( mode: ExtractionSessionMode, ui_mode: GraphManagementUiMode, ) -> AsyncIterator[dict[str, Any]]: - session = await self._session_service.get_or_create_active_session( + session = await self._session_service.get_active_session( user_id=user_id, knowledge_graph_id=knowledge_graph_id, - mode=mode, + ui_mode=ui_mode, ) + if session is None: + yield { + "type": "done", + "ok": False, + "error": { + "code": "SESSION_NOT_STARTED", + "message": "Start a Graph Management Assistant session before warming runtime.", + }, + } + return async for event in self._stream_prepare_runtime( tenant_id=tenant_id, user_id=user_id, diff --git a/src/api/extraction/dependencies.py b/src/api/extraction/dependencies.py index db8f6d40e..946ddd4dd 100644 --- a/src/api/extraction/dependencies.py +++ b/src/api/extraction/dependencies.py @@ -36,6 +36,8 @@ create_sticky_session_runtime_manager, 
get_workload_credential_issuer, ) +from datetime import timedelta + from extraction.infrastructure.workload_runtime_settings import get_extraction_workload_runtime_settings from extraction.ports.runtime import ( IEphemeralExtractionWorkerLauncher, @@ -61,6 +63,7 @@ def _build_extraction_agent_session_service( *, sticky_runtime_manager: IStickySessionRuntimeManager | None = None, ) -> ExtractionAgentSessionService: + runtime_settings = get_extraction_workload_runtime_settings() skill_resolution_service = ExtractionSkillResolutionService( override_repository=ExtractionSkillOverrideRepository() ) @@ -73,6 +76,7 @@ def _build_extraction_agent_session_service( session_repository=ExtractionAgentSessionRepository(session=session), extraction_job_repository=ExtractionJobRepository(session=session), ), + idle_session_ttl=timedelta(minutes=runtime_settings.session_ttl_minutes), ) diff --git a/src/api/extraction/domain/entities/agent_session.py b/src/api/extraction/domain/entities/agent_session.py index 50903162e..ac182f19e 100644 --- a/src/api/extraction/domain/entities/agent_session.py +++ b/src/api/extraction/domain/entities/agent_session.py @@ -6,17 +6,18 @@ from datetime import UTC, datetime from typing import Any -from extraction.domain.value_objects import ExtractionSessionMode +from extraction.domain.value_objects import ExtractionSessionMode, GraphManagementUiMode @dataclass class ExtractionAgentSession: - """Long-running conversational session scoped to user/KG/mode.""" + """Long-running conversational session scoped to user/KG/UI mode.""" id: str user_id: str knowledge_graph_id: str mode: ExtractionSessionMode + graph_management_ui_mode: GraphManagementUiMode | None = None message_history: list[dict[str, Any]] = field(default_factory=list) runtime_context: dict[str, Any] = field(default_factory=dict) created_at: datetime = field(default_factory=lambda: datetime.now(UTC)) diff --git a/src/api/extraction/domain/graph_management_session_scope.py 
b/src/api/extraction/domain/graph_management_session_scope.py new file mode 100644 index 000000000..33001badf --- /dev/null +++ b/src/api/extraction/domain/graph_management_session_scope.py @@ -0,0 +1,12 @@ +"""Scope helpers for Graph Management Assistant sessions.""" + +from __future__ import annotations + +from extraction.domain.value_objects import ExtractionSessionMode, GraphManagementUiMode + + +def resolve_backend_session_mode(ui_mode: GraphManagementUiMode) -> ExtractionSessionMode: + """Map graph-management UI mode to extraction session backend mode.""" + if ui_mode == GraphManagementUiMode.INITIAL_SCHEMA_DESIGN: + return ExtractionSessionMode.SCHEMA_BOOTSTRAP + return ExtractionSessionMode.EXTRACTION_OPERATIONS diff --git a/src/api/extraction/infrastructure/container_workload_runtime.py b/src/api/extraction/infrastructure/container_workload_runtime.py index 173e9f210..c6208988a 100644 --- a/src/api/extraction/infrastructure/container_workload_runtime.py +++ b/src/api/extraction/infrastructure/container_workload_runtime.py @@ -147,6 +147,18 @@ def reset_runtime( bootstrap=bootstrap, ) + def terminate_runtime( + self, + *, + session_id: str, + user_id: str, + knowledge_graph_id: str, + mode: str, + ) -> None: + existing = self._leases.pop(session_id, None) + if existing is not None: + self._terminate_container(existing.container_id) + def cleanup_expired(self, *, now: datetime) -> list[str]: expired_sessions = [ session_id diff --git a/src/api/extraction/infrastructure/models/agent_session.py b/src/api/extraction/infrastructure/models/agent_session.py index 02d282592..bbf76ec20 100644 --- a/src/api/extraction/infrastructure/models/agent_session.py +++ b/src/api/extraction/infrastructure/models/agent_session.py @@ -20,6 +20,7 @@ class ExtractionAgentSessionModel(Base): user_id: Mapped[str] = mapped_column(String(255), nullable=False) knowledge_graph_id: Mapped[str] = mapped_column(String(26), nullable=False) mode: Mapped[str] = mapped_column(String(64), 
nullable=False) + graph_management_ui_mode: Mapped[str | None] = mapped_column(String(64), nullable=True) message_history: Mapped[list[dict]] = mapped_column( JSONB, nullable=False, default=list ) @@ -48,6 +49,13 @@ class ExtractionAgentSessionModel(Base): "mode", "archived_at", ), + Index( + "idx_extract_sessions_ui_mode_active", + "user_id", + "knowledge_graph_id", + "graph_management_ui_mode", + "archived_at", + ), Index( "idx_extract_sessions_scope_updated", "user_id", diff --git a/src/api/extraction/infrastructure/repositories/agent_session_repository.py b/src/api/extraction/infrastructure/repositories/agent_session_repository.py index 01596dc64..ca9f761fe 100644 --- a/src/api/extraction/infrastructure/repositories/agent_session_repository.py +++ b/src/api/extraction/infrastructure/repositories/agent_session_repository.py @@ -6,7 +6,7 @@ from sqlalchemy.ext.asyncio import AsyncSession from extraction.domain.entities.agent_session import ExtractionAgentSession -from extraction.domain.value_objects import ExtractionSessionMode +from extraction.domain.value_objects import ExtractionSessionMode, GraphManagementUiMode from extraction.infrastructure.models.agent_session import ExtractionAgentSessionModel from extraction.ports.repositories import IExtractionAgentSessionRepository @@ -29,6 +29,11 @@ async def save(self, session: ExtractionAgentSession) -> None: user_id=session.user_id, knowledge_graph_id=session.knowledge_graph_id, mode=session.mode.value, + graph_management_ui_mode=( + session.graph_management_ui_mode.value + if session.graph_management_ui_mode is not None + else None + ), message_history=session.message_history, runtime_context=session.runtime_context, created_at=session.created_at, @@ -39,6 +44,11 @@ async def save(self, session: ExtractionAgentSession) -> None: else: model.message_history = session.message_history model.runtime_context = session.runtime_context + model.graph_management_ui_mode = ( + session.graph_management_ui_mode.value + if 
session.graph_management_ui_mode is not None + else None + ) model.updated_at = session.updated_at model.archived_at = session.archived_at await self._session.flush() @@ -77,6 +87,46 @@ async def find_active_by_scope( return None return self._to_domain(model) + async def find_active_by_ui_mode( + self, + user_id: str, + knowledge_graph_id: str, + ui_mode: GraphManagementUiMode, + ) -> ExtractionAgentSession | None: + stmt = ( + select(ExtractionAgentSessionModel) + .where( + ExtractionAgentSessionModel.user_id == user_id, + ExtractionAgentSessionModel.knowledge_graph_id == knowledge_graph_id, + ExtractionAgentSessionModel.graph_management_ui_mode == ui_mode.value, + ExtractionAgentSessionModel.archived_at.is_(None), + ) + .order_by(desc(ExtractionAgentSessionModel.updated_at)) + .limit(1) + ) + result = await self._session.execute(stmt) + model = result.scalar_one_or_none() + if model is None: + return None + return self._to_domain(model) + + async def list_active_by_user_and_kg( + self, + user_id: str, + knowledge_graph_id: str, + ) -> list[ExtractionAgentSession]: + stmt = ( + select(ExtractionAgentSessionModel) + .where( + ExtractionAgentSessionModel.user_id == user_id, + ExtractionAgentSessionModel.knowledge_graph_id == knowledge_graph_id, + ExtractionAgentSessionModel.archived_at.is_(None), + ) + .order_by(desc(ExtractionAgentSessionModel.updated_at)) + ) + result = await self._session.execute(stmt) + return [self._to_domain(model) for model in result.scalars().all()] + async def list_by_scope( self, user_id: str, @@ -94,11 +144,17 @@ async def list_by_scope( return [self._to_domain(model) for model in result.scalars().all()] def _to_domain(self, model: ExtractionAgentSessionModel) -> ExtractionAgentSession: + ui_mode = ( + GraphManagementUiMode(model.graph_management_ui_mode) + if model.graph_management_ui_mode + else None + ) return ExtractionAgentSession( id=model.id, user_id=model.user_id, knowledge_graph_id=model.knowledge_graph_id, 
mode=ExtractionSessionMode(model.mode), + graph_management_ui_mode=ui_mode, message_history=list(model.message_history or []), runtime_context=dict(model.runtime_context or {}), created_at=model.created_at, diff --git a/src/api/extraction/infrastructure/workload_runtime.py b/src/api/extraction/infrastructure/workload_runtime.py index 5d4e9f658..6c2244d25 100644 --- a/src/api/extraction/infrastructure/workload_runtime.py +++ b/src/api/extraction/infrastructure/workload_runtime.py @@ -78,6 +78,16 @@ def reset_runtime( mode=mode, ) + def terminate_runtime( + self, + *, + session_id: str, + user_id: str, + knowledge_graph_id: str, + mode: str, + ) -> None: + self._leases.pop(session_id, None) + def cleanup_expired(self, *, now: datetime) -> list[str]: expired_sessions = [ session_id diff --git a/src/api/extraction/infrastructure/workload_runtime_settings.py b/src/api/extraction/infrastructure/workload_runtime_settings.py index 08a562f9a..bdc47537c 100644 --- a/src/api/extraction/infrastructure/workload_runtime_settings.py +++ b/src/api/extraction/infrastructure/workload_runtime_settings.py @@ -50,7 +50,7 @@ class ExtractionWorkloadRuntimeSettings(BaseSettings): worker_command: tuple[str, ...] 
= Field(default=("sleep", "3600")) sticky_service_port: int = Field(default=8787, ge=1024, le=65535) container_work_mount: str = Field(default="/workspace") - session_ttl_minutes: int = Field(default=30, ge=1, le=24 * 60) + session_ttl_minutes: int = Field(default=60, ge=1, le=24 * 60) job_package_work_dir: str = Field(default="/tmp/kartograph/job_packages") api_base_url: str = Field(default="http://api:8000") workload_token_signing_key: str = Field( diff --git a/src/api/extraction/ports/repositories.py b/src/api/extraction/ports/repositories.py index 69020a98c..cd78ad61c 100644 --- a/src/api/extraction/ports/repositories.py +++ b/src/api/extraction/ports/repositories.py @@ -6,7 +6,7 @@ from extraction.domain.entities.agent_session import ExtractionAgentSession from extraction.domain.extraction_job import ExtractionJobRecord -from extraction.domain.value_objects import ExtractionSessionMode, ExtractionSessionRunMetric +from extraction.domain.value_objects import ExtractionSessionMode, ExtractionSessionRunMetric, GraphManagementUiMode class IExtractionAgentSessionRepository(Protocol): @@ -23,6 +23,19 @@ async def find_active_by_scope( mode: ExtractionSessionMode, ) -> ExtractionAgentSession | None: ... + async def find_active_by_ui_mode( + self, + user_id: str, + knowledge_graph_id: str, + ui_mode: GraphManagementUiMode, + ) -> ExtractionAgentSession | None: ... + + async def list_active_by_user_and_kg( + self, + user_id: str, + knowledge_graph_id: str, + ) -> list[ExtractionAgentSession]: ... + async def list_by_scope( self, user_id: str, diff --git a/src/api/extraction/ports/runtime.py b/src/api/extraction/ports/runtime.py index 14ee058a0..a4e9c31fd 100644 --- a/src/api/extraction/ports/runtime.py +++ b/src/api/extraction/ports/runtime.py @@ -98,6 +98,17 @@ def reset_runtime( """Terminate existing runtime for session and start a clean one.""" ... 
+ def terminate_runtime( + self, + *, + session_id: str, + user_id: str, + knowledge_graph_id: str, + mode: str, + ) -> None: + """Terminate sticky runtime for session without starting a replacement.""" + ... + def cleanup_expired(self, *, now: datetime) -> list[str]: """Terminate and remove expired sticky runtimes; return container IDs.""" ... diff --git a/src/api/extraction/presentation/models.py b/src/api/extraction/presentation/models.py index 9d57ed426..d60f5f8b7 100644 --- a/src/api/extraction/presentation/models.py +++ b/src/api/extraction/presentation/models.py @@ -50,6 +50,7 @@ class ExtractionSessionResponse(BaseModel): user_id: str knowledge_graph_id: str mode: ExtractionSessionMode + graph_management_ui_mode: GraphManagementUiMode | None = None message_history: list[dict[str, Any]] = Field(default_factory=list) runtime_context: dict[str, Any] = Field(default_factory=dict) created_at: datetime @@ -63,6 +64,7 @@ def from_domain(cls, session: ExtractionAgentSession) -> "ExtractionSessionRespo user_id=session.user_id, knowledge_graph_id=session.knowledge_graph_id, mode=session.mode, + graph_management_ui_mode=session.graph_management_ui_mode, message_history=session.message_history, runtime_context=session.runtime_context, created_at=session.created_at, @@ -132,6 +134,12 @@ class BootstrapIntakePathSelectionRequest(BaseModel): ) +class GraphManagementSessionRequest(BaseModel): + """Request model for graph-management session lifecycle actions.""" + + graph_management_ui_mode: GraphManagementUiMode = GraphManagementUiMode.INITIAL_SCHEMA_DESIGN + + class ExtractionChatTurnRequest(BaseModel): """Request model for a graph-management chat turn.""" diff --git a/src/api/extraction/presentation/routes.py b/src/api/extraction/presentation/routes.py index cf32f492b..8d3be0186 100644 --- a/src/api/extraction/presentation/routes.py +++ b/src/api/extraction/presentation/routes.py @@ -16,7 +16,8 @@ get_extraction_agent_session_service_with_runtime, 
get_extraction_chat_turn_service, ) -from extraction.domain.value_objects import ExtractionSessionMode +from extraction.domain.graph_management_session_scope import resolve_backend_session_mode +from extraction.domain.value_objects import ExtractionSessionMode, GraphManagementUiMode from extraction.presentation.models import ( BootstrapIntakePathSelectionRequest, ExtractionChatTurnRequest, @@ -24,6 +25,7 @@ ExtractionSessionHistoryResponse, ExtractionSessionListResponse, ExtractionSessionResponse, + GraphManagementSessionRequest, StickyRuntimeWarmupRequest, ) from iam.application.value_objects import CurrentUser @@ -46,6 +48,17 @@ } +def _validate_graph_management_session_mode( + mode: ExtractionSessionMode, + ui_mode: GraphManagementUiMode, +) -> None: + if resolve_backend_session_mode(ui_mode) != mode: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail="graph_management_ui_mode does not match session mode path", + ) + + async def _assert_kg_edit_permission( *, authz: AuthorizationProvider, @@ -69,6 +82,7 @@ async def _assert_kg_edit_permission( async def get_active_session( knowledge_graph_id: str, mode: ExtractionSessionMode, + graph_management_ui_mode: GraphManagementUiMode, current_user: Annotated[CurrentUser, Depends(get_current_user)], service: Annotated[ ExtractionAgentSessionService, Depends(get_extraction_agent_session_service) @@ -80,11 +94,79 @@ async def get_active_session( current_user=current_user, knowledge_graph_id=knowledge_graph_id, ) - session = await service.get_or_create_active_session( + _validate_graph_management_session_mode(mode, graph_management_ui_mode) + session = await service.get_active_session( user_id=current_user.user_id.value, knowledge_graph_id=knowledge_graph_id, - mode=mode, + ui_mode=graph_management_ui_mode, ) + if session is None: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail="No active Graph Management Assistant session for this mode", + ) + return 
ExtractionSessionResponse.from_domain(session) + + +@router.post( + "/knowledge-graphs/{knowledge_graph_id}/sessions/{mode}/start-session", + response_model=ExtractionSessionResponse, +) +async def start_session( + knowledge_graph_id: str, + mode: ExtractionSessionMode, + request: GraphManagementSessionRequest, + current_user: Annotated[CurrentUser, Depends(get_current_user)], + service: Annotated[ + ExtractionAgentSessionService, Depends(get_extraction_agent_session_service) + ], + authz: Annotated[AuthorizationProvider, Depends(get_spicedb_client)], +) -> ExtractionSessionResponse: + await _assert_kg_edit_permission( + authz=authz, + current_user=current_user, + knowledge_graph_id=knowledge_graph_id, + ) + _validate_graph_management_session_mode(mode, request.graph_management_ui_mode) + session = await service.start_session( + user_id=current_user.user_id.value, + knowledge_graph_id=knowledge_graph_id, + ui_mode=request.graph_management_ui_mode, + ) + return ExtractionSessionResponse.from_domain(session) + + +@router.post( + "/knowledge-graphs/{knowledge_graph_id}/sessions/{mode}/end-session", + response_model=ExtractionSessionResponse, +) +async def end_session( + knowledge_graph_id: str, + mode: ExtractionSessionMode, + request: GraphManagementSessionRequest, + current_user: Annotated[CurrentUser, Depends(get_current_user)], + service: Annotated[ + ExtractionAgentSessionService, + Depends(get_extraction_agent_session_service_with_runtime), + ], + authz: Annotated[AuthorizationProvider, Depends(get_spicedb_client)], +) -> ExtractionSessionResponse: + await _assert_kg_edit_permission( + authz=authz, + current_user=current_user, + knowledge_graph_id=knowledge_graph_id, + ) + _validate_graph_management_session_mode(mode, request.graph_management_ui_mode) + session = await service.end_session( + user_id=current_user.user_id.value, + knowledge_graph_id=knowledge_graph_id, + ui_mode=request.graph_management_ui_mode, + ) + if session is None: + raise HTTPException( + 
status_code=status.HTTP_404_NOT_FOUND, + detail="No active Graph Management Assistant session for this mode", + ) return ExtractionSessionResponse.from_domain(session) @@ -152,6 +234,7 @@ async def list_session_history( async def clear_chat( knowledge_graph_id: str, mode: ExtractionSessionMode, + request: GraphManagementSessionRequest, current_user: Annotated[CurrentUser, Depends(get_current_user)], service: Annotated[ ExtractionAgentSessionService, @@ -164,10 +247,11 @@ async def clear_chat( current_user=current_user, knowledge_graph_id=knowledge_graph_id, ) + _validate_graph_management_session_mode(mode, request.graph_management_ui_mode) session = await service.clear_chat( user_id=current_user.user_id.value, knowledge_graph_id=knowledge_graph_id, - mode=mode, + ui_mode=request.graph_management_ui_mode, ) return ExtractionSessionResponse.from_domain(session) @@ -188,6 +272,7 @@ async def stream_runtime_warmup( current_user=current_user, knowledge_graph_id=knowledge_graph_id, ) + _validate_graph_management_session_mode(mode, request.graph_management_ui_mode) async def event_stream(): async for event in service.stream_runtime_warmup( @@ -223,6 +308,7 @@ async def stream_chat_turn( current_user=current_user, knowledge_graph_id=knowledge_graph_id, ) + _validate_graph_management_session_mode(mode, request.graph_management_ui_mode) async def event_stream(): async for event in service.stream_chat_turn( diff --git a/src/api/infrastructure/migrations/versions/k4l5m6n7o8p9_add_graph_management_ui_mode_to_sessions.py b/src/api/infrastructure/migrations/versions/k4l5m6n7o8p9_add_graph_management_ui_mode_to_sessions.py new file mode 100644 index 000000000..abad83f9d --- /dev/null +++ b/src/api/infrastructure/migrations/versions/k4l5m6n7o8p9_add_graph_management_ui_mode_to_sessions.py @@ -0,0 +1,45 @@ +"""Add graph_management_ui_mode to extraction agent sessions. 
+ +Revision ID: k4l5m6n7o8p9 +Revises: j3k4l5m6n7o8 +Create Date: 2026-06-14 +""" + +from typing import Sequence, Union + +import sqlalchemy as sa +from alembic import op + +revision: str = "k4l5m6n7o8p9" +down_revision: Union[str, Sequence[str], None] = "j3k4l5m6n7o8" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + op.add_column( + "extraction_agent_sessions", + sa.Column("graph_management_ui_mode", sa.String(length=64), nullable=True), + ) + op.execute( + """ + UPDATE extraction_agent_sessions + SET graph_management_ui_mode = COALESCE( + runtime_context->>'graph_management_ui_mode', + CASE mode + WHEN 'schema_bootstrap' THEN 'initial-schema-design' + ELSE 'extraction-jobs' + END + ) + """ + ) + op.create_index( + "idx_extract_sessions_ui_mode_active", + "extraction_agent_sessions", + ["user_id", "knowledge_graph_id", "graph_management_ui_mode", "archived_at"], + ) + + +def downgrade() -> None: + op.drop_index("idx_extract_sessions_ui_mode_active", table_name="extraction_agent_sessions") + op.drop_column("extraction_agent_sessions", "graph_management_ui_mode") diff --git a/src/api/tests/integration/extraction/test_session_history_retention.py b/src/api/tests/integration/extraction/test_session_history_retention.py index 14e1763bb..82ffeb546 100644 --- a/src/api/tests/integration/extraction/test_session_history_retention.py +++ b/src/api/tests/integration/extraction/test_session_history_retention.py @@ -8,7 +8,7 @@ from sqlalchemy import text from extraction.application.agent_session_service import ExtractionAgentSessionService -from extraction.domain.value_objects import ExtractionSessionMode +from extraction.domain.value_objects import ExtractionSessionMode, GraphManagementUiMode from extraction.infrastructure.repositories import ( ExtractionAgentSessionRepository, ExtractionSessionRunMetricsReader, @@ -121,10 +121,10 @@ async def 
test_archived_session_history_retains_linked_run_metadata( run_metrics_reader=metrics_reader, ) - active = await session_service.get_or_create_active_session( + active = await session_service.start_session( user_id=user_id, knowledge_graph_id=knowledge_graph.id.value, - mode=ExtractionSessionMode.EXTRACTION_OPERATIONS, + ui_mode=GraphManagementUiMode.EXTRACTION_JOBS, ) session_id = active.id @@ -163,7 +163,7 @@ async def test_archived_session_history_retains_linked_run_metadata( archived_session = await session_service.clear_chat( user_id=user_id, knowledge_graph_id=knowledge_graph.id.value, - mode=ExtractionSessionMode.EXTRACTION_OPERATIONS, + ui_mode=GraphManagementUiMode.EXTRACTION_JOBS, ) assert archived_session.id != session_id diff --git a/src/api/tests/unit/extraction/application/test_agent_session_service.py b/src/api/tests/unit/extraction/application/test_agent_session_service.py index 06c1d32f6..763546682 100644 --- a/src/api/tests/unit/extraction/application/test_agent_session_service.py +++ b/src/api/tests/unit/extraction/application/test_agent_session_service.py @@ -3,13 +3,23 @@ from __future__ import annotations from dataclasses import replace -from datetime import UTC, datetime +from datetime import UTC, datetime, timedelta import pytest from extraction.application.agent_session_service import ExtractionAgentSessionService +from extraction.application.graph_management_session_journal import ( + GraphManagementSessionJournalService, + append_applied_jsonl_to_session, +) from extraction.domain.entities.agent_session import ExtractionAgentSession -from extraction.domain.value_objects import BootstrapIntakePath, ExtractionSessionMode +from extraction.domain.graph_management_session_scope import resolve_backend_session_mode +from extraction.domain.value_objects import ( + BootstrapIntakePath, + ExtractionSessionMode, + GraphManagementUiMode, +) +from extraction.infrastructure.workload_runtime import InMemoryStickySessionRuntimeManager class 
_InMemoryAgentSessionRepository: @@ -39,6 +49,35 @@ async def find_active_by_scope( return replace(session) return None + async def find_active_by_ui_mode( + self, + user_id: str, + knowledge_graph_id: str, + ui_mode: GraphManagementUiMode, + ) -> ExtractionAgentSession | None: + for session in self._by_id.values(): + if ( + session.user_id == user_id + and session.knowledge_graph_id == knowledge_graph_id + and session.graph_management_ui_mode == ui_mode + and session.archived_at is None + ): + return replace(session) + return None + + async def list_active_by_user_and_kg( + self, + user_id: str, + knowledge_graph_id: str, + ) -> list[ExtractionAgentSession]: + return [ + replace(session) + for session in self._by_id.values() + if session.user_id == user_id + and session.knowledge_graph_id == knowledge_graph_id + and session.archived_at is None + ] + async def list_by_scope( self, user_id: str, @@ -55,16 +94,26 @@ async def list_by_scope( return sorted(sessions, key=lambda s: s.updated_at, reverse=True) +class _InMemoryJobRepository: + def __init__(self) -> None: + self.inserted = [] + + async def insert_archived_session_job(self, job) -> None: + self.inserted.append(job) + + class _StaticSkillResolutionService: def __init__(self) -> None: - self.calls: list[tuple[str, ExtractionSessionMode]] = [] + self.calls: list[tuple[str, ExtractionSessionMode, GraphManagementUiMode]] = [] - async def resolve_for_session( + async def resolve_for_graph_management_turn( self, + *, knowledge_graph_id: str, mode: ExtractionSessionMode, + ui_mode: GraphManagementUiMode, ): - self.calls.append((knowledge_graph_id, mode)) + self.calls.append((knowledge_graph_id, mode, ui_mode)) if mode == ExtractionSessionMode.SCHEMA_BOOTSTRAP: return type( "_Resolved", @@ -90,19 +139,19 @@ async def resolve_for_session( @pytest.mark.asyncio class TestExtractionAgentSessionService: - async def test_reuses_active_session_for_same_scope(self): + async def 
test_start_session_reuses_active_for_same_ui_mode(self): repo = _InMemoryAgentSessionRepository() service = ExtractionAgentSessionService(repository=repo) - first = await service.get_or_create_active_session( + first = await service.start_session( user_id="user-1", knowledge_graph_id="kg-1", - mode=ExtractionSessionMode.SCHEMA_BOOTSTRAP, + ui_mode=GraphManagementUiMode.INITIAL_SCHEMA_DESIGN, ) - second = await service.get_or_create_active_session( + second = await service.start_session( user_id="user-1", knowledge_graph_id="kg-1", - mode=ExtractionSessionMode.SCHEMA_BOOTSTRAP, + ui_mode=GraphManagementUiMode.INITIAL_SCHEMA_DESIGN, ) assert first.id == second.id @@ -111,44 +160,68 @@ async def test_scope_isolated_by_user(self): repo = _InMemoryAgentSessionRepository() service = ExtractionAgentSessionService(repository=repo) - first = await service.get_or_create_active_session( + first = await service.start_session( user_id="alice", knowledge_graph_id="kg-1", - mode=ExtractionSessionMode.EXTRACTION_OPERATIONS, + ui_mode=GraphManagementUiMode.EXTRACTION_JOBS, ) - second = await service.get_or_create_active_session( + second = await service.start_session( user_id="bob", knowledge_graph_id="kg-1", - mode=ExtractionSessionMode.EXTRACTION_OPERATIONS, + ui_mode=GraphManagementUiMode.EXTRACTION_JOBS, ) assert first.id != second.id - async def test_scope_isolated_by_mode(self): + async def test_scope_isolated_by_ui_mode(self): repo = _InMemoryAgentSessionRepository() service = ExtractionAgentSessionService(repository=repo) - bootstrap = await service.get_or_create_active_session( + bootstrap = await service.start_session( user_id="user-1", knowledge_graph_id="kg-1", - mode=ExtractionSessionMode.SCHEMA_BOOTSTRAP, + ui_mode=GraphManagementUiMode.INITIAL_SCHEMA_DESIGN, ) - operations = await service.get_or_create_active_session( + extraction_jobs = await service.start_session( user_id="user-1", knowledge_graph_id="kg-1", - mode=ExtractionSessionMode.EXTRACTION_OPERATIONS, + 
ui_mode=GraphManagementUiMode.EXTRACTION_JOBS, + ) + one_off = await service.start_session( + user_id="user-1", + knowledge_graph_id="kg-1", + ui_mode=GraphManagementUiMode.ONE_OFF_MUTATIONS, ) - assert bootstrap.id != operations.id + assert len({bootstrap.id, extraction_jobs.id, one_off.id}) == 3 + assert bootstrap.mode == ExtractionSessionMode.SCHEMA_BOOTSTRAP + assert extraction_jobs.mode == ExtractionSessionMode.EXTRACTION_OPERATIONS + assert one_off.mode == ExtractionSessionMode.EXTRACTION_OPERATIONS - async def test_clear_chat_archives_old_session_and_creates_new_one(self): + async def test_get_active_session_returns_none_when_not_started(self): repo = _InMemoryAgentSessionRepository() service = ExtractionAgentSessionService(repository=repo) - old_session = await service.get_or_create_active_session( + active = await service.get_active_session( user_id="user-1", knowledge_graph_id="kg-1", - mode=ExtractionSessionMode.EXTRACTION_OPERATIONS, + ui_mode=GraphManagementUiMode.ONE_OFF_MUTATIONS, + ) + + assert active is None + + async def test_clear_chat_archives_old_session_and_creates_new_one(self): + repo = _InMemoryAgentSessionRepository() + sticky = InMemoryStickySessionRuntimeManager() + service = ExtractionAgentSessionService( + repository=repo, + sticky_runtime_manager=sticky, + ) + + old_session = await service.start_session( + user_id="user-1", + knowledge_graph_id="kg-1", + ui_mode=GraphManagementUiMode.EXTRACTION_JOBS, ) old_session.message_history = [{"role": "user", "content": "hello"}] old_session.runtime_context = {"draft": "x"} @@ -158,7 +231,7 @@ async def test_clear_chat_archives_old_session_and_creates_new_one(self): new_session = await service.clear_chat( user_id="user-1", knowledge_graph_id="kg-1", - mode=ExtractionSessionMode.EXTRACTION_OPERATIONS, + ui_mode=GraphManagementUiMode.EXTRACTION_JOBS, ) archived = await repo.get_by_id(old_session.id) @@ -166,21 +239,114 @@ async def test_clear_chat_archives_old_session_and_creates_new_one(self): 
assert archived.archived_at is not None assert new_session.id != old_session.id assert new_session.message_history == [] - assert new_session.runtime_context == {} + assert new_session.runtime_context.get("graph_management_ui_mode") == ( + GraphManagementUiMode.EXTRACTION_JOBS.value + ) + + async def test_end_session_archives_writes_to_graph_history(self): + repo = _InMemoryAgentSessionRepository() + job_repo = _InMemoryJobRepository() + journal = GraphManagementSessionJournalService( + session_repository=repo, + extraction_job_repository=job_repo, + ) + sticky = InMemoryStickySessionRuntimeManager() + service = ExtractionAgentSessionService( + repository=repo, + sticky_runtime_manager=sticky, + session_journal_service=journal, + ) + + session = await service.start_session( + user_id="user-1", + knowledge_graph_id="kg-1", + ui_mode=GraphManagementUiMode.ONE_OFF_MUTATIONS, + ) + append_applied_jsonl_to_session( + session, + applied_jsonl=( + '{"op":"CREATE","type":"node","id":"service:0123456789abcdef","label":"service",' + '"set_properties":{"name":"api","slug":"api","data_source_id":"bootstrap"}}' + ), + ) + await repo.save(session) + + ended = await service.end_session( + user_id="user-1", + knowledge_graph_id="kg-1", + ui_mode=GraphManagementUiMode.ONE_OFF_MUTATIONS, + ) + + assert ended is not None + assert ended.archived_at is not None + assert len(job_repo.inserted) == 1 + + async def test_end_session_skips_graph_history_when_no_writes(self): + repo = _InMemoryAgentSessionRepository() + job_repo = _InMemoryJobRepository() + journal = GraphManagementSessionJournalService( + session_repository=repo, + extraction_job_repository=job_repo, + ) + service = ExtractionAgentSessionService( + repository=repo, + session_journal_service=journal, + ) + + await service.start_session( + user_id="user-1", + knowledge_graph_id="kg-1", + ui_mode=GraphManagementUiMode.EXTRACTION_JOBS, + ) + await service.end_session( + user_id="user-1", + knowledge_graph_id="kg-1", + 
ui_mode=GraphManagementUiMode.EXTRACTION_JOBS, + ) + + assert job_repo.inserted == [] + + async def test_idle_sessions_auto_end_after_one_hour(self): + repo = _InMemoryAgentSessionRepository() + sticky = InMemoryStickySessionRuntimeManager() + service = ExtractionAgentSessionService( + repository=repo, + sticky_runtime_manager=sticky, + idle_session_ttl=timedelta(hours=1), + ) + + session = await service.start_session( + user_id="user-1", + knowledge_graph_id="kg-1", + ui_mode=GraphManagementUiMode.EXTRACTION_JOBS, + ) + session.updated_at = datetime.now(UTC) - timedelta(hours=2) + await repo.save(session) + + active = await service.get_active_session( + user_id="user-1", + knowledge_graph_id="kg-1", + ui_mode=GraphManagementUiMode.EXTRACTION_JOBS, + ) + + assert active is None + archived = await repo.get_by_id(session.id) + assert archived is not None + assert archived.archived_at is not None async def test_list_sessions_includes_archived_history(self): repo = _InMemoryAgentSessionRepository() service = ExtractionAgentSessionService(repository=repo) - first = await service.get_or_create_active_session( + first = await service.start_session( user_id="user-1", knowledge_graph_id="kg-1", - mode=ExtractionSessionMode.EXTRACTION_OPERATIONS, + ui_mode=GraphManagementUiMode.EXTRACTION_JOBS, ) await service.clear_chat( user_id="user-1", knowledge_graph_id="kg-1", - mode=ExtractionSessionMode.EXTRACTION_OPERATIONS, + ui_mode=GraphManagementUiMode.EXTRACTION_JOBS, ) sessions = await service.list_sessions( @@ -200,26 +366,28 @@ async def test_new_session_initializes_runtime_context_from_skill_resolution(sel skill_resolution_service=skill_resolution, ) - session = await service.get_or_create_active_session( + session = await service.start_session( user_id="user-1", knowledge_graph_id="kg-1", - mode=ExtractionSessionMode.SCHEMA_BOOTSTRAP, + ui_mode=GraphManagementUiMode.INITIAL_SCHEMA_DESIGN, ) assert "agent_configuration" in session.runtime_context config = 
session.runtime_context["agent_configuration"] assert config["system_prompt"] == "Bootstrap system prompt" assert config["skills"]["schema_modeling"] == "bootstrap schema guidance" - assert skill_resolution.calls == [("kg-1", ExtractionSessionMode.SCHEMA_BOOTSTRAP)] + assert skill_resolution.calls == [ + ("kg-1", ExtractionSessionMode.SCHEMA_BOOTSTRAP, GraphManagementUiMode.INITIAL_SCHEMA_DESIGN) + ] async def test_bootstrap_session_seeds_capabilities_intake_prompt_state(self): repo = _InMemoryAgentSessionRepository() service = ExtractionAgentSessionService(repository=repo) - session = await service.get_or_create_active_session( + session = await service.start_session( user_id="user-1", knowledge_graph_id="kg-1", - mode=ExtractionSessionMode.SCHEMA_BOOTSTRAP, + ui_mode=GraphManagementUiMode.INITIAL_SCHEMA_DESIGN, ) assert session.message_history @@ -232,10 +400,10 @@ async def test_bootstrap_session_seeds_capabilities_intake_prompt_state(self): async def test_select_bootstrap_intake_path_persists_choice_for_continuity(self): repo = _InMemoryAgentSessionRepository() service = ExtractionAgentSessionService(repository=repo) - session = await service.get_or_create_active_session( + session = await service.start_session( user_id="user-1", knowledge_graph_id="kg-1", - mode=ExtractionSessionMode.SCHEMA_BOOTSTRAP, + ui_mode=GraphManagementUiMode.INITIAL_SCHEMA_DESIGN, ) updated = await service.set_bootstrap_intake_path_for_active_session( @@ -250,3 +418,15 @@ async def test_select_bootstrap_intake_path_persists_choice_for_continuity(self) assert intake["status"] == "path_selected" assert intake["capabilities_goals"] == "I can provide domain terms but need guidance." 
assert updated.id == session.id + + +def test_resolve_backend_session_mode_maps_ui_modes() -> None: + assert resolve_backend_session_mode(GraphManagementUiMode.INITIAL_SCHEMA_DESIGN) == ( + ExtractionSessionMode.SCHEMA_BOOTSTRAP + ) + assert resolve_backend_session_mode(GraphManagementUiMode.EXTRACTION_JOBS) == ( + ExtractionSessionMode.EXTRACTION_OPERATIONS + ) + assert resolve_backend_session_mode(GraphManagementUiMode.ONE_OFF_MUTATIONS) == ( + ExtractionSessionMode.EXTRACTION_OPERATIONS + ) diff --git a/src/api/tests/unit/extraction/application/test_chat_turn_service.py b/src/api/tests/unit/extraction/application/test_chat_turn_service.py index 09c2b16b6..139f2cf81 100644 --- a/src/api/tests/unit/extraction/application/test_chat_turn_service.py +++ b/src/api/tests/unit/extraction/application/test_chat_turn_service.py @@ -47,6 +47,35 @@ async def find_active_by_scope( return replace(session) return None + async def find_active_by_ui_mode( + self, + user_id: str, + knowledge_graph_id: str, + ui_mode: GraphManagementUiMode, + ) -> ExtractionAgentSession | None: + for session in self._sessions.values(): + if ( + session.user_id == user_id + and session.knowledge_graph_id == knowledge_graph_id + and session.graph_management_ui_mode == ui_mode + and session.archived_at is None + ): + return replace(session) + return None + + async def list_active_by_user_and_kg( + self, + user_id: str, + knowledge_graph_id: str, + ) -> list[ExtractionAgentSession]: + return [ + replace(session) + for session in self._sessions.values() + if session.user_id == user_id + and session.knowledge_graph_id == knowledge_graph_id + and session.archived_at is None + ] + async def list_by_scope( self, user_id: str, @@ -119,6 +148,18 @@ def _build_chat_turn_service( return service, repo +async def _start_session( + session_service: ExtractionAgentSessionService, + *, + ui_mode: GraphManagementUiMode, +) -> None: + await session_service.start_session( + user_id="user-1", + 
knowledge_graph_id="kg-1", + ui_mode=ui_mode, + ) + + class _UsageEmittingChatAgent: async def stream_turn(self, **kwargs): yield { @@ -155,6 +196,7 @@ async def test_stream_chat_turn_accumulates_token_usage_in_session_journal() -> runtime_service=runtime_service, chat_agent=_UsageEmittingChatAgent(), ) + await _start_session(session_service, ui_mode=GraphManagementUiMode.INITIAL_SCHEMA_DESIGN) events = [ event @@ -169,7 +211,9 @@ async def test_stream_chat_turn_accumulates_token_usage_in_session_journal() -> ] assert events[-1]["ok"] is True - active = await repo.find_active_by_scope("user-1", "kg-1", ExtractionSessionMode.SCHEMA_BOOTSTRAP) + active = await repo.find_active_by_ui_mode( + "user-1", "kg-1", GraphManagementUiMode.INITIAL_SCHEMA_DESIGN + ) assert active is not None journal = active.runtime_context["mutation_journal"] assert journal["input_tokens"] == 800 @@ -180,6 +224,10 @@ async def test_stream_chat_turn_accumulates_token_usage_in_session_journal() -> @pytest.mark.asyncio async def test_stream_chat_turn_persists_assistant_reply() -> None: service, repo = _build_chat_turn_service(readiness=IngestionReadinessSnapshot(1, 1)) + await _start_session( + service._session_service, + ui_mode=GraphManagementUiMode.INITIAL_SCHEMA_DESIGN, + ) events = [ event @@ -195,7 +243,9 @@ async def test_stream_chat_turn_persists_assistant_reply() -> None: assert events[-1]["type"] == "done" assert events[-1]["ok"] is True - active = await repo.find_active_by_scope("user-1", "kg-1", ExtractionSessionMode.SCHEMA_BOOTSTRAP) + active = await repo.find_active_by_ui_mode( + "user-1", "kg-1", GraphManagementUiMode.INITIAL_SCHEMA_DESIGN + ) assert active is not None assert active.message_history[-2]["role"] == "user" assert active.message_history[-1]["role"] == "assistant" @@ -234,6 +284,7 @@ async def test_stream_chat_turn_passes_fresh_workload_token_to_agent() -> None: chat_agent=chat_agent, 
credential_issuer=ScopedWorkloadCredentialIssuer(default_ttl=__import__("datetime").timedelta(minutes=5)), ) + await _start_session(session_service, ui_mode=GraphManagementUiMode.INITIAL_SCHEMA_DESIGN) events = [ event @@ -254,6 +305,7 @@ async def test_stream_chat_turn_passes_fresh_workload_token_to_agent() -> None: @pytest.mark.asyncio async def test_stream_chat_turn_wait_when_job_package_unprepared() -> None: service, repo = _build_chat_turn_service(readiness=IngestionReadinessSnapshot(2, 0)) + await _start_session(service._session_service, ui_mode=GraphManagementUiMode.EXTRACTION_JOBS) events = [ event @@ -271,8 +323,8 @@ async def test_stream_chat_turn_wait_when_job_package_unprepared() -> None: done = events[-1] assert done["ok"] is True assert done.get("wait") is True - active = await repo.find_active_by_scope( - "user-1", "kg-1", ExtractionSessionMode.EXTRACTION_OPERATIONS + active = await repo.find_active_by_ui_mode( + "user-1", "kg-1", GraphManagementUiMode.EXTRACTION_JOBS ) assert active is not None assert active.runtime_context["job_package"]["phase"] == "awaiting_job_package" @@ -281,6 +333,10 @@ async def test_stream_chat_turn_wait_when_job_package_unprepared() -> None: @pytest.mark.asyncio async def test_stream_runtime_warmup_marks_memory_backend_ready() -> None: service, _repo = _build_chat_turn_service(readiness=IngestionReadinessSnapshot(1, 1)) + await _start_session( + service._session_service, + ui_mode=GraphManagementUiMode.INITIAL_SCHEMA_DESIGN, + ) events = [ event @@ -336,6 +392,7 @@ async def test_stream_chat_turn_emits_error_when_agent_stream_incomplete() -> No runtime_service=runtime_service, chat_agent=_IncompleteChatAgent(), ) + await _start_session(session_service, ui_mode=GraphManagementUiMode.INITIAL_SCHEMA_DESIGN) events = [ event @@ -377,6 +434,7 @@ async def test_stream_chat_turn_persists_user_message_when_agent_fails() -> None runtime_service=runtime_service, chat_agent=_FailingChatAgent(), ) + await 
_start_session(session_service, ui_mode=GraphManagementUiMode.INITIAL_SCHEMA_DESIGN) events = [ event @@ -391,6 +449,8 @@ async def test_stream_chat_turn_persists_user_message_when_agent_fails() -> None ] assert events[-1]["ok"] is False - active = await repo.find_active_by_scope("user-1", "kg-1", ExtractionSessionMode.SCHEMA_BOOTSTRAP) + active = await repo.find_active_by_ui_mode( + "user-1", "kg-1", GraphManagementUiMode.INITIAL_SCHEMA_DESIGN + ) assert active is not None assert active.message_history[-1] == {"role": "user", "content": "Hello!"} diff --git a/src/api/tests/unit/extraction/application/test_graph_management_session_journal.py b/src/api/tests/unit/extraction/application/test_graph_management_session_journal.py index 811ab4c0c..1d9ccf53b 100644 --- a/src/api/tests/unit/extraction/application/test_graph_management_session_journal.py +++ b/src/api/tests/unit/extraction/application/test_graph_management_session_journal.py @@ -182,9 +182,17 @@ async def test_archive_session_mutations_uses_one_off_mutations_job_set() -> Non user_id="user-1", knowledge_graph_id="kg-1", mode=ExtractionSessionMode.EXTRACTION_OPERATIONS, + graph_management_ui_mode=GraphManagementUiMode.ONE_OFF_MUTATIONS, created_at=datetime(2026, 6, 5, tzinfo=UTC), ) session.runtime_context["graph_management_ui_mode"] = GraphManagementUiMode.ONE_OFF_MUTATIONS.value + append_applied_jsonl_to_session( + session, + applied_jsonl=( + '{"op":"CREATE","type":"node","id":"service:0123456789abcdef","label":"service",' + '"set_properties":{"name":"api","slug":"api","data_source_id":"bootstrap"}}' + ), + ) append_turn_usage_to_session( session, usage={"input_tokens": 100, "output_tokens": 50, "cost_usd": 0.02}, @@ -197,7 +205,7 @@ async def test_archive_session_mutations_uses_one_off_mutations_job_set() -> Non @pytest.mark.asyncio -async def test_archive_session_mutations_token_only_session() -> None: +async def test_archive_session_mutations_skips_token_only_session() -> None: session_repo = 
_InMemorySessionRepository() job_repo = _InMemoryJobRepository() service = GraphManagementSessionJournalService( @@ -218,7 +226,4 @@ async def test_archive_session_mutations_token_only_session() -> None: await service.archive_session_mutations(session) - assert len(job_repo.inserted) == 1 - job = job_repo.inserted[0] - assert job.input_tokens == 500 - assert job.applied_mutations_jsonl is None + assert job_repo.inserted == [] diff --git a/src/api/tests/unit/extraction/application/test_session_history_service.py b/src/api/tests/unit/extraction/application/test_session_history_service.py index 9977f94c7..fc3dd2f6e 100644 --- a/src/api/tests/unit/extraction/application/test_session_history_service.py +++ b/src/api/tests/unit/extraction/application/test_session_history_service.py @@ -9,7 +9,7 @@ from extraction.application.agent_session_service import ExtractionAgentSessionService from extraction.domain.entities.agent_session import ExtractionAgentSession -from extraction.domain.value_objects import ExtractionSessionMode, ExtractionSessionRunMetric +from extraction.domain.value_objects import ExtractionSessionMode, ExtractionSessionRunMetric, GraphManagementUiMode from extraction.domain.value_objects import ExtractionSessionMode as Mode @@ -40,6 +40,35 @@ async def find_active_by_scope( return replace(session) return None + async def find_active_by_ui_mode( + self, + user_id: str, + knowledge_graph_id: str, + ui_mode: GraphManagementUiMode, + ) -> ExtractionAgentSession | None: + for session in self._by_id.values(): + if ( + session.user_id == user_id + and session.knowledge_graph_id == knowledge_graph_id + and session.graph_management_ui_mode == ui_mode + and session.archived_at is None + ): + return replace(session) + return None + + async def list_active_by_user_and_kg( + self, + user_id: str, + knowledge_graph_id: str, + ) -> list[ExtractionAgentSession]: + return [ + replace(session) + for session in self._by_id.values() + if session.user_id == user_id + and 
session.knowledge_graph_id == knowledge_graph_id + and session.archived_at is None + ] + async def list_by_scope( self, user_id: str, @@ -86,10 +115,10 @@ async def test_list_session_history_includes_archived_sessions_with_metrics(self run_metrics_reader=metrics_reader, ) - archived = await service.get_or_create_active_session( + archived = await service.start_session( user_id="user-1", knowledge_graph_id="kg-1", - mode=Mode.EXTRACTION_OPERATIONS, + ui_mode=GraphManagementUiMode.EXTRACTION_JOBS, ) archived.message_history = [{"role": "user", "content": "hello"}] archived.updated_at = datetime(2026, 5, 20, 12, 0, tzinfo=UTC) @@ -111,7 +140,7 @@ async def test_list_session_history_includes_archived_sessions_with_metrics(self await service.clear_chat( user_id="user-1", knowledge_graph_id="kg-1", - mode=Mode.EXTRACTION_OPERATIONS, + ui_mode=GraphManagementUiMode.EXTRACTION_JOBS, ) history = await service.list_session_history( @@ -137,20 +166,20 @@ async def test_clear_chat_retains_archived_sessions_for_history(self): run_metrics_reader=metrics_reader, ) - first = await service.get_or_create_active_session( + first = await service.start_session( user_id="user-1", knowledge_graph_id="kg-1", - mode=Mode.EXTRACTION_OPERATIONS, + ui_mode=GraphManagementUiMode.EXTRACTION_JOBS, ) await service.clear_chat( user_id="user-1", knowledge_graph_id="kg-1", - mode=Mode.EXTRACTION_OPERATIONS, + ui_mode=GraphManagementUiMode.EXTRACTION_JOBS, ) await service.clear_chat( user_id="user-1", knowledge_graph_id="kg-1", - mode=Mode.EXTRACTION_OPERATIONS, + ui_mode=GraphManagementUiMode.EXTRACTION_JOBS, ) history = await service.list_session_history( diff --git a/src/api/tests/unit/extraction/application/test_sticky_session_runtime_service.py b/src/api/tests/unit/extraction/application/test_sticky_session_runtime_service.py index b52f47196..bbb060b29 100644 --- a/src/api/tests/unit/extraction/application/test_sticky_session_runtime_service.py +++ 
b/src/api/tests/unit/extraction/application/test_sticky_session_runtime_service.py @@ -45,6 +45,30 @@ async def find_active_by_scope(self, user_id: str, knowledge_graph_id: str, mode return replace(session) return None + async def find_active_by_ui_mode(self, user_id: str, knowledge_graph_id: str, ui_mode): + for session in self._sessions.values(): + if ( + session.user_id == user_id + and session.knowledge_graph_id == knowledge_graph_id + and session.graph_management_ui_mode == ui_mode + and session.archived_at is None + ): + from dataclasses import replace + + return replace(session) + return None + + async def list_active_by_user_and_kg(self, user_id: str, knowledge_graph_id: str): + from dataclasses import replace + + return [ + replace(session) + for session in self._sessions.values() + if session.user_id == user_id + and session.knowledge_graph_id == knowledge_graph_id + and session.archived_at is None + ] + async def list_by_scope(self, user_id: str, knowledge_graph_id: str, mode=None): return [] @@ -129,6 +153,11 @@ def get_or_start_runtime(self, **kwargs): async def test_stream_runtime_warmup_surfaces_container_start_failure() -> None: repo = _InMemoryAgentSessionRepository() session_service = ExtractionAgentSessionService(repository=repo) + await session_service.start_session( + user_id="user-1", + knowledge_graph_id="kg-1", + ui_mode=GraphManagementUiMode.INITIAL_SCHEMA_DESIGN, + ) service = StickySessionRuntimeService( session_service=session_service, skill_resolution_service=_StaticSkillResolutionService(), @@ -185,10 +214,10 @@ async def test_ensure_runtime_for_chat_reprepares_when_persisted_runtime_is_inac runtime_backend="memory", sticky_health_timeout_seconds=5.0, ) - session = await session_service.get_or_create_active_session( + session = await session_service.start_session( user_id="user-1", knowledge_graph_id="kg-1", - mode=ExtractionSessionMode.SCHEMA_BOOTSTRAP, + ui_mode=GraphManagementUiMode.INITIAL_SCHEMA_DESIGN, ) 
session.runtime_context["sticky_runtime"] = { "container_id": "dead-container", @@ -230,10 +259,10 @@ async def test_ensure_runtime_for_chat_restarts_when_job_package_materialization runtime_backend="container", sticky_health_timeout_seconds=5.0, ) - session = await session_service.get_or_create_active_session( + session = await session_service.start_session( user_id="user-1", knowledge_graph_id="kg-1", - mode=ExtractionSessionMode.SCHEMA_BOOTSTRAP, + ui_mode=GraphManagementUiMode.INITIAL_SCHEMA_DESIGN, ) sticky.get_or_start_runtime( session_id=session.id, @@ -283,10 +312,10 @@ async def test_ensure_runtime_for_chat_reuses_running_container_without_reprepar runtime_backend="container", sticky_health_timeout_seconds=5.0, ) - session = await session_service.get_or_create_active_session( + session = await session_service.start_session( user_id="user-1", knowledge_graph_id="kg-1", - mode=ExtractionSessionMode.SCHEMA_BOOTSTRAP, + ui_mode=GraphManagementUiMode.INITIAL_SCHEMA_DESIGN, ) sticky.get_or_start_runtime( session_id=session.id, @@ -329,10 +358,10 @@ async def test_ensure_runtime_for_chat_restarts_when_persisted_container_is_unhe runtime_backend="memory", sticky_health_timeout_seconds=5.0, ) - session = await session_service.get_or_create_active_session( + session = await session_service.start_session( user_id="user-1", knowledge_graph_id="kg-1", - mode=ExtractionSessionMode.SCHEMA_BOOTSTRAP, + ui_mode=GraphManagementUiMode.INITIAL_SCHEMA_DESIGN, ) sticky.get_or_start_runtime( session_id=session.id, diff --git a/src/api/tests/unit/extraction/infrastructure/test_workload_runtime.py b/src/api/tests/unit/extraction/infrastructure/test_workload_runtime.py index 7e2b4d0d0..2c07d1c90 100644 --- a/src/api/tests/unit/extraction/infrastructure/test_workload_runtime.py +++ b/src/api/tests/unit/extraction/infrastructure/test_workload_runtime.py @@ -57,6 +57,31 @@ def test_reset_rotates_container_for_same_session(self) -> None: assert rotated.container_id != 
original.container_id assert rotated.status == "active" + def test_terminate_runtime_removes_active_lease(self) -> None: + manager = InMemoryStickySessionRuntimeManager(session_ttl=timedelta(minutes=30)) + lease = manager.get_or_start_runtime( + session_id="session-1", + user_id="user-1", + knowledge_graph_id="kg-1", + mode="schema_bootstrap", + ) + + manager.terminate_runtime( + session_id="session-1", + user_id="user-1", + knowledge_graph_id="kg-1", + mode="schema_bootstrap", + ) + + assert manager.is_runtime_active(session_id="session-1") is False + replacement = manager.get_or_start_runtime( + session_id="session-1", + user_id="user-1", + knowledge_graph_id="kg-1", + mode="schema_bootstrap", + ) + assert replacement.container_id != lease.container_id + def test_cleanup_terminates_expired_sessions(self) -> None: manager = InMemoryStickySessionRuntimeManager(session_ttl=timedelta(minutes=5)) lease = manager.get_or_start_runtime( diff --git a/src/api/tests/unit/extraction/presentation/test_routes.py b/src/api/tests/unit/extraction/presentation/test_routes.py index 5b6d479a6..c3b295a1c 100644 --- a/src/api/tests/unit/extraction/presentation/test_routes.py +++ b/src/api/tests/unit/extraction/presentation/test_routes.py @@ -3,13 +3,14 @@ from __future__ import annotations from dataclasses import replace + import pytest from fastapi import FastAPI, status from fastapi.testclient import TestClient from extraction.application.agent_session_service import ExtractionAgentSessionService from extraction.domain.entities.agent_session import ExtractionAgentSession -from extraction.domain.value_objects import BootstrapIntakePath +from extraction.domain.value_objects import BootstrapIntakePath, ExtractionSessionMode, GraphManagementUiMode from iam.application.value_objects import CurrentUser from iam.domain.value_objects import TenantId, UserId @@ -41,6 +42,35 @@ async def find_active_by_scope( return replace(session) return None + async def find_active_by_ui_mode( + self, + 
user_id: str, + knowledge_graph_id: str, + ui_mode: GraphManagementUiMode, + ) -> ExtractionAgentSession | None: + for session in self._sessions.values(): + if ( + session.user_id == user_id + and session.knowledge_graph_id == knowledge_graph_id + and session.graph_management_ui_mode == ui_mode + and session.archived_at is None + ): + return replace(session) + return None + + async def list_active_by_user_and_kg( + self, + user_id: str, + knowledge_graph_id: str, + ) -> list[ExtractionAgentSession]: + return [ + replace(session) + for session in self._sessions.values() + if session.user_id == user_id + and session.knowledge_graph_id == knowledge_graph_id + and session.archived_at is None + ] + async def list_by_scope( self, user_id: str, @@ -146,20 +176,24 @@ def test_clear_chat_archives_old_session_and_returns_fresh_session( self, extraction_client ): client, _ = extraction_client - active = client.get( - "/extraction/knowledge-graphs/kg-123/sessions/extraction_operations/active" + started = client.post( + "/extraction/knowledge-graphs/kg-123/sessions/extraction_operations/start-session", + json={"graph_management_ui_mode": GraphManagementUiMode.EXTRACTION_JOBS.value}, ) - assert active.status_code == status.HTTP_200_OK - old_id = active.json()["id"] + assert started.status_code == status.HTTP_200_OK + old_id = started.json()["id"] response = client.post( - "/extraction/knowledge-graphs/kg-123/sessions/extraction_operations/clear-chat" + "/extraction/knowledge-graphs/kg-123/sessions/extraction_operations/clear-chat", + json={"graph_management_ui_mode": GraphManagementUiMode.EXTRACTION_JOBS.value}, ) assert response.status_code == status.HTTP_200_OK payload = response.json() assert payload["id"] != old_id assert payload["message_history"] == [] - assert payload["runtime_context"] == {} + assert payload["runtime_context"]["graph_management_ui_mode"] == ( + GraphManagementUiMode.EXTRACTION_JOBS.value + ) history_resp = client.get( 
"/extraction/knowledge-graphs/kg-123/sessions/extraction_operations" @@ -169,15 +203,25 @@ def test_clear_chat_archives_old_session_and_returns_fresh_session( assert len(history) == 2 assert any(row["id"] == old_id and row["archived_at"] is not None for row in history) - def test_active_session_endpoint_returns_existing_active_session( + def test_active_session_endpoint_returns_404_when_not_started( self, extraction_client ): client, _ = extraction_client - first = client.get( - "/extraction/knowledge-graphs/kg-999/sessions/schema_bootstrap/active" + response = client.get( + "/extraction/knowledge-graphs/kg-999/sessions/schema_bootstrap/active", + params={"graph_management_ui_mode": GraphManagementUiMode.INITIAL_SCHEMA_DESIGN.value}, ) - second = client.get( - "/extraction/knowledge-graphs/kg-999/sessions/schema_bootstrap/active" + assert response.status_code == status.HTTP_404_NOT_FOUND + + def test_start_session_is_idempotent_for_same_ui_mode(self, extraction_client): + client, _ = extraction_client + first = client.post( + "/extraction/knowledge-graphs/kg-999/sessions/schema_bootstrap/start-session", + json={"graph_management_ui_mode": GraphManagementUiMode.INITIAL_SCHEMA_DESIGN.value}, + ) + second = client.post( + "/extraction/knowledge-graphs/kg-999/sessions/schema_bootstrap/start-session", + json={"graph_management_ui_mode": GraphManagementUiMode.INITIAL_SCHEMA_DESIGN.value}, ) assert first.status_code == status.HTTP_200_OK assert second.status_code == status.HTTP_200_OK @@ -185,10 +229,11 @@ def test_active_session_endpoint_returns_existing_active_session( def test_select_bootstrap_intake_path_persists_choice(self, extraction_client): client, _ = extraction_client - active = client.get( - "/extraction/knowledge-graphs/kg-123/sessions/schema_bootstrap/active" + started = client.post( + "/extraction/knowledge-graphs/kg-123/sessions/schema_bootstrap/start-session", + json={"graph_management_ui_mode": GraphManagementUiMode.INITIAL_SCHEMA_DESIGN.value}, ) - 
assert active.status_code == status.HTTP_200_OK + assert started.status_code == status.HTTP_200_OK response = client.post( "/extraction/knowledge-graphs/kg-123/sessions/schema_bootstrap/active/intake-path", @@ -207,14 +252,16 @@ def test_session_history_endpoint_returns_archived_sessions_with_run_metrics( self, extraction_client ): client, _ = extraction_client - active = client.get( - "/extraction/knowledge-graphs/kg-123/sessions/extraction_operations/active" + started = client.post( + "/extraction/knowledge-graphs/kg-123/sessions/extraction_operations/start-session", + json={"graph_management_ui_mode": GraphManagementUiMode.EXTRACTION_JOBS.value}, ) - assert active.status_code == status.HTTP_200_OK - archived_id = active.json()["id"] + assert started.status_code == status.HTTP_200_OK + archived_id = started.json()["id"] client.post( - "/extraction/knowledge-graphs/kg-123/sessions/extraction_operations/clear-chat" + "/extraction/knowledge-graphs/kg-123/sessions/extraction_operations/clear-chat", + json={"graph_management_ui_mode": GraphManagementUiMode.EXTRACTION_JOBS.value}, ) response = client.get( @@ -229,4 +276,3 @@ def test_session_history_endpoint_returns_archived_sessions_with_run_metrics( assert archived["archived_at"] is not None assert archived["updated_at"] is not None assert archived["run_metrics"] == [] - diff --git a/src/dev-ui/app/components/extraction/SharedConversationPanel.vue b/src/dev-ui/app/components/extraction/SharedConversationPanel.vue index 8f032984b..9d62498a4 100644 --- a/src/dev-ui/app/components/extraction/SharedConversationPanel.vue +++ b/src/dev-ui/app/components/extraction/SharedConversationPanel.vue @@ -7,7 +7,7 @@ import { } from '@/composables/useScrollPositionPreserve' import DOMPurify from 'isomorphic-dompurify' import { marked } from 'marked' -import { Bot, Loader2, RefreshCw, RotateCcw, Send, Sparkles, User } from 'lucide-vue-next' +import { Bot, Loader2, PlayCircle, RotateCcw, Send, Sparkles, Square, User } from 
'lucide-vue-next' import { normalizeThinkingActivityLines, THINKING_DISPLAY_LINE_COUNT, @@ -41,6 +41,8 @@ const props = withDefaults(defineProps<{ session: ConversationSession | null loading?: boolean clearing?: boolean + togglingSession?: boolean + sessionActive?: boolean sending?: boolean preparingRuntime?: boolean draftMessage?: string @@ -57,6 +59,8 @@ const props = withDefaults(defineProps<{ }>(), { loading: false, clearing: false, + togglingSession: false, + sessionActive: false, sending: false, preparingRuntime: false, draftMessage: '', @@ -75,7 +79,7 @@ const props = withDefaults(defineProps<{ }) const emit = defineEmits<{ - refresh: [] + toggleSession: [] clearChat: [] sendMessage: [message: string] 'update:draftMessage': [value: string] @@ -101,7 +105,7 @@ const showConversationRefreshIndicator = computed( ) const composerBlocked = computed( - () => props.loading || props.clearing || props.inputDisabled || props.forbidden, + () => props.loading || props.clearing || props.togglingSession || props.inputDisabled || props.forbidden, ) const chatSendDisabled = computed( @@ -300,20 +304,22 @@ onMounted(() => { <Button type="button" size="sm" - variant="outline" + :variant="sessionActive ? 'destructive' : 'default'" class="gap-1.5" - :disabled="loading" - @click="emit('refresh')" + :disabled="loading || togglingSession || forbidden" + @click="emit('toggleSession')" > - <RefreshCw class="size-4" /> - Resume session + <Loader2 v-if="togglingSession" class="size-4 animate-spin" /> + <Square v-else-if="sessionActive" class="size-4" /> + <PlayCircle v-else class="size-4" /> + {{ sessionActive ? 
'End session' : 'Start session' }} </Button> <Button type="button" size="sm" variant="outline" class="gap-1.5" - :disabled="clearing || loading || forbidden" + :disabled="clearing || loading || togglingSession || forbidden" @click="clearConfirmOpen = true" > <Loader2 v-if="clearing" class="size-4 animate-spin" /> @@ -482,7 +488,8 @@ onMounted(() => { <AlertDialogHeader> <AlertDialogTitle>Clear conversation?</AlertDialogTitle> <AlertDialogDescription> - This starts a fresh server-side session timeline while keeping the selected graph management mode. + This ends the current session, archives any graph writes to history, and starts a fresh + conversation with a new assistant container for this mode. </AlertDialogDescription> </AlertDialogHeader> <AlertDialogFooter> diff --git a/src/dev-ui/app/components/graph-management/GraphExtractionArchivedHistory.vue b/src/dev-ui/app/components/graph-management/GraphExtractionArchivedHistory.vue index d425938d3..4bf265137 100644 --- a/src/dev-ui/app/components/graph-management/GraphExtractionArchivedHistory.vue +++ b/src/dev-ui/app/components/graph-management/GraphExtractionArchivedHistory.vue @@ -179,7 +179,7 @@ watch( <template v-else> <div class="rounded border"> <div class="flex items-center justify-between border-b px-3 py-2"> - <p class="text-xs font-medium text-muted-foreground">Runs ({{ payload.archivedJobCount }})</p> + <p class="text-xs font-medium text-muted-foreground">Entries ({{ payload.archivedJobCount }})</p> <Button size="sm" variant="ghost" class="h-6 px-2 text-[10px]" @click="loadHistory">Refresh</Button> </div> <div class="max-h-80 space-y-1 overflow-auto p-2"> diff --git a/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue b/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue index 924aae8cb..f9c97951d 100644 --- a/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue +++ b/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue @@ -61,7 +61,7 @@ import { isGraphManagementModeUnlocked, 
parseGraphManagementModeQuery, resolveEffectiveGraphManagementMode, - resolveSharedSessionMode, + resolveSessionModeForGraphManagementMode, type GraphManagementMode, type GraphManagementModeGateInput, type GraphManagementRailItemId, @@ -99,16 +99,6 @@ import { resolveSectionState, shouldApplyMutationResult, } from '@/utils/kgManageState' -import { - buildMutationLogEntryPreviewUrl, - collectScopedMutationLogRuns, - hasMutationLogEntryPreviewPage, - MUTATION_LOG_ENTRY_PREVIEW_PAGE_SIZE, - MUTATION_LOG_NO_PREVIEW_MESSAGE, - resolveDefaultSelectedMutationLogRunId, - type MutationLogEntryPreviewPage, - type MutationLogRunRecord, -} from '@/utils/kgMutationLogs' import { streamExtractionChatTurn, streamRuntimeWarmup } from '@/utils/kgExtractionChat' import { applyThinkingRecentUpdate } from '@/utils/thinkingActivityLines' import type { DesignArtifactsResponse } from '@/utils/kgDesignArtifacts' @@ -158,10 +148,6 @@ interface DataSourceRef { newest_unpulled_commit?: string | null } -interface MutationLogRunView extends MutationLogRunRecord { - data_source_name: string -} - interface InlineSyncRun { id: string status: string @@ -176,6 +162,10 @@ interface ExtractionSessionResponse { updated_at: string } +interface ArchivedHistorySummary { + archivedJobCount: number +} + const route = useRoute() const { hasTenant, tenantVersion } = useTenant() const { extractErrorMessage } = useErrorHandler() @@ -201,6 +191,7 @@ const sessionLoadError = ref<string | null>(null) const sessionForbidden = ref(false) const sessionForbiddenReason = ref<string | null>(null) const clearingChat = ref(false) +const togglingSession = ref(false) const sendingChat = ref(false) const runtimeWarming = ref(false) const runtimeReady = ref(false) @@ -209,15 +200,9 @@ let runtimeWarmupGeneration = 0 const extractionSession = ref<ExtractionSessionResponse | null>(null) const draftMessage = ref('') const statusProjection = ref<WorkspaceStatusResponse | null>(null) -const mutationLogLoading = ref(false) -const 
mutationLogLoadError = ref<string | null>(null) -const mutationLogRuns = ref<MutationLogRunView[]>([]) -const selectedMutationLogRunId = ref<string | null>(null) +const archivedWriteCount = ref(0) const graphManagementMode = ref<GraphManagementMode>('initial-schema-design') const selectedRailItemId = ref<GraphManagementRailItemId | null>(null) -const mutationLogEntryPreviewLoading = ref(false) -const mutationLogEntryPreviewPage = ref<MutationLogEntryPreviewPage | null>(null) -const mutationLogEntryPreviewOffset = ref(0) const graphManagementDataSources = ref<DataSourceRef[]>([]) const graphManagementDataSourcesLoading = ref(false) const graphManagementDataSourcesError = ref<string | null>(null) @@ -233,6 +218,30 @@ const inlineRunLogsError = ref<string | null>(null) const designArtifactsReloadNonce = ref(0) const designArtifactsRefreshing = ref(false) +type ModeConversationState = { + session: ExtractionSessionResponse | null + runtimeReady: boolean + runtimeWarmupError: string | null + sessionActivityLines: string[] + draftMessage: string +} + +function emptyModeConversationState(): ModeConversationState { + return { + session: null, + runtimeReady: false, + runtimeWarmupError: null, + sessionActivityLines: [], + draftMessage: '', + } +} + +const modeConversationState = ref<Record<GraphManagementMode, ModeConversationState>>({ + 'initial-schema-design': emptyModeConversationState(), + 'extraction-jobs': emptyModeConversationState(), + 'one-off-mutations': emptyModeConversationState(), +}) + const activeStep = computed(() => parseManageStepQuery(route.query.step)) const showOverview = computed(() => activeStep.value === null) @@ -244,7 +253,7 @@ const workspaceOverviewInput = computed(() => ({ kgId: kgId.value, dataSourceCount: dataSourceCount.value, maintenanceReadyCount: maintenanceReadyCount.value, - mutationLogRunCount: mutationLogRuns.value.length, + mutationLogRunCount: archivedWriteCount.value, workspaceStatus: statusProjection.value, })) @@ -284,10 +293,12 
@@ const stepBadgeLabel = computed(() => { return modeLabel.value }) -const sharedSessionMode = computed<'schema_bootstrap' | 'extraction_operations'>(() => - resolveSharedSessionMode( - statusProjection.value?.workspace_mode ?? 'schema_bootstrap', - ), +const graphManagementSessionMode = computed<'schema_bootstrap' | 'extraction_operations'>(() => + resolveSessionModeForGraphManagementMode(graphManagementMode.value), +) + +const graphManagementSessionActive = computed( + () => Boolean(extractionSession.value?.id && !extractionSession.value.archived_at), ) const graphManagementModeLabel = computed( @@ -322,6 +333,8 @@ const sessionStatusLabel = computed(() => { } if (sessionLoading.value) return 'Loading session' if (clearingChat.value) return 'Resetting chat' + if (togglingSession.value) return 'Updating session' + if (!graphManagementSessionActive.value) return 'No active session' if (extractionSession.value?.id) { return `Active · ${extractionSession.value.id.slice(0, 8)}` } @@ -339,11 +352,18 @@ const conversationPanelLoading = computed( ) const chatInputDisabled = computed( - () => workspaceForbidden.value || runtimeWarming.value || !runtimeReady.value, + () => + workspaceForbidden.value + || !graphManagementSessionActive.value + || runtimeWarming.value + || !runtimeReady.value, ) const chatInputDisabledReason = computed(() => { if (workspaceForbidden.value) return workspaceForbiddenReason.value + if (!graphManagementSessionActive.value) { + return 'Start a session to chat with the Graph Management Assistant.' + } if (runtimeWarming.value) return 'Starting Graph Management Assistant…' if (!runtimeReady.value) { return runtimeWarmupError.value ?? 'Assistant runtime is not ready yet.' 
@@ -410,20 +430,6 @@ const workspaceOverviewState = computed(() => }), ) -const mutationLogsSectionState = computed(() => - resolveSectionState({ - section: 'mutation-logs', - loading: mutationLogLoading.value, - error: mutationLogLoadError.value, - forbidden: workspaceForbidden.value, - forbiddenReason: workspaceForbiddenReason.value, - empty: !mutationLogLoading.value - && !mutationLogLoadError.value - && mutationLogRuns.value.length === 0, - emptyActionLabel: 'Refresh runs', - }), -) - const graphManagementSectionState = computed(() => resolveSectionState({ section: 'graph-management', @@ -434,10 +440,6 @@ const graphManagementSectionState = computed(() => }), ) -const selectedMutationLogRun = computed(() => - mutationLogRuns.value.find((run) => run.id === selectedMutationLogRunId.value) ?? null, -) - const selectedOpsDataSource = computed(() => graphManagementDataSources.value.find((ds) => ds.id === selectedOpsDataSourceId.value) ?? null, ) @@ -773,96 +775,63 @@ async function loadWorkspaceStatus() { } } -async function loadMutationLogRuns() { +async function loadArchivedWriteCount() { if (!hasTenant.value || !kgId.value) return - mutationLogLoading.value = true - mutationLogLoadError.value = null try { - const dataSources = await apiFetch<DataSourceRef[]>( - `/management/knowledge-graphs/${kgId.value}/data-sources`, - ) - - const runsByDataSourceId: Record<string, MutationLogRunRecord[]> = {} - for (const ds of dataSources) { - try { - runsByDataSourceId[ds.id] = await apiFetch<MutationLogRunRecord[]>( - `/management/data-sources/${ds.id}/sync-runs`, - ) - } catch { - runsByDataSourceId[ds.id] = [] - } - } - - const collected = collectScopedMutationLogRuns( - kgId.value, - dataSources, - runsByDataSourceId, - ) as MutationLogRunView[] - - mutationLogRuns.value = collected - selectedMutationLogRunId.value = resolveDefaultSelectedMutationLogRunId( - collected, - selectedMutationLogRunId.value, + const payload = await apiFetch<ArchivedHistorySummary>( + 
`/management/knowledge-graphs/${kgId.value}/extraction-jobs/archived-history`, ) - } catch (err) { - if (isForbiddenHttpError(err)) { - mutationLogLoadError.value = resolveForbiddenReason( - err, - 'You do not have permission to view graph writes history for this graph.', - ) - } else { - mutationLogLoadError.value = extractErrorMessage(err) - toast.error('Failed to load archived write history', { - description: mutationLogLoadError.value, - }) - } - mutationLogRuns.value = [] - selectedMutationLogRunId.value = null - mutationLogEntryPreviewPage.value = null - } finally { - mutationLogLoading.value = false + archivedWriteCount.value = payload.archivedJobCount + } catch { + archivedWriteCount.value = 0 } } -async function loadMutationLogEntryPreviews(offset = 0) { - const run = selectedMutationLogRun.value - if (!run) { - mutationLogEntryPreviewPage.value = null - mutationLogEntryPreviewOffset.value = 0 - return - } - - mutationLogEntryPreviewLoading.value = true +async function clearChat() { + if (!kgId.value || sessionForbidden.value) return + clearingChat.value = true + runtimeWarmupGeneration += 1 + runtimeWarming.value = false + runtimeReady.value = false try { - mutationLogEntryPreviewPage.value = await apiFetch<MutationLogEntryPreviewPage>( - buildMutationLogEntryPreviewUrl( - run.data_source_id, - run.id, - offset, - MUTATION_LOG_ENTRY_PREVIEW_PAGE_SIZE, - ), + extractionSession.value = await apiFetch<ExtractionSessionResponse>( + `/extraction/knowledge-graphs/${kgId.value}/sessions/${graphManagementSessionMode.value}/clear-chat`, + { + method: 'POST', + body: { graph_management_ui_mode: graphManagementMode.value }, + }, ) - mutationLogEntryPreviewOffset.value = offset + snapshotCurrentModeConversation() + await warmupAssistantRuntime() + toast.success('Extraction chat cleared') + void loadArchivedWriteCount() } catch (err) { - mutationLogEntryPreviewPage.value = { - entries: [], - total: 0, - offset, - limit: MUTATION_LOG_ENTRY_PREVIEW_PAGE_SIZE, - 
preview_available: false, - } - mutationLogEntryPreviewOffset.value = offset - toast.error('Failed to load graph write entry previews', { + toast.error('Failed to clear chat', { description: extractErrorMessage(err), }) } finally { - mutationLogEntryPreviewLoading.value = false + clearingChat.value = false } } -async function refreshGraphManagementSession() { - await loadExtractionSession() - await warmupAssistantRuntime() +function snapshotCurrentModeConversation() { + const mode = graphManagementMode.value + modeConversationState.value[mode] = { + session: extractionSession.value, + runtimeReady: runtimeReady.value, + runtimeWarmupError: runtimeWarmupError.value, + sessionActivityLines: [...sessionActivityLines.value], + draftMessage: draftMessage.value, + } +} + +function restoreModeConversation(mode: GraphManagementMode) { + const cached = modeConversationState.value[mode] ?? emptyModeConversationState() + extractionSession.value = cached.session + runtimeReady.value = cached.runtimeReady + runtimeWarmupError.value = cached.runtimeWarmupError + sessionActivityLines.value = [...cached.sessionActivityLines] + draftMessage.value = cached.draftMessage } async function loadExtractionSession() { @@ -871,7 +840,8 @@ async function loadExtractionSession() { sessionLoadError.value = null try { extractionSession.value = await apiFetch<ExtractionSessionResponse>( - `/extraction/knowledge-graphs/${kgId.value}/sessions/${sharedSessionMode.value}/active`, + `/extraction/knowledge-graphs/${kgId.value}/sessions/${graphManagementSessionMode.value}/active` + + `?graph_management_ui_mode=${encodeURIComponent(graphManagementMode.value)}`, ) syncActivityLinesFromSession() const stickyPhase = extractionSession.value?.runtime_context?.sticky_runtime @@ -887,12 +857,17 @@ async function loadExtractionSession() { sessionForbiddenReason.value = null } catch (err) { extractionSession.value = null + runtimeReady.value = false if (isForbiddenHttpError(err)) { sessionForbidden.value = true 
sessionForbiddenReason.value = resolveForbiddenReason( err, 'You do not have permission to manage this knowledge graph.', ) + } else if (extractErrorMessage(err).includes('404') || extractErrorMessage(err).toLowerCase().includes('not found')) { + sessionForbidden.value = false + sessionForbiddenReason.value = null + sessionLoadError.value = null } else { sessionForbidden.value = false sessionForbiddenReason.value = null @@ -903,28 +878,70 @@ async function loadExtractionSession() { } } finally { sessionLoading.value = false + snapshotCurrentModeConversation() } } -async function clearChat() { - // Clear chat resets the active extraction session for this knowledge graph. +async function startGraphManagementSession() { if (!kgId.value || sessionForbidden.value) return - clearingChat.value = true + togglingSession.value = true try { extractionSession.value = await apiFetch<ExtractionSessionResponse>( - `/extraction/knowledge-graphs/${kgId.value}/sessions/${sharedSessionMode.value}/clear-chat`, - { method: 'POST' }, + `/extraction/knowledge-graphs/${kgId.value}/sessions/${graphManagementSessionMode.value}/start-session`, + { + method: 'POST', + body: { graph_management_ui_mode: graphManagementMode.value }, + }, ) - toast.success('Extraction chat cleared') + snapshotCurrentModeConversation() + await warmupAssistantRuntime() + toast.success('Graph Management Assistant session started') } catch (err) { - toast.error('Failed to clear chat', { + toast.error('Failed to start session', { description: extractErrorMessage(err), }) } finally { - clearingChat.value = false + togglingSession.value = false } } +async function endGraphManagementSession() { + if (!kgId.value || sessionForbidden.value || !graphManagementSessionActive.value) return + togglingSession.value = true + runtimeWarmupGeneration += 1 + runtimeWarming.value = false + runtimeReady.value = false + try { + await apiFetch<ExtractionSessionResponse>( + 
`/extraction/knowledge-graphs/${kgId.value}/sessions/${graphManagementSessionMode.value}/end-session`, + { + method: 'POST', + body: { graph_management_ui_mode: graphManagementMode.value }, + }, + ) + extractionSession.value = null + runtimeWarmupError.value = null + sessionActivityLines.value = [] + snapshotCurrentModeConversation() + void loadArchivedWriteCount() + toast.success('Graph Management Assistant session ended') + } catch (err) { + toast.error('Failed to end session', { + description: extractErrorMessage(err), + }) + } finally { + togglingSession.value = false + } +} + +async function toggleGraphManagementSession() { + if (graphManagementSessionActive.value) { + await endGraphManagementSession() + return + } + await startGraphManagementSession() +} + function syncGraphManagementState() { if (activeStep.value !== 'graph-management') return const fromQuery = parseGraphManagementModeQuery(route.query.gm_mode) @@ -949,13 +966,16 @@ function setGraphManagementMode(mode: GraphManagementMode) { toast.message('Mode locked', { description: reason ?? 'Finish schema design first.' 
}) return } + snapshotCurrentModeConversation() graphManagementMode.value = mode + restoreModeConversation(mode) selectedRailItemId.value = resolveSchemaRailSelection( selectedRailItemId.value, mode, graphManagementRailItems.value, ) navigateTo(buildGraphManagementStepUrl(kgId.value, mode), { replace: true }) + void loadExtractionSession() } function selectSchemaRailItem(itemId: GraphManagementRailItemId) { @@ -976,14 +996,6 @@ function onModeSwitchKeydown(event: KeyboardEvent, mode: GraphManagementMode) { handleActivatableKeydown(event, () => setGraphManagementMode(mode)) } -function selectMutationLogRun(runId: string) { - selectedMutationLogRunId.value = runId -} - -function onMutationRunKeydown(event: KeyboardEvent, runId: string) { - handleActivatableKeydown(event, () => selectMutationLogRun(runId)) -} - function applySessionThinkingRecent(recent: string[]) { sessionActivityLines.value = applyThinkingRecentUpdate(sessionActivityLines.value, recent) } @@ -1023,7 +1035,7 @@ async function warmupAssistantRuntime() { accessToken: accessToken.value, tenantId: currentTenantId.value, kgId: kgId.value, - sessionMode: sharedSessionMode.value, + sessionMode: graphManagementSessionMode.value, uiMode: graphManagementMode.value, })) { if (generation !== runtimeWarmupGeneration) return @@ -1061,6 +1073,7 @@ async function warmupAssistantRuntime() { } finally { if (generation === runtimeWarmupGeneration) { runtimeWarming.value = false + snapshotCurrentModeConversation() } } } @@ -1099,7 +1112,7 @@ async function sendChatMessage(message: string) { accessToken: accessToken.value, tenantId: currentTenantId.value, kgId: kgId.value, - sessionMode: sharedSessionMode.value, + sessionMode: graphManagementSessionMode.value, uiMode: graphManagementMode.value, message: trimmed, })) { @@ -1208,7 +1221,7 @@ onMounted(() => { loadKgIdentity() loadWorkspaceStatus() loadOverviewMetrics() - loadMutationLogRuns() + loadArchivedWriteCount() }) watch(tenantVersion, () => { @@ -1221,17 +1234,17 
@@ watch(tenantVersion, () => { overviewSourceRows.value = [] entityTypeLabels.value = [] relationshipTypeLabels.value = [] + archivedWriteCount.value = 0 workspaceLoadError.value = null workspaceForbidden.value = false workspaceForbiddenReason.value = null - mutationLogLoadError.value = null sessionLoadError.value = null sessionForbidden.value = false sessionForbiddenReason.value = null loadKgIdentity() loadWorkspaceStatus() loadOverviewMetrics() - loadMutationLogRuns() + loadArchivedWriteCount() }) watch( @@ -1245,17 +1258,18 @@ watch( ) watch( - () => [activeStep.value, route.query.gm_mode, sharedSessionMode.value] as const, - async () => { - if (activeStep.value === 'graph-management') { + () => [activeStep.value, route.query.gm_mode] as const, + async ([step]) => { + if (step === 'graph-management') { syncGraphManagementState() + restoreModeConversation(graphManagementMode.value) await Promise.all([ loadExtractionSession(), loadGraphManagementDataSources(), refreshDesignArtifacts({ silent: true }), ]) - await warmupAssistantRuntime() } else { + snapshotCurrentModeConversation() runtimeWarmupGeneration += 1 runtimeWarming.value = false runtimeReady.value = false @@ -1264,10 +1278,6 @@ watch( }, ) -watch(selectedMutationLogRunId, () => { - loadMutationLogEntryPreviews(0) -}) - watch(selectedOpsDataSourceId, () => { inlineRunLogs.value = [] inlineRunLogsError.value = null @@ -1548,7 +1558,7 @@ watch( <FileText class="size-4 text-muted-foreground" /> </div> <div> - <div class="text-2xl font-bold">{{ mutationLogRuns.length }}</div> + <div class="text-2xl font-bold">{{ archivedWriteCount }}</div> <p class="text-xs text-muted-foreground">Archived writes</p> </div> </CardContent> @@ -1674,8 +1684,10 @@ watch( :input-placeholder="graphManagementInputPlaceholder" :session-status-label="sessionStatusLabel" :session="conversationSessionForPanel" + :session-active="graphManagementSessionActive" :loading="conversationPanelLoading" :clearing="clearingChat" + 
:toggling-session="togglingSession" :sending="sendingChat" :preparing-runtime="runtimeWarming" :activity-lines="sessionActivityLines" @@ -1683,7 +1695,7 @@ watch( :forbidden-reason="sessionForbiddenReason" :input-disabled="chatInputDisabled" :input-disabled-reason="chatInputDisabledReason" - @refresh="refreshGraphManagementSession" + @toggle-session="toggleGraphManagementSession" @clear-chat="clearChat" @send-message="sendChatMessage" /> diff --git a/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts b/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts index 62d465ae1..fdc43edb1 100644 --- a/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts +++ b/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts @@ -195,6 +195,8 @@ describe('KG-MANAGE-002 - workspace hub tile set', () => { expect(manageWorkspaceVue).toContain('More Detail') expect(manageWorkspaceVue).toContain('dataSourcesDetailUrl') expect(manageWorkspaceVue).toContain('Archived writes') + expect(manageWorkspaceVue).toContain('archivedWriteCount') + expect(manageWorkspaceVue).toContain('/extraction-jobs/archived-history') expect(manageWorkspaceHubTs).toContain('Data sources') expect(manageWorkspaceHubTs).toContain('Graph Management') expect(manageWorkspaceHubTs).toContain('Graph Writes History') @@ -353,9 +355,10 @@ describe('KG-MANAGE-015 - graph-scoped maintain step and round trip', () => { }) describe('Shared conversation panel - extraction UX contract', () => { - it('renders phase-2 style conversational intelligence header and resume action', () => { + it('renders phase-2 style conversational intelligence header and session toggle', () => { expect(sharedConversationPanelVue).toContain('Graph Management Assistant') - expect(sharedConversationPanelVue).toContain('Resume session') + expect(sharedConversationPanelVue).toContain('Start session') + expect(sharedConversationPanelVue).toContain('End session') expect(sharedConversationPanelVue).toContain('Sparkles') }) @@ 
-395,10 +398,11 @@ describe('KG-MANAGE-006 - graph management conversation-first layout', () => { expect(manageWorkspaceVue).toContain('mx-auto max-w-7xl') }) - it('uses one shared session endpoint across UI mode changes', () => { - expect(manageWorkspaceVue).toContain('sharedSessionMode') - expect(manageWorkspaceVue).toContain('/sessions/${sharedSessionMode.value}/active') - expect(manageWorkspaceVue).not.toContain('watch(graphManagementMode') + it('uses per-mode session endpoints and cached conversation state', () => { + expect(manageWorkspaceVue).toContain('graphManagementSessionMode') + expect(manageWorkspaceVue).toContain('graph_management_ui_mode') + expect(manageWorkspaceVue).toContain('modeConversationState') + expect(manageWorkspaceVue).toContain('restoreModeConversation') }) }) @@ -517,9 +521,10 @@ describe('KG-MANAGE-010 - schema design parity behavior', () => { }) describe('KG-MANAGE-011 - session reset behavior', () => { - it('supports explicit clear chat reset on the shared session', () => { + it('supports explicit clear chat reset on the per-mode session', () => { expect(manageWorkspaceVue).toContain('clearChat') - expect(manageWorkspaceVue).toContain('/sessions/${sharedSessionMode.value}/clear-chat') + expect(manageWorkspaceVue).toContain('/sessions/${graphManagementSessionMode.value}/clear-chat') + expect(manageWorkspaceVue).toContain('graph_management_ui_mode') expect(sharedConversationPanelVue).toContain('Clear chat') }) @@ -528,7 +533,7 @@ describe('KG-MANAGE-011 - session reset behavior', () => { /async function clearChat\(\) \{[\s\S]*?\n\}/, )?.[0] ?? 
'' expect(clearChatBlock).toContain('clearChat') - expect(clearChatBlock).not.toContain('graphManagementMode') + expect(clearChatBlock).not.toContain('graphManagementMode.value =') }) }) @@ -538,7 +543,8 @@ describe('KG-MANAGE-016 - graph management top controls', () => { expect(manageWorkspaceVue).toContain('graphManagementModeLabel') expect(manageWorkspaceVue).toContain('sessionStatusLabel') expect(manageWorkspaceVue).toContain('validateWorkspace') - expect(manageWorkspaceVue).toContain('Clear chat') + expect(manageWorkspaceVue).toContain('toggleGraphManagementSession') + expect(sharedConversationPanelVue).toContain('Clear chat') }) it('maps shared session mode from workspace lifecycle without UI mode coupling', () => { diff --git a/src/dev-ui/app/utils/kgGraphManagement.ts b/src/dev-ui/app/utils/kgGraphManagement.ts index 2a2918634..ccd80acf2 100644 --- a/src/dev-ui/app/utils/kgGraphManagement.ts +++ b/src/dev-ui/app/utils/kgGraphManagement.ts @@ -71,6 +71,12 @@ export function resolveDefaultGraphManagementMode( return workspaceMode === 'extraction_operations' ? 'extraction-jobs' : 'initial-schema-design' } +export function resolveSessionModeForGraphManagementMode( + mode: GraphManagementMode, +): 'schema_bootstrap' | 'extraction_operations' { + return mode === 'initial-schema-design' ? 'schema_bootstrap' : 'extraction_operations' +} + export function resolveSharedSessionMode( workspaceMode: 'schema_bootstrap' | 'extraction_operations', ): 'schema_bootstrap' | 'extraction_operations' { diff --git a/src/dev-ui/app/utils/kgManageWorkspaceHub.ts b/src/dev-ui/app/utils/kgManageWorkspaceHub.ts index 2e39dbc69..d305acf94 100644 --- a/src/dev-ui/app/utils/kgManageWorkspaceHub.ts +++ b/src/dev-ui/app/utils/kgManageWorkspaceHub.ts @@ -148,7 +148,7 @@ export function buildWorkspaceHubTiles(input: WorkspaceHubOverview): WorkspaceHu key: 'mutation-logs', title: 'Graph Writes History', subtitle: input.mutationLogRunCount > 0 - ? 
`${input.mutationLogRunCount} archived run${input.mutationLogRunCount === 1 ? '' : 's'} recorded` + ? `${input.mutationLogRunCount} archived write entr${input.mutationLogRunCount === 1 ? 'y' : 'ies'} recorded` : 'Review GMA sessions and extraction job writes', to: resolveStepDestination(input.kgId, 'mutation-logs'), enabled: input.dataSourceCount > 0, From 96f3340c857ea63535912ef07e3513c68f7724ce Mon Sep 17 00:00:00 2001 From: aredenba-rh <aredenba@redhat.com> Date: Sun, 14 Jun 2026 17:37:55 -0400 Subject: [PATCH 139/153] kg-backups --- .gitignore | 1 + Makefile | 13 + scripts/kg-data-backup.sh | 83 ++ scripts/kg_data_backup.py | 783 ++++++++++++++++++ skills/subagent-delivery/SKILL.md | 206 ----- .../claude-instance-system-prompt.txt | 67 -- .../section-wave-launch.template.txt | 38 - 7 files changed, 880 insertions(+), 311 deletions(-) create mode 100755 scripts/kg-data-backup.sh create mode 100644 scripts/kg_data_backup.py delete mode 100644 skills/subagent-delivery/SKILL.md delete mode 100644 skills/subagent-delivery/claude-instance-system-prompt.txt delete mode 100644 skills/subagent-delivery/section-wave-launch.template.txt diff --git a/.gitignore b/.gitignore index f3bc136db..e0d523dcd 100644 --- a/.gitignore +++ b/.gitignore @@ -174,6 +174,7 @@ src/dev-ui/.output/ certs/ .instances/ .kartograph/backups/ +.kartograph/kg-backups/ # Demo web interface (not for production) demo-web/ diff --git a/Makefile b/Makefile index 915a581b0..db574a7ec 100755 --- a/Makefile +++ b/Makefile @@ -54,6 +54,19 @@ dev-backup-list: dev-repair-age-graphs: @./scripts/dev-data-backup.sh repair +.PHONY: kg-backup kg-restore kg-backup-list +kg-backup: + @test -n "$(KG_ID)" || (echo "Usage: make kg-backup KG_ID=<knowledge-graph-id>" && exit 1) + @./scripts/kg-data-backup.sh capture "$(KG_ID)" + +kg-restore: + @test -n "$(KG_ID)" || (echo "Usage: make kg-restore KG_ID=<knowledge-graph-id> [BACKUP=latest] [YES=1] [REPLACE=1]" && exit 1) + @./scripts/kg-data-backup.sh restore "$(KG_ID)" 
$(or $(BACKUP),latest) $(if $(YES),--yes,) $(if $(REPLACE),--replace,) + +kg-backup-list: + @test -n "$(KG_ID)" || (echo "Usage: make kg-backup-list KG_ID=<knowledge-graph-id>" && exit 1) + @./scripts/kg-data-backup.sh list "$(KG_ID)" + .PHONY: run run: diff --git a/scripts/kg-data-backup.sh b/scripts/kg-data-backup.sh new file mode 100755 index 000000000..f930d7a10 --- /dev/null +++ b/scripts/kg-data-backup.sh @@ -0,0 +1,83 @@ +#!/usr/bin/env bash +# +# Capture and restore a single Knowledge Graph from a Kartograph dev instance. +# +# Usage: +# ./scripts/kg-data-backup.sh capture <knowledge-graph-id> +# ./scripts/kg-data-backup.sh restore <knowledge-graph-id> [backup-id|latest] [--yes] [--replace] +# ./scripts/kg-data-backup.sh list <knowledge-graph-id> +# +# Makefile shortcuts: +# make kg-backup KG_ID=01KTYN8Q0RJS2CCQX044S4V96C +# make kg-restore KG_ID=01KTYN8Q0RJS2CCQX044S4V96C +# make kg-backup-list KG_ID=01KTYN8Q0RJS2CCQX044S4V96C +# +# Backups are written to .kartograph/kg-backups/<kg-id>/<timestamp>/ (gitignored). + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +REPO_ROOT="$(cd "$SCRIPT_DIR/.." 
&& pwd)" + +COMPOSE_PROJECT="${COMPOSE_PROJECT:-kartograph}" + +usage() { + cat <<'EOF' +Usage: + kg-data-backup.sh capture <knowledge-graph-id> + kg-data-backup.sh restore <knowledge-graph-id> [backup-id|latest] [--yes] [--replace] + kg-data-backup.sh list <knowledge-graph-id> + +Environment: + COMPOSE_PROJECT Docker compose project name (default: kartograph) + +Examples: + make kg-backup KG_ID=01KTYN8Q0RJS2CCQX044S4V96C + make kg-restore KG_ID=01KTYN8Q0RJS2CCQX044S4V96C BACKUP=latest + ./scripts/kg-data-backup.sh restore 01KTYN8Q0RJS2CCQX044S4V96C latest --replace --yes +EOF +} + +die() { + printf 'ERROR: %s\n' "$*" >&2 + exit 1 +} + +postgres_container_id() { + local container_id + container_id="$( + docker compose -p "$COMPOSE_PROJECT" \ + -f "$REPO_ROOT/compose.yaml" \ + -f "$REPO_ROOT/compose.dev.yaml" \ + ps -q postgres 2>/dev/null | head -n 1 || true + )" + if [[ -z "$container_id" ]]; then + die "Postgres container not found for compose project '$COMPOSE_PROJECT'. Is 'make dev' running?" + fi + printf '%s' "$container_id" +} + +ensure_dev_postgres() { + postgres_container_id >/dev/null +} + +run_python() { + ensure_dev_postgres + cd "$REPO_ROOT/src/api" + COMPOSE_PROJECT="$COMPOSE_PROJECT" uv run python "$REPO_ROOT/scripts/kg_data_backup.py" "$@" +} + +ACTION="${1:-}" +shift || true + +case "$ACTION" in + capture|restore|list) + run_python "$ACTION" "$@" + ;; + ""|-h|--help|help) + usage + ;; + *) + die "Unknown command: ${ACTION}. Run with --help for usage." + ;; +esac diff --git a/scripts/kg_data_backup.py b/scripts/kg_data_backup.py new file mode 100644 index 000000000..6c49bea33 --- /dev/null +++ b/scripts/kg_data_backup.py @@ -0,0 +1,783 @@ +#!/usr/bin/env python3 +"""Capture and restore a single Knowledge Graph from a Kartograph dev instance. + +Backs up PostgreSQL metadata, encrypted credentials, Apache AGE graph data +(nodes and edges scoped by knowledge_graph_id), and SpiceDB authorization +tuples for the KG and its data sources. 
+ +Usage: + uv run python scripts/kg_data_backup.py capture <knowledge-graph-id> + uv run python scripts/kg_data_backup.py restore <knowledge-graph-id> [backup-id|latest] --yes + uv run python scripts/kg_data_backup.py list <knowledge-graph-id> + +Environment (defaults match `make dev` / env/postgres.env): + KARTOGRAPH_DB_HOST, KARTOGRAPH_DB_PORT, KARTOGRAPH_DB_DATABASE, + KARTOGRAPH_DB_USERNAME, KARTOGRAPH_DB_PASSWORD +""" + +from __future__ import annotations + +import argparse +import base64 +import json +import os +import re +import subprocess +import sys +from datetime import UTC, datetime +from pathlib import Path +from typing import Any + +import psycopg2 +from psycopg2 import sql +from psycopg2.extras import RealDictCursor + +REPO_ROOT = Path(__file__).resolve().parent.parent +BACKUP_ROOT = REPO_ROOT / ".kartograph" / "kg-backups" +POSTGRES_ENV = REPO_ROOT / "env" / "postgres.env" +SPICEDB_ENV = REPO_ROOT / "env" / "spicedb.env" + +POSTGRES_TABLES_IN_RESTORE_ORDER = [ + "knowledge_graphs", + "encrypted_credentials", + "data_sources", + "data_source_sync_runs", + "knowledge_graph_type_definitions", + "extraction_runs", + "extraction_jobs", + "extraction_agent_sessions", +] + +ULID_PATTERN = re.compile(r"^[0-9A-HJKMNP-TV-Z]{26}$") + + +def load_env_file(path: Path) -> dict[str, str]: + values: dict[str, str] = {} + if not path.exists(): + return values + for line in path.read_text(encoding="utf-8").splitlines(): + line = line.strip() + if not line or line.startswith("#") or "=" not in line: + continue + key, value = line.split("=", 1) + values[key.strip()] = value.strip() + return values + + +def db_settings() -> dict[str, str]: + file_values = load_env_file(POSTGRES_ENV) + host = os.getenv("KARTOGRAPH_DB_HOST", file_values.get("POSTGRES_HOST", "localhost")) + if host == "postgres": + host = "localhost" + return { + "host": host, + "port": os.getenv("KARTOGRAPH_DB_PORT", file_values.get("POSTGRES_PORT", "5432")), + "database": os.getenv( + 
"KARTOGRAPH_DB_DATABASE", file_values.get("POSTGRES_DB", "kartograph") + ), + "user": os.getenv( + "KARTOGRAPH_DB_USERNAME", file_values.get("POSTGRES_USER", "kartograph") + ), + "password": os.getenv( + "KARTOGRAPH_DB_PASSWORD", file_values.get("POSTGRES_PASSWORD", "") + ), + } + + +def spicedb_database_name() -> str: + return "spicedb" + + +def connect(database: str | None = None) -> psycopg2.extensions.connection: + settings = db_settings() + return psycopg2.connect( + host=settings["host"], + port=settings["port"], + dbname=database or settings["database"], + user=settings["user"], + password=settings["password"], + ) + + +def timestamp_utc() -> str: + return datetime.now(UTC).strftime("%Y-%m-%dT%H-%M-%SZ") + + +def git_commit_short() -> str: + try: + result = subprocess.run( + ["git", "-C", str(REPO_ROOT), "rev-parse", "--short", "HEAD"], + check=True, + capture_output=True, + text=True, + ) + return result.stdout.strip() or "unknown" + except (OSError, subprocess.CalledProcessError): + return "unknown" + + +def validate_kg_id(kg_id: str) -> None: + if not ULID_PATTERN.match(kg_id): + raise SystemExit(f"Invalid knowledge graph id: {kg_id}") + + +def json_default(value: Any) -> Any: + if isinstance(value, datetime): + return value.isoformat() + if isinstance(value, memoryview): + return base64.b64encode(value.tobytes()).decode("ascii") + if isinstance(value, (bytes, bytearray)): + return base64.b64encode(bytes(value)).decode("ascii") + raise TypeError(f"Object of type {type(value)!r} is not JSON serializable") + + +def write_json(path: Path, payload: Any) -> None: + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text( + json.dumps(payload, indent=2, sort_keys=True, default=json_default), + encoding="utf-8", + ) + + +def read_json(path: Path) -> Any: + return json.loads(path.read_text(encoding="utf-8")) + + +def agtype_to_python(value: Any) -> Any: + if value is None: + return None + if isinstance(value, str): + text = value.strip() + if 
text.startswith("{") or text.startswith("["): + try: + return json.loads(text) + except json.JSONDecodeError: + return text.strip('"') + return text.strip('"') + return value + + +def fetch_kg_metadata(conn: psycopg2.extensions.connection, kg_id: str) -> dict[str, Any]: + with conn.cursor(cursor_factory=RealDictCursor) as cur: + cur.execute( + "SELECT * FROM knowledge_graphs WHERE id = %s", + (kg_id,), + ) + row = cur.fetchone() + if row is None: + raise SystemExit(f"Knowledge graph not found: {kg_id}") + return dict(row) + + +def fetch_table_rows( + conn: psycopg2.extensions.connection, + table: str, + *, + where_sql: str, + params: tuple[Any, ...], +) -> list[dict[str, Any]]: + query = sql.SQL("SELECT * FROM {} WHERE ").format(sql.Identifier(table)) + sql.SQL(where_sql) + with conn.cursor(cursor_factory=RealDictCursor) as cur: + cur.execute(query, params) + return [dict(row) for row in cur.fetchall()] + + +def fetch_postgres_payload( + conn: psycopg2.extensions.connection, kg_id: str, tenant_id: str +) -> dict[str, list[dict[str, Any]]]: + data_sources = fetch_table_rows( + conn, + "data_sources", + where_sql="knowledge_graph_id = %s", + params=(kg_id,), + ) + data_source_ids = tuple(row["id"] for row in data_sources) + credential_paths = tuple( + row["credentials_path"] for row in data_sources if row.get("credentials_path") + ) + + sync_runs: list[dict[str, Any]] = [] + if data_source_ids: + placeholders = sql.SQL(", ").join(sql.Placeholder() * len(data_source_ids)) + with conn.cursor(cursor_factory=RealDictCursor) as cur: + cur.execute( + sql.SQL("SELECT * FROM data_source_sync_runs WHERE data_source_id IN ({})").format( + placeholders + ), + data_source_ids, + ) + sync_runs = [dict(row) for row in cur.fetchall()] + + credentials: list[dict[str, Any]] = [] + if credential_paths: + placeholders = sql.SQL(", ").join(sql.Placeholder() * len(credential_paths)) + with conn.cursor(cursor_factory=RealDictCursor) as cur: + cur.execute( + sql.SQL( + "SELECT * FROM 
encrypted_credentials WHERE tenant_id = %s AND path IN ({})" + ).format(placeholders), + (tenant_id, *credential_paths), + ) + credentials = [dict(row) for row in cur.fetchall()] + + return { + "knowledge_graphs": fetch_table_rows( + conn, "knowledge_graphs", where_sql="id = %s", params=(kg_id,) + ), + "data_sources": data_sources, + "data_source_sync_runs": sync_runs, + "knowledge_graph_type_definitions": fetch_table_rows( + conn, + "knowledge_graph_type_definitions", + where_sql="knowledge_graph_id = %s", + params=(kg_id,), + ), + "extraction_runs": fetch_table_rows( + conn, "extraction_runs", where_sql="knowledge_graph_id = %s", params=(kg_id,) + ), + "extraction_jobs": fetch_table_rows( + conn, "extraction_jobs", where_sql="knowledge_graph_id = %s", params=(kg_id,) + ), + "extraction_agent_sessions": fetch_table_rows( + conn, + "extraction_agent_sessions", + where_sql="knowledge_graph_id = %s", + params=(kg_id,), + ), + "encrypted_credentials": credentials, + } + + +def fetch_graph_payload( + conn: psycopg2.extensions.connection, graph_name: str, kg_id: str +) -> dict[str, Any]: + escaped_kg_id = cypher_escape(kg_id) + escaped_graph_name = cypher_escape(graph_name) + with conn.cursor() as cur: + cur.execute("LOAD 'age'") + cur.execute('SET search_path = ag_catalog, "$user", public') + + cur.execute( + f""" + SELECT * FROM cypher('{escaped_graph_name}', $$ + MATCH (n) + WHERE n.knowledge_graph_id = '{escaped_kg_id}' + RETURN id(n), label(n), properties(n) + $$) AS (age_id agtype, label agtype, properties agtype) + """ + ) + nodes = [] + node_age_ids: set[str] = set() + for age_id, label, properties in cur.fetchall(): + age_id_text = str(agtype_to_python(age_id)) + label_text = str(agtype_to_python(label)) + props = agtype_to_python(properties) + if not isinstance(props, dict): + props = {} + nodes.append( + { + "age_id": age_id_text, + "label": label_text, + "properties": props, + } + ) + node_age_ids.add(age_id_text) + + cur.execute( + f""" + SELECT * FROM 
cypher('{escaped_graph_name}', $$ + MATCH (a)-[r]->(b) + WHERE a.knowledge_graph_id = '{escaped_kg_id}' + AND b.knowledge_graph_id = '{escaped_kg_id}' + RETURN id(r), label(r), id(a), id(b), properties(r) + $$) AS (age_id agtype, label agtype, start_age_id agtype, end_age_id agtype, properties agtype) + """ + ) + edges = [] + for age_id, label, start_age_id, end_age_id, properties in cur.fetchall(): + props = agtype_to_python(properties) + if not isinstance(props, dict): + props = {} + edges.append( + { + "age_id": str(agtype_to_python(age_id)), + "label": str(agtype_to_python(label)), + "start_age_id": str(agtype_to_python(start_age_id)), + "end_age_id": str(agtype_to_python(end_age_id)), + "properties": props, + } + ) + + return { + "graph_name": graph_name, + "knowledge_graph_id": kg_id, + "nodes": nodes, + "edges": edges, + } + + +def fetch_spicedb_payload( + spicedb_conn: psycopg2.extensions.connection, + kg_id: str, + data_source_ids: list[str], +) -> list[dict[str, str]]: + with spicedb_conn.cursor(cursor_factory=RealDictCursor) as cur: + if data_source_ids: + placeholders = sql.SQL(", ").join(sql.Placeholder() * len(data_source_ids)) + query = sql.SQL( + """ + SELECT namespace, object_id, relation, userset_namespace, userset_object_id, userset_relation + FROM relation_tuple + WHERE deleted_xid = '9223372036854775807'::xid8 + AND ( + (namespace = 'knowledge_graph' AND object_id = %s) + OR (namespace = 'data_source' AND object_id IN ({})) + ) + ORDER BY namespace, object_id, relation + """ + ).format(placeholders) + cur.execute(query, (kg_id, *data_source_ids)) + else: + cur.execute( + """ + SELECT namespace, object_id, relation, userset_namespace, userset_object_id, userset_relation + FROM relation_tuple + WHERE deleted_xid = '9223372036854775807'::xid8 + AND namespace = 'knowledge_graph' + AND object_id = %s + ORDER BY namespace, object_id, relation + """, + (kg_id,), + ) + return [dict(row) for row in cur.fetchall()] + + +def backup_dir_for(kg_id: str, 
backup_id: str) -> Path: + return BACKUP_ROOT / kg_id / backup_id + + +def resolve_backup_dir(kg_id: str, requested: str) -> Path: + kg_root = BACKUP_ROOT / kg_id + if requested == "latest": + latest = kg_root / "latest" + if latest.is_symlink(): + return latest.resolve() + if latest.is_dir(): + return latest + candidates = sorted(p for p in kg_root.iterdir() if p.is_dir() and p.name != "latest") + if not candidates: + raise SystemExit(f"No backups found for knowledge graph {kg_id}") + return candidates[-1] + + explicit = kg_root / requested + if explicit.is_dir(): + return explicit + if Path(requested).is_dir(): + return Path(requested) + raise SystemExit(f"Backup not found: {requested}") + + +def write_manifest( + backup_dir: Path, + *, + kg_id: str, + compose_project: str, + kg_name: str, + tenant_id: str, + workspace_id: str, + counts: dict[str, int], +) -> None: + manifest = { + "created_at": datetime.now(UTC).strftime("%Y-%m-%dT%H:%M:%SZ"), + "compose_project": compose_project, + "git_commit": git_commit_short(), + "knowledge_graph_id": kg_id, + "knowledge_graph_name": kg_name, + "tenant_id": tenant_id, + "workspace_id": workspace_id, + "age_graph_name": f"tenant_{tenant_id}", + "counts": counts, + } + write_json(backup_dir / "manifest.json", manifest) + + +def cmd_capture(args: argparse.Namespace) -> None: + validate_kg_id(args.knowledge_graph_id) + kg_id = args.knowledge_graph_id + + conn = connect() + try: + kg = fetch_kg_metadata(conn, kg_id) + tenant_id = kg["tenant_id"] + workspace_id = kg["workspace_id"] + graph_name = f"tenant_{tenant_id}" + + postgres_payload = fetch_postgres_payload(conn, kg_id, tenant_id) + graph_payload = fetch_graph_payload(conn, graph_name, kg_id) + finally: + conn.close() + + spicedb_conn = connect(spicedb_database_name()) + try: + spicedb_payload = fetch_spicedb_payload( + spicedb_conn, + kg_id, + [row["id"] for row in postgres_payload["data_sources"]], + ) + finally: + spicedb_conn.close() + + backup_id = timestamp_utc() + 
backup_dir = backup_dir_for(kg_id, backup_id) + backup_dir.mkdir(parents=True, exist_ok=True) + + write_json(backup_dir / "postgres.json", postgres_payload) + write_json(backup_dir / "graph.json", graph_payload) + write_json(backup_dir / "spicedb.json", spicedb_payload) + + counts = { + "postgres_knowledge_graphs": len(postgres_payload["knowledge_graphs"]), + "postgres_data_sources": len(postgres_payload["data_sources"]), + "postgres_data_source_sync_runs": len(postgres_payload["data_source_sync_runs"]), + "postgres_type_definitions": len(postgres_payload["knowledge_graph_type_definitions"]), + "postgres_extraction_runs": len(postgres_payload["extraction_runs"]), + "postgres_extraction_jobs": len(postgres_payload["extraction_jobs"]), + "postgres_extraction_agent_sessions": len(postgres_payload["extraction_agent_sessions"]), + "postgres_encrypted_credentials": len(postgres_payload["encrypted_credentials"]), + "graph_nodes": len(graph_payload["nodes"]), + "graph_edges": len(graph_payload["edges"]), + "spicedb_relationships": len(spicedb_payload), + } + write_manifest( + backup_dir, + kg_id=kg_id, + compose_project=args.compose_project, + kg_name=kg["name"], + tenant_id=tenant_id, + workspace_id=workspace_id, + counts=counts, + ) + + latest = BACKUP_ROOT / kg_id / "latest" + latest.parent.mkdir(parents=True, exist_ok=True) + if latest.exists() or latest.is_symlink(): + latest.unlink() + latest.symlink_to(backup_id, target_is_directory=True) + + print(f"Captured knowledge graph '{kg['name']}' ({kg_id})") + print(f"Backup directory: {backup_dir}") + print("Counts:") + for key, value in counts.items(): + print(f" {key}: {value}") + + +def decode_row_values(row: dict[str, Any]) -> dict[str, Any]: + decoded = dict(row) + if "encrypted_value" in decoded and isinstance(decoded["encrypted_value"], str): + decoded["encrypted_value"] = base64.b64decode(decoded["encrypted_value"]) + return decoded + + +def insert_rows( + conn: psycopg2.extensions.connection, + table: str, + 
rows: list[dict[str, Any]], +) -> None: + if not rows: + return + columns = list(rows[0].keys()) + placeholders = sql.SQL(", ").join(sql.Placeholder() * len(columns)) + statement = sql.SQL("INSERT INTO {} ({}) VALUES ({}) ON CONFLICT DO NOTHING").format( + sql.Identifier(table), + sql.SQL(", ").join(map(sql.Identifier, columns)), + placeholders, + ) + with conn.cursor() as cur: + for row in rows: + values = [decode_row_values(row)[column] for column in columns] + cur.execute(statement, values) + + +def delete_existing_kg_data(conn: psycopg2.extensions.connection, kg_id: str, graph_name: str) -> None: + escaped_kg_id = cypher_escape(kg_id) + escaped_graph_name = cypher_escape(graph_name) + with conn.cursor() as cur: + cur.execute("LOAD 'age'") + cur.execute('SET search_path = ag_catalog, "$user", public') + cur.execute( + f""" + SELECT * FROM cypher('{escaped_graph_name}', $$ + MATCH (n) + WHERE n.knowledge_graph_id = '{escaped_kg_id}' + DETACH DELETE n + $$) AS (result agtype) + """ + ) + + cur.execute("DELETE FROM extraction_agent_sessions WHERE knowledge_graph_id = %s", (kg_id,)) + cur.execute("DELETE FROM extraction_jobs WHERE knowledge_graph_id = %s", (kg_id,)) + cur.execute("DELETE FROM extraction_runs WHERE knowledge_graph_id = %s", (kg_id,)) + cur.execute( + "DELETE FROM knowledge_graph_type_definitions WHERE knowledge_graph_id = %s", + (kg_id,), + ) + cur.execute( + """ + DELETE FROM data_source_sync_runs + WHERE data_source_id IN (SELECT id FROM data_sources WHERE knowledge_graph_id = %s) + """, + (kg_id,), + ) + cur.execute( + """ + DELETE FROM encrypted_credentials + WHERE path IN ( + SELECT credentials_path FROM data_sources + WHERE knowledge_graph_id = %s AND credentials_path IS NOT NULL + ) + """, + (kg_id,), + ) + cur.execute("DELETE FROM data_sources WHERE knowledge_graph_id = %s", (kg_id,)) + cur.execute("DELETE FROM knowledge_graphs WHERE id = %s", (kg_id,)) + + +def ensure_label_exists(cur: Any, graph_name: str, label: str, *, edge: bool) -> 
None: + cur.execute( + """ + SELECT l.id + FROM ag_catalog.ag_label l + JOIN ag_catalog.ag_graph g ON l.graph = g.graphid + WHERE g.name = %s AND l.name = %s + """, + (graph_name, label), + ) + if cur.fetchone(): + return + if edge: + cur.execute("SELECT ag_catalog.create_elabel(%s, %s)", (graph_name, label)) + else: + cur.execute("SELECT ag_catalog.create_vlabel(%s, %s)", (graph_name, label)) + + +def cypher_escape(value: str) -> str: + return value.replace("\\", "\\\\").replace("'", "\\'") + + +def cypher_literal(value: Any) -> str: + if value is None: + return "null" + if isinstance(value, bool): + return "true" if value else "false" + if isinstance(value, (int, float)): + return str(value) + if isinstance(value, list): + return "[" + ", ".join(cypher_literal(item) for item in value) + "]" + if isinstance(value, dict): + inner = ", ".join(f"{key}: {cypher_literal(item)}" for key, item in value.items()) + return "{" + inner + "}" + text = cypher_escape(str(value)) + return f"'{text}'" + + +def restore_graph_nodes( + conn: psycopg2.extensions.connection, graph_payload: dict[str, Any] +) -> None: + graph_name = graph_payload["graph_name"] + with conn.cursor() as cur: + cur.execute("LOAD 'age'") + cur.execute('SET search_path = ag_catalog, "$user", public') + + labels = sorted({node["label"] for node in graph_payload["nodes"]}) + for label in labels: + ensure_label_exists(cur, graph_name, label, edge=False) + + for node in graph_payload["nodes"]: + label = node["label"] + props_map = ", ".join( + f"{key}: {cypher_literal(value)}" for key, value in node["properties"].items() + ) + query = f""" + SELECT * FROM cypher('{cypher_escape(graph_name)}', $$ + CREATE (n:{label} {{{props_map}}}) + RETURN id(n) + $$) AS (age_id agtype) + """ + cur.execute(query) + + edge_labels = sorted({edge["label"] for edge in graph_payload["edges"]}) + for label in edge_labels: + ensure_label_exists(cur, graph_name, label, edge=True) + + for edge in graph_payload["edges"]: + label = 
edge["label"] + props_map = ", ".join( + f"{key}: {cypher_literal(value)}" for key, value in edge["properties"].items() + ) + start_id = cypher_escape(edge["start_age_id"]) + end_id = cypher_escape(edge["end_age_id"]) + if props_map: + create_edge = f""" + MATCH (a), (b) + WHERE id(a) = {start_id} AND id(b) = {end_id} + CREATE (a)-[r:{label} {{{props_map}}}]->(b) + RETURN id(r) + """ + else: + create_edge = f""" + MATCH (a), (b) + WHERE id(a) = {start_id} AND id(b) = {end_id} + CREATE (a)-[r:{label}]->(b) + RETURN id(r) + """ + query = f""" + SELECT * FROM cypher('{cypher_escape(graph_name)}', $$ + {create_edge} + $$) AS (age_id agtype) + """ + cur.execute(query) + + +def spicedb_settings() -> tuple[str, str]: + env_values = load_env_file(SPICEDB_ENV) + token = os.getenv("SPICEDB_GRPC_PRESHARED_KEY", env_values.get("SPICEDB_GRPC_PRESHARED_KEY", "changeme")) + network = os.getenv("KARTOGRAPH_COMPOSE_NETWORK", "kartograph_kartograph") + return token, network + + +def restore_spicedb_relationships(relationships: list[dict[str, str]]) -> None: + token, network = spicedb_settings() + certs_dir = REPO_ROOT / "certs" + for rel in relationships: + resource = f"{rel['namespace']}:{rel['object_id']}" + subject = f"{rel['userset_namespace']}:{rel['userset_object_id']}" + if rel["userset_relation"] and rel["userset_relation"] != "...": + subject = f"{subject}#{rel['userset_relation']}" + cmd = [ + "docker", + "run", + "--rm", + "--network", + network, + "-e", + "ZED_ENDPOINT=spicedb:50051", + "-e", + f"ZED_TOKEN={token}", + "-e", + "GRPC_DEFAULT_SSL_ROOTS_FILE_PATH=/certs/spicedb-cert.pem", + "-v", + f"{certs_dir}:/certs:ro", + "authzed/zed:latest", + "--no-verify-ca", + "relationship", + "touch", + resource, + rel["relation"], + subject, + ] + subprocess.run(cmd, check=True, capture_output=True, text=True) + + +def cmd_restore(args: argparse.Namespace) -> None: + validate_kg_id(args.knowledge_graph_id) + backup_dir = resolve_backup_dir(args.knowledge_graph_id, args.backup_id) 
+ manifest = read_json(backup_dir / "manifest.json") + postgres_payload = read_json(backup_dir / "postgres.json") + graph_payload = read_json(backup_dir / "graph.json") + spicedb_payload = read_json(backup_dir / "spicedb.json") + + if not args.yes: + print(f"This will restore knowledge graph {args.knowledge_graph_id} from:") + print(f" {backup_dir}") + if args.replace: + print("Existing data for this knowledge graph will be deleted first.") + reply = input("Continue? [y/N] ").strip().lower() + if reply not in {"y", "yes"}: + raise SystemExit("Aborted.") + + conn = connect() + try: + conn.autocommit = False + if args.replace: + delete_existing_kg_data(conn, args.knowledge_graph_id, graph_payload["graph_name"]) + + for table in POSTGRES_TABLES_IN_RESTORE_ORDER: + insert_rows(conn, table, postgres_payload.get(table, [])) + + restore_graph_nodes(conn, graph_payload) + conn.commit() + except Exception: + conn.rollback() + raise + finally: + conn.close() + + restore_spicedb_relationships(spicedb_payload) + + print(f"Restored knowledge graph '{manifest.get('knowledge_graph_name', args.knowledge_graph_id)}'") + print(f"From backup: {backup_dir}") + + +def cmd_list(args: argparse.Namespace) -> None: + validate_kg_id(args.knowledge_graph_id) + kg_root = BACKUP_ROOT / args.knowledge_graph_id + if not kg_root.exists(): + print(f"No backups yet for knowledge graph {args.knowledge_graph_id}") + return + print(f"Backups for knowledge graph {args.knowledge_graph_id} in {kg_root}:") + for path in sorted(p for p in kg_root.iterdir() if p.is_dir() and p.name != "latest"): + manifest_path = path / "manifest.json" + if manifest_path.exists(): + manifest = read_json(manifest_path) + counts = manifest.get("counts", {}) + print( + f" {path.name} nodes={counts.get('graph_nodes', '?')} " + f"data_sources={counts.get('postgres_data_sources', '?')} " + f"name={manifest.get('knowledge_graph_name', '?')}" + ) + else: + print(f" {path.name}") + latest = kg_root / "latest" + if 
latest.is_symlink(): + print(f"\nlatest -> {os.readlink(latest)}") + + +def build_parser() -> argparse.ArgumentParser: + parser = argparse.ArgumentParser(description=__doc__) + subparsers = parser.add_subparsers(dest="command", required=True) + + capture = subparsers.add_parser("capture", help="Capture a knowledge graph backup") + capture.add_argument("knowledge_graph_id") + capture.add_argument( + "--compose-project", + default=os.getenv("COMPOSE_PROJECT", "kartograph"), + help="Docker compose project name (stored in manifest metadata)", + ) + capture.set_defaults(func=cmd_capture) + + restore = subparsers.add_parser("restore", help="Restore a knowledge graph backup") + restore.add_argument("knowledge_graph_id") + restore.add_argument("backup_id", nargs="?", default="latest") + restore.add_argument( + "--replace", + action="store_true", + help="Delete existing data for this knowledge graph before restoring", + ) + restore.add_argument("--yes", "-y", action="store_true", help="Skip confirmation prompt") + restore.set_defaults(func=cmd_restore) + + list_cmd = subparsers.add_parser("list", help="List backups for a knowledge graph") + list_cmd.add_argument("knowledge_graph_id") + list_cmd.set_defaults(func=cmd_list) + + return parser + + +def main() -> None: + parser = build_parser() + args = parser.parse_args() + args.func(args) + + +if __name__ == "__main__": + main() diff --git a/skills/subagent-delivery/SKILL.md b/skills/subagent-delivery/SKILL.md deleted file mode 100644 index 824d6ec4b..000000000 --- a/skills/subagent-delivery/SKILL.md +++ /dev/null @@ -1,206 +0,0 @@ ---- -name: subagent-delivery -description: > - Executes a GitHub issue end-to-end with consistent branch, test, PR, and merge behavior. - Use when implementing units of work with sub-agents, preparing pull requests, resolving merge - conflicts, or when the user asks to run issue-by-issue delivery into feature/manage-knowledge-graph. 
- Supports parallel delivery waves with explicit blocker-question escalation. ---- - -# Subagent Delivery Protocol - -Follow this protocol for every assigned issue. - -System prompt template for spawned Claude instances: - -- `skills/subagent-delivery/claude-instance-system-prompt.txt` - -## Parallel Execution Model - -Use this model whenever multiple issues are independent: - -1. One subagent per issue branch. -2. Shared target branch: `feature/manage-knowledge-graph`. -3. No shared working branch between agents. -4. Each subagent works to PR-ready state independently. -5. Merge in dependency order (foundational backend before UI polish when coupled). - -If two issues touch the same files heavily, either: -- serialize those two issues, or -- split scope so each agent owns non-overlapping symbols. - -## Section-Wave Execution Model (Required) - -When the user asks for "whole sections at a time", execute in waves aligned to tracker sections: - -1. **Section A: Core lifecycle/data** - - `#643 #644 #645 #646 #659 #660 #661 #662 #663` -2. **Section B: Extraction runtime/session** - - `#649 #650 #651 #652 #653 #654` -3. **Section C: Operations/security/integration** - - `#665 #667 #670 #671 #672 #673` - -Wave rules: - -1. Run independent issues in parallel with one Claude instance per issue. -2. Respect dependencies inside the section (foundation issues first). -3. Keep all PRs targeting `feature/manage-knowledge-graph`. -4. Do not start the next section until current section is merged or explicitly deferred. -5. For each section, maintain a live status board: - - `queued`, `in_progress`, `blocked`, `in_review`, `merged` - -## Scope and Inputs - -Before coding, gather: - -1. Issue number and acceptance criteria. -2. Target branch: `feature/manage-knowledge-graph`. -3. Current repository state (`git status`, `git branch -vv`). -4. 
Context pack (required): - - relevant specs under `specs/` - - bounded context ownership (management/ingestion/extraction/graph/querying/ui) - - existing tests near touched code - - architectural constraints from `AGENTS.md` - -If acceptance criteria are ambiguous, ask one focused question before implementation. - -## Claude Instance Spawn Contract - -For each issue, provide the Claude instance: - -1. Issue ID + title + acceptance criteria summary. -2. Branch naming requirement: - - `feat/issue-<id>-<short-scope>` or `fix/issue-<id>-<short-scope>` -3. Required reads: - - `AGENTS.md` - - relevant `specs/*.spec.md` - - related tests in touched context -4. TDD requirement: - - tests first, then implementation, then verification -5. Output contract: - - branch - - commit(s) - - test commands and results - - PR URL - - blockers/questions - -## Blocker Question Protocol (Required) - -Subagents must be able to stop and ask questions immediately. - -Trigger a blocker question when any of these is true: - -1. More than one valid interpretation of acceptance criteria. -2. Missing security/tenancy/authorization decision. -3. Required external behavior is unspecified. -4. You would otherwise make an irreversible guess. - -When blocked: - -1. Stop implementation at the decision boundary. -2. Ask one concise question in the active agent chat immediately. -3. Include: - - what is ambiguous - - 2-3 concrete options - - recommended option and why -4. If working from a GitHub issue, mirror the same question as an issue comment so the orchestrator can batch unresolved questions across agents. -5. Continue only non-blocked work; do not guess on blocked decisions. - -If a blocker impacts multiple active instances: - -1. Pause affected issues. -2. Continue unaffected issues in parallel. -3. Post one consolidated orchestrator decision update. -4. Resume paused issues with explicit instruction delta. - -## Git Workflow - -1. 
Ensure local target branch is up to date: - - `git checkout feature/manage-knowledge-graph` - - `git pull --ff-only` -2. Create a dedicated branch per issue: - - `feat/issue-<id>-<short-scope>` for features - - `fix/issue-<id>-<short-scope>` for fixes -3. Never mix multiple issues in one branch. -4. Keep commits atomic and conventional (`feat:`, `fix:`, `refactor:`, `test:`). - -## Implementation Workflow (TDD Required) - -1. Read relevant spec(s) and affected bounded context code first. -2. Write/adjust tests for expected behavior before implementation. -3. Implement minimal code to satisfy tests. -4. Run focused tests first, then broader suite for touched context. -5. Run lints/type checks for changed files when applicable. -6. If behavior depends on configuration, use settings/DI instead of hardcoding. -7. If new ambiguity appears mid-implementation, invoke the Blocker Question Protocol. - -## PR Workflow - -1. Push branch to origin with upstream tracking. -2. Open PR against `feature/manage-knowledge-graph`. -3. Use this body structure: - -```markdown -## Summary -- <what changed and why> -- <important architectural/security note> - -## Testing -- [x] <unit tests run> -- [x] <integration tests run if applicable> -- [ ] <manual verification if pending> - -## Risks -- <none> or <known risk + mitigation> -``` - -4. Link the issue in PR body using `Closes #<id>` when appropriate. -5. If any assumptions were made, include an explicit assumptions list in PR body. - -## Merge and Conflict Handling - -1. Before merge, ensure CI checks are green. -2. If branch is stale, rebase or merge target branch cleanly. -3. Resolve conflicts preserving: - - Spec-required behavior - - Existing user changes - - Authorization and tenancy boundaries -4. Re-run tests after conflict resolution. -5. Merge into `feature/manage-knowledge-graph` only after verification. - -## Orchestrator Monitoring Loop (Required) - -During active waves, run this loop continuously: - -1. 
Poll each PR for: - - mergeability - - CI status - - review comments requiring changes -2. If merge conflict appears: - - rebase/merge target branch into issue branch - - resolve conflicts preserving spec behavior - - rerun relevant tests - - push and re-check PR -3. If CI fails: - - fix in same issue branch - - do not move issue scope -4. Update section status board and report progress to user. - -## Orchestrator Handoff Contract - -Each subagent must hand back: - -1. Branch name and PR URL. -2. Test commands run with pass/fail status. -3. Any unresolved questions (if still blocked). -4. Any assumptions that were taken and why they are safe. - -## Non-Negotiables - -- Do not use destructive git commands. -- Do not skip tests. -- Do not disable hooks. -- Do not commit secrets or credentials. -- Prefer fakes over mocks in unit tests when testing domain/application behavior. -- Do not invent acceptance criteria beyond the issue/spec without asking. - diff --git a/skills/subagent-delivery/claude-instance-system-prompt.txt b/skills/subagent-delivery/claude-instance-system-prompt.txt deleted file mode 100644 index 0c3f3260e..000000000 --- a/skills/subagent-delivery/claude-instance-system-prompt.txt +++ /dev/null @@ -1,67 +0,0 @@ -You are a focused delivery Claude instance assigned to exactly one Kartograph GitHub issue. - -Mission: -- Deliver the assigned issue end-to-end with TDD discipline. -- Open a PR against `feature/manage-knowledge-graph`. -- Stop and ask immediately when blocked by ambiguity. - -Hard constraints: -1. Scope - - Work only on the assigned issue. - - Do not expand scope to neighboring issues. -2. Branching - - Start from latest `feature/manage-knowledge-graph`. - - Use branch `feat/issue-<id>-<short-scope>` or `fix/issue-<id>-<short-scope>`. -3. Specs and architecture - - Read `AGENTS.md` first. - - Read all relevant `specs/*.spec.md` for your issue. - - Preserve bounded-context boundaries and authorization rules. -4. 
TDD - - Write/adjust tests first. - - Implement minimal code to satisfy tests. - - Run focused tests; run broader suite as needed by touched context. -5. Safety - - Never use destructive git commands. - - Never commit secrets. - - Never skip required checks. - -Blocker protocol (mandatory): -- Trigger if acceptance criteria are ambiguous, security/tenancy decision is unclear, or behavior is unspecified. -- Stop at decision boundary and ask one concise question with: - - ambiguity summary - - 2-3 concrete options - - recommended option with rationale -- Mirror the blocker question on the GitHub issue as a comment. -- Continue only non-blocked work. - -Execution checklist: -1. Parse issue acceptance criteria. -2. Inspect affected code and tests. -3. Add failing tests for required behavior. -4. Implement and make tests pass. -5. Run lint/type/test for touched area. -6. Commit atomically using conventional commit message. -7. Push branch and open PR to `feature/manage-knowledge-graph`. - -PR body format: -## Summary -- what changed and why -- key architecture/security note - -## Testing -- [x] commands run and results -- [ ] any pending verification - -## Risks -- none or explicit risk + mitigation - -Include `Closes #<issue-id>` where appropriate. - -Required handoff output: -1. Issue ID -2. Branch name -3. Commit SHA(s) -4. Test commands and pass/fail -5. PR URL -6. Open blockers/questions (if any) -7. 
Assumptions made diff --git a/skills/subagent-delivery/section-wave-launch.template.txt b/skills/subagent-delivery/section-wave-launch.template.txt deleted file mode 100644 index f6ce65822..000000000 --- a/skills/subagent-delivery/section-wave-launch.template.txt +++ /dev/null @@ -1,38 +0,0 @@ -Section wave launch template (one Claude instance per issue) - -Prerequisites: -- Read: `skills/subagent-delivery/SKILL.md` -- System prompt: `skills/subagent-delivery/claude-instance-system-prompt.txt` -- Base branch: `feature/manage-knowledge-graph` - -Per-instance launch packet: - -ISSUE: <id> - <title> -TARGET BRANCH: feature/manage-knowledge-graph -WORK BRANCH: feat/issue-<id>-<short-scope> - -Required context files: -- AGENTS.md -- <relevant spec files> -- <relevant code files> -- <relevant tests> - -Acceptance criteria summary: -- <criterion 1> -- <criterion 2> - -Execution requirements: -1) TDD: tests first -2) Implement minimal passing code -3) Run focused tests + lint -4) Commit atomically (conventional commit) -5) Open PR to feature/manage-knowledge-graph -6) Report branch, tests, PR URL, blockers - -Blocker handling: -- Ask one focused blocker question immediately. -- Include options + recommendation. -- Mirror blocker question on issue comment. - -Orchestrator status line format: -[Issue #<id>] <queued|in_progress|blocked|in_review|merged> | Branch: <branch> | PR: <url-or-pending> From fe56f75411c950dbea52e7eaabe1fd2a45ea7743 Mon Sep 17 00:00:00 2001 From: aredenba-rh <aredenba@redhat.com> Date: Sun, 14 Jun 2026 18:20:59 -0400 Subject: [PATCH 140/153] feat(extraction): harden sticky runtimes and add OpenShell sandbox backend Secure GMA agent containers with session-bound /v1/turn auth, Docker hardening flags, and per-turn workload tokens instead of long-lived env JWTs. Add OpenShell-backed sticky sessions and extraction jobs with per-mode network policies, dev compose wiring, and prod manifest stubs. 
Co-authored-by: Cursor <cursoragent@cursor.com> --- compose.dev.yaml | 25 +- .../apps/kartograph/base/api-deployment.yaml | 36 ++ deploy/apps/kartograph/base/configmap.yaml | 9 + .../apps/kartograph/base/kustomization.yaml | 2 + .../base/networkpolicy-sticky-runtime.yaml | 33 ++ .../base/openshell-policies-configmap.yaml | 63 +++ deploy/openshell/policies/extraction-job.yaml | 17 + .../policies/gma-extraction-jobs.yaml | 10 + .../policies/gma-initial-schema-design.yaml | 10 + .../policies/gma-one-off-mutations.yaml | 10 + .../openshell/policies/gma-sticky-base.yaml | 9 + .../kartograph_agent_runtime/runtime_auth.py | 14 + .../kartograph_agent_runtime/server.py | 23 +- .../kartograph_agent_runtime/settings.py | 1 + src/agent-runtime/tests/test_server.py | 25 ++ .../sticky_session_runtime_service.py | 15 +- .../container_workload_runtime.py | 29 +- .../extraction_job_runner_factory.py | 8 +- .../infrastructure/openshell/__init__.py | 1 + .../infrastructure/openshell/audit.py | 65 +++ .../infrastructure/openshell/cli.py | 72 ++++ .../infrastructure/openshell/gateway.py | 29 ++ ...penshell_sticky_session_runtime_manager.py | 371 ++++++++++++++++++ .../openshell/policies/extraction-job.yaml | 17 + .../policies/gma-extraction-jobs.yaml | 10 + .../policies/gma-initial-schema-design.yaml | 10 + .../policies/gma-one-off-mutations.yaml | 10 + .../openshell/policies/gma-sticky-base.yaml | 9 + .../infrastructure/openshell/policy.py | 111 ++++++ .../infrastructure/openshell/sandbox.py | 198 ++++++++++ .../openshell_extraction_job_runner.py | 282 +++++++++++++ .../remote_sticky_container_chat_agent.py | 7 +- .../infrastructure/runtime_session_auth.py | 19 + .../sticky_session_bootstrap_builder.py | 4 +- .../workload_runtime_factory.py | 33 +- .../workload_runtime_settings.py | 51 ++- src/api/extraction/ports/runtime.py | 2 + .../ports/sticky_session_bootstrap.py | 1 + .../container_runtime/cli_runtime.py | 14 + .../shared_kernel/container_runtime/ports.py | 7 + 
.../infrastructure/test_openshell_policy.py | 39 ++ ...penshell_sticky_session_runtime_manager.py | 115 ++++++ .../test_runtime_session_auth.py | 29 ++ ...test_sticky_session_container_bootstrap.py | 10 +- .../test_workload_runtime_factory.py | 11 + .../container_runtime/test_cli_runtime.py | 29 ++ 46 files changed, 1872 insertions(+), 23 deletions(-) create mode 100644 deploy/apps/kartograph/base/networkpolicy-sticky-runtime.yaml create mode 100644 deploy/apps/kartograph/base/openshell-policies-configmap.yaml create mode 100644 deploy/openshell/policies/extraction-job.yaml create mode 100644 deploy/openshell/policies/gma-extraction-jobs.yaml create mode 100644 deploy/openshell/policies/gma-initial-schema-design.yaml create mode 100644 deploy/openshell/policies/gma-one-off-mutations.yaml create mode 100644 deploy/openshell/policies/gma-sticky-base.yaml create mode 100644 src/agent-runtime/kartograph_agent_runtime/runtime_auth.py create mode 100644 src/api/extraction/infrastructure/openshell/__init__.py create mode 100644 src/api/extraction/infrastructure/openshell/audit.py create mode 100644 src/api/extraction/infrastructure/openshell/cli.py create mode 100644 src/api/extraction/infrastructure/openshell/gateway.py create mode 100644 src/api/extraction/infrastructure/openshell/openshell_sticky_session_runtime_manager.py create mode 100644 src/api/extraction/infrastructure/openshell/policies/extraction-job.yaml create mode 100644 src/api/extraction/infrastructure/openshell/policies/gma-extraction-jobs.yaml create mode 100644 src/api/extraction/infrastructure/openshell/policies/gma-initial-schema-design.yaml create mode 100644 src/api/extraction/infrastructure/openshell/policies/gma-one-off-mutations.yaml create mode 100644 src/api/extraction/infrastructure/openshell/policies/gma-sticky-base.yaml create mode 100644 src/api/extraction/infrastructure/openshell/policy.py create mode 100644 src/api/extraction/infrastructure/openshell/sandbox.py create mode 100644 
src/api/extraction/infrastructure/openshell_extraction_job_runner.py create mode 100644 src/api/extraction/infrastructure/runtime_session_auth.py create mode 100644 src/api/tests/unit/extraction/infrastructure/test_openshell_policy.py create mode 100644 src/api/tests/unit/extraction/infrastructure/test_openshell_sticky_session_runtime_manager.py create mode 100644 src/api/tests/unit/extraction/infrastructure/test_runtime_session_auth.py diff --git a/compose.dev.yaml b/compose.dev.yaml index 0642f74bb..bcdd2ab81 100644 --- a/compose.dev.yaml +++ b/compose.dev.yaml @@ -5,7 +5,7 @@ services: context: ./src/agent-runtime dockerfile: Dockerfile image: kartograph-agent-runtime:dev - profiles: ["build-only"] + profiles: [ "build-only" ] api: # Root required for Docker-out-of-Docker via mounted /var/run/docker.sock in dev @@ -14,7 +14,6 @@ services: UV_CACHE_DIR: /tmp/uv-cache HOST_UID: ${HOST_UID} HOST_GID: ${HOST_GID} - KARTOGRAPH_EXTRACTION_RUNTIME_BACKEND: container KARTOGRAPH_EXTRACTION_RUNTIME_CONTAINER_ENGINE: auto KARTOGRAPH_EXTRACTION_RUNTIME_CONTAINER_NETWORK: kartograph_kartograph KARTOGRAPH_EXTRACTION_RUNTIME_STICKY_IMAGE: kartograph-agent-runtime:dev @@ -25,7 +24,18 @@ services: KARTOGRAPH_EXTRACTION_RUNTIME_CONTAINER_RUN_GID: ${HOST_GID} KARTOGRAPH_EXTRACTION_RUNTIME_STICKY_TURN_TIMEOUT_SECONDS: "3600" KARTOGRAPH_EXTRACTION_RUNTIME_STICKY_MAX_TURNS: "500" - KARTOGRAPH_EXTRACTION_RUNTIME_JOB_RUNNER: agentic_ci + KARTOGRAPH_EXTRACTION_RUNTIME_CONTAINER_HARDENING_ENABLED: "true" + ## Docker (Track A): + # KARTOGRAPH_EXTRACTION_RUNTIME_BACKEND: container + # KARTOGRAPH_EXTRACTION_RUNTIME_JOB_RUNNER: agentic_ci + ## OpenShell (Track B): uncomment backend/job_runner after `openshell gateway add` on the host + KARTOGRAPH_EXTRACTION_RUNTIME_BACKEND: openshell + KARTOGRAPH_EXTRACTION_RUNTIME_JOB_RUNNER: openshell + ## End Track A / Track B selection + KARTOGRAPH_EXTRACTION_RUNTIME_OPENSHELL_GATEWAY_URL: https://host.docker.internal:17670 + 
KARTOGRAPH_EXTRACTION_RUNTIME_OPENSHELL_RUNTIME_HOST: host.docker.internal + KARTOGRAPH_EXTRACTION_RUNTIME_OPENSHELL_POLICY_DIR: /etc/openshell/policies + KARTOGRAPH_EXTRACTION_RUNTIME_OPENSHELL_GATEWAY_NAME: openshell KARTOGRAPH_EXTRACTION_RUNTIME_AGENTIC_CI_IMAGE: ghcr.io/opendatahub-io/ai-helpers:latest KARTOGRAPH_EXTRACTION_RUNTIME_AGENTIC_CI_HARNESS: claude-code KARTOGRAPH_EXTRACTION_RUNTIME_EXTRACTION_JOB_WORK_DIR: /tmp/kartograph/extraction_jobs @@ -47,6 +57,13 @@ services: - /var/run/docker.sock:/var/run/docker.sock # Docker/Podman CLI from host (required for container runtime backend) - ${DOCKER_BIN:-/usr/bin/docker}:/usr/bin/docker:ro + # OpenShell CLI + mTLS config (host gateway; API container invokes openshell subprocess) + - /usr/bin/openshell:/usr/bin/openshell:ro + - ${HOME}/.config/openshell:/root/.config/openshell:ro,z + # OpenShell policy templates (Phase 3) when backend=openshell + - ./src/api/extraction/infrastructure/openshell/policies:/etc/openshell/policies:ro,z + extra_hosts: + - "host.docker.internal:host-gateway" command: - /bin/bash - -c @@ -68,7 +85,7 @@ services: - /app/.output environment: - HOST=0.0.0.0 - command: ["pnpm", "run", "dev"] + command: [ "pnpm", "run", "dev" ] ports: - "3000:3000" - "24678:24678" diff --git a/deploy/apps/kartograph/base/api-deployment.yaml b/deploy/apps/kartograph/base/api-deployment.yaml index 1de0bc5ee..d036d53a4 100644 --- a/deploy/apps/kartograph/base/api-deployment.yaml +++ b/deploy/apps/kartograph/base/api-deployment.yaml @@ -155,11 +155,44 @@ spec: secretKeyRef: name: kartograph-sso-client-swagger-docs key: client_id + - name: KARTOGRAPH_EXTRACTION_RUNTIME_BACKEND + valueFrom: + configMapKeyRef: + name: kartograph-config + key: KARTOGRAPH_EXTRACTION_RUNTIME_BACKEND + optional: true + - name: KARTOGRAPH_EXTRACTION_RUNTIME_API_BASE_URL + valueFrom: + configMapKeyRef: + name: kartograph-config + key: KARTOGRAPH_EXTRACTION_RUNTIME_API_BASE_URL + optional: true + - name: 
KARTOGRAPH_EXTRACTION_RUNTIME_CONTAINER_HARDENING_ENABLED + valueFrom: + configMapKeyRef: + name: kartograph-config + key: KARTOGRAPH_EXTRACTION_RUNTIME_CONTAINER_HARDENING_ENABLED + optional: true + - name: KARTOGRAPH_EXTRACTION_RUNTIME_OPENSHELL_POLICY_DIR + valueFrom: + configMapKeyRef: + name: kartograph-config + key: KARTOGRAPH_EXTRACTION_RUNTIME_OPENSHELL_POLICY_DIR + optional: true + - name: KARTOGRAPH_EXTRACTION_RUNTIME_OPENSHELL_POLICY_ENFORCEMENT + valueFrom: + configMapKeyRef: + name: kartograph-config + key: KARTOGRAPH_EXTRACTION_RUNTIME_OPENSHELL_POLICY_ENFORCEMENT + optional: true volumeMounts: - name: spicedb-ca mountPath: /etc/spicedb-ca readOnly: true + - name: openshell-policies + mountPath: /etc/openshell/policies + readOnly: true livenessProbe: httpGet: path: /health @@ -190,3 +223,6 @@ spec: items: - key: service-ca.crt path: service-ca.crt + - name: openshell-policies + configMap: + name: kartograph-openshell-policies diff --git a/deploy/apps/kartograph/base/configmap.yaml b/deploy/apps/kartograph/base/configmap.yaml index 03c6c8fad..e08f3021e 100644 --- a/deploy/apps/kartograph/base/configmap.yaml +++ b/deploy/apps/kartograph/base/configmap.yaml @@ -22,3 +22,12 @@ data: DEV_UI_KEYCLOAK_URL: "http://keycloak:8080" DEV_UI_KEYCLOAK_REALM: "kartograph" DEV_UI_KEYCLOAK_CLIENT_ID: "kartograph-ui" + # Extraction runtime (container backend with Phase 0 hardening; switch to openshell in overlay) + KARTOGRAPH_EXTRACTION_RUNTIME_BACKEND: "container" + KARTOGRAPH_EXTRACTION_RUNTIME_CONTAINER_NETWORK: "kartograph" + KARTOGRAPH_EXTRACTION_RUNTIME_API_BASE_URL: "http://kartograph-api:8000" + KARTOGRAPH_EXTRACTION_RUNTIME_AGENTIC_CI_API_BASE_URL: "http://kartograph-api:8000" + KARTOGRAPH_EXTRACTION_RUNTIME_CONTAINER_HARDENING_ENABLED: "true" + KARTOGRAPH_EXTRACTION_RUNTIME_OPENSHELL_POLICY_DIR: "/etc/openshell/policies" + KARTOGRAPH_EXTRACTION_RUNTIME_OPENSHELL_POLICY_ENFORCEMENT: "hard_requirement" + KARTOGRAPH_EXTRACTION_RUNTIME_OPENSHELL_RUNTIME_HOST: 
"127.0.0.1" diff --git a/deploy/apps/kartograph/base/kustomization.yaml b/deploy/apps/kartograph/base/kustomization.yaml index 4ffbd8134..d4709be85 100644 --- a/deploy/apps/kartograph/base/kustomization.yaml +++ b/deploy/apps/kartograph/base/kustomization.yaml @@ -28,6 +28,8 @@ resources: - configmap.yaml - spicedb-schema-configmap.yaml - spicedb-ca-configmap.yaml + - networkpolicy-sticky-runtime.yaml + - openshell-policies-configmap.yaml commonLabels: app.kubernetes.io/name: kartograph diff --git a/deploy/apps/kartograph/base/networkpolicy-sticky-runtime.yaml b/deploy/apps/kartograph/base/networkpolicy-sticky-runtime.yaml new file mode 100644 index 000000000..d227ac2eb --- /dev/null +++ b/deploy/apps/kartograph/base/networkpolicy-sticky-runtime.yaml @@ -0,0 +1,33 @@ +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: kartograph-sticky-runtime-egress + labels: + app.kubernetes.io/component: sticky-runtime +spec: + podSelector: + matchLabels: + kartograph.runtime.kind: sticky + policyTypes: + - Egress + egress: + # Kartograph workload API (in-cluster DNS name) + - to: + - podSelector: + matchLabels: + app.kubernetes.io/component: api + ports: + - protocol: TCP + port: 8000 + # DNS resolution inside the cluster + - to: + - namespaceSelector: {} + ports: + - protocol: UDP + port: 53 + - protocol: TCP + port: 53 + # Vertex / Anthropic inference via OpenShell inference.local routing when enabled + - ports: + - protocol: TCP + port: 443 diff --git a/deploy/apps/kartograph/base/openshell-policies-configmap.yaml b/deploy/apps/kartograph/base/openshell-policies-configmap.yaml new file mode 100644 index 000000000..d39012f7d --- /dev/null +++ b/deploy/apps/kartograph/base/openshell-policies-configmap.yaml @@ -0,0 +1,63 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: kartograph-openshell-policies + labels: + app.kubernetes.io/component: openshell +data: + gma-sticky-base.yaml: | + version: 1 + name: gma-sticky-base + enforcement: hard_requirement + 
endpoints: + - "kartograph-api:8000:read-write" + - "inference.local:443:read-write" + l7_allowed_paths: + - "/extraction/workloads/*" + gma-initial-schema-design.yaml: | + version: 1 + name: gma-initial-schema-design + enforcement: hard_requirement + endpoints: + - "kartograph-api:8000:read-write" + - "inference.local:443:read-write" + l7_allowed_paths: + - "/extraction/workloads/schema/*" + - "/extraction/workloads/graph/*" + gma-extraction-jobs.yaml: | + version: 1 + name: gma-extraction-jobs + enforcement: hard_requirement + endpoints: + - "kartograph-api:8000:read-write" + - "inference.local:443:read-write" + l7_allowed_paths: + - "/extraction/workloads/jobs/*" + - "/extraction/workloads/graph/*" + gma-one-off-mutations.yaml: | + version: 1 + name: gma-one-off-mutations + enforcement: hard_requirement + endpoints: + - "kartograph-api:8000:read-write" + - "inference.local:443:read-write" + l7_allowed_paths: + - "/extraction/workloads/mutations/*" + - "/extraction/workloads/graph/*" + extraction-job.yaml: | + version: 1 + name: extraction-job + enforcement: hard_requirement + endpoints: + - "kartograph-api:8000:read-write" + - "inference.local:443:read-write" + - "github.com:443:full" + - "*.github.com:443:full" + - "pypi.org:443:read-only" + - "files.pythonhosted.org:443:read-only" + - "aiplatform.googleapis.com:443:read-write" + - "*.aiplatform.googleapis.com:443:read-write" + - "oauth2.googleapis.com:443:read-write" + - "api.anthropic.com:443:read-write" + l7_allowed_paths: + - "/extraction/workloads/*" diff --git a/deploy/openshell/policies/extraction-job.yaml b/deploy/openshell/policies/extraction-job.yaml new file mode 100644 index 000000000..6c5aaca75 --- /dev/null +++ b/deploy/openshell/policies/extraction-job.yaml @@ -0,0 +1,17 @@ +version: 1 +name: extraction-job +enforcement: hard_requirement +description: Production network policy for batch extraction jobs. 
+endpoints: + - "kartograph-api:8000:read-write" + - "inference.local:443:read-write" + - "github.com:443:full" + - "*.github.com:443:full" + - "pypi.org:443:read-only" + - "files.pythonhosted.org:443:read-only" + - "aiplatform.googleapis.com:443:read-write" + - "*.aiplatform.googleapis.com:443:read-write" + - "oauth2.googleapis.com:443:read-write" + - "api.anthropic.com:443:read-write" +l7_allowed_paths: + - "/extraction/workloads/*" diff --git a/deploy/openshell/policies/gma-extraction-jobs.yaml b/deploy/openshell/policies/gma-extraction-jobs.yaml new file mode 100644 index 000000000..6f6d53d31 --- /dev/null +++ b/deploy/openshell/policies/gma-extraction-jobs.yaml @@ -0,0 +1,10 @@ +version: 1 +name: gma-extraction-jobs +enforcement: hard_requirement +description: Production policy for extraction-jobs graph-management mode. +endpoints: + - "kartograph-api:8000:read-write" + - "inference.local:443:read-write" +l7_allowed_paths: + - "/extraction/workloads/jobs/*" + - "/extraction/workloads/graph/*" diff --git a/deploy/openshell/policies/gma-initial-schema-design.yaml b/deploy/openshell/policies/gma-initial-schema-design.yaml new file mode 100644 index 000000000..c138d4bfc --- /dev/null +++ b/deploy/openshell/policies/gma-initial-schema-design.yaml @@ -0,0 +1,10 @@ +version: 1 +name: gma-initial-schema-design +enforcement: hard_requirement +description: Production policy for initial schema design mode. +endpoints: + - "kartograph-api:8000:read-write" + - "inference.local:443:read-write" +l7_allowed_paths: + - "/extraction/workloads/schema/*" + - "/extraction/workloads/graph/*" diff --git a/deploy/openshell/policies/gma-one-off-mutations.yaml b/deploy/openshell/policies/gma-one-off-mutations.yaml new file mode 100644 index 000000000..28350ee57 --- /dev/null +++ b/deploy/openshell/policies/gma-one-off-mutations.yaml @@ -0,0 +1,10 @@ +version: 1 +name: gma-one-off-mutations +enforcement: hard_requirement +description: Production policy for one-off graph mutations mode. 
+endpoints: + - "kartograph-api:8000:read-write" + - "inference.local:443:read-write" +l7_allowed_paths: + - "/extraction/workloads/mutations/*" + - "/extraction/workloads/graph/*" diff --git a/deploy/openshell/policies/gma-sticky-base.yaml b/deploy/openshell/policies/gma-sticky-base.yaml new file mode 100644 index 000000000..c28ad9828 --- /dev/null +++ b/deploy/openshell/policies/gma-sticky-base.yaml @@ -0,0 +1,9 @@ +version: 1 +name: gma-sticky-base +enforcement: hard_requirement +description: Production base policy for graph-management sticky sessions. +endpoints: + - "kartograph-api:8000:read-write" + - "inference.local:443:read-write" +l7_allowed_paths: + - "/extraction/workloads/*" diff --git a/src/agent-runtime/kartograph_agent_runtime/runtime_auth.py b/src/agent-runtime/kartograph_agent_runtime/runtime_auth.py new file mode 100644 index 000000000..9825d864a --- /dev/null +++ b/src/agent-runtime/kartograph_agent_runtime/runtime_auth.py @@ -0,0 +1,14 @@ +"""Runtime HTTP auth helpers for sticky session agent containers.""" + +from __future__ import annotations + +import secrets + +RUNTIME_AUTH_HEADER = "X-Kartograph-Runtime-Auth" + + +def runtime_auth_matches(*, expected: str, provided: str) -> bool: + """Constant-time comparison of stripped auth values as UTF-8 bytes (str compare_digest raises TypeError on non-ASCII); empty values never match.""" + if not expected or not provided: + return False + return secrets.compare_digest(expected.strip().encode("utf-8"), provided.strip().encode("utf-8")) diff --git a/src/agent-runtime/kartograph_agent_runtime/server.py b/src/agent-runtime/kartograph_agent_runtime/server.py index 82e414465..ae2180445 100644 --- a/src/agent-runtime/kartograph_agent_runtime/server.py +++ b/src/agent-runtime/kartograph_agent_runtime/server.py @@ -10,12 +10,13 @@ from pathlib import Path -from fastapi import FastAPI +from fastapi import FastAPI, Header, HTTPException from fastapi.responses import JSONResponse, StreamingResponse from pydantic import BaseModel, Field from kartograph_agent_runtime.executor import stream_turn_events from 
kartograph_agent_runtime.settings import AgentRuntimeSettings +from kartograph_agent_runtime.runtime_auth import runtime_auth_matches, RUNTIME_AUTH_HEADER logger = logging.getLogger(__name__) @@ -52,8 +53,26 @@ async def health(): return {"status": "ok", "session_id": settings.session_id} +def _require_runtime_auth(runtime_auth: str | None) -> None: + expected = settings.runtime_auth_token.strip() + if not expected: + return + if not runtime_auth_matches(expected=expected, provided=runtime_auth or ""): + raise HTTPException( + status_code=401, + detail={ + "code": "RUNTIME_AUTH_REQUIRED", + "message": "Missing or invalid runtime auth token.", + }, + ) + + @app.post("/v1/turn") -async def stream_turn(request: TurnRequest) -> StreamingResponse: +async def stream_turn( + request: TurnRequest, + x_kartograph_runtime_auth: str | None = Header(default=None, alias=RUNTIME_AUTH_HEADER), +) -> StreamingResponse: + _require_runtime_auth(x_kartograph_runtime_auth) logger.info( "agent_runtime_turn_started session_id=%s ui_mode=%s message_len=%s", settings.session_id, diff --git a/src/agent-runtime/kartograph_agent_runtime/settings.py b/src/agent-runtime/kartograph_agent_runtime/settings.py index 880409afa..fb8fac234 100644 --- a/src/agent-runtime/kartograph_agent_runtime/settings.py +++ b/src/agent-runtime/kartograph_agent_runtime/settings.py @@ -17,6 +17,7 @@ class AgentRuntimeSettings(BaseSettings): port: int = Field(default=8787) api_base_url: str = Field(default="http://api:8000", alias="KARTOGRAPH_API_BASE_URL") workload_token: str = Field(default="", alias="KARTOGRAPH_WORKLOAD_TOKEN") + runtime_auth_token: str = Field(default="", alias="KARTOGRAPH_RUNTIME_AUTH_TOKEN") tenant_id: str = Field(default="", alias="KARTOGRAPH_TENANT_ID") knowledge_graph_id: str = Field(default="", alias="KARTOGRAPH_KNOWLEDGE_GRAPH_ID") session_id: str = Field(default="", alias="KARTOGRAPH_SESSION_ID") diff --git a/src/agent-runtime/tests/test_server.py b/src/agent-runtime/tests/test_server.py 
index 131606439..9b1eda4be 100644 --- a/src/agent-runtime/tests/test_server.py +++ b/src/agent-runtime/tests/test_server.py @@ -3,17 +3,20 @@ from __future__ import annotations from pathlib import Path +from unittest.mock import patch import pytest from fastapi.testclient import TestClient from kartograph_agent_runtime import server +from kartograph_agent_runtime.runtime_auth import RUNTIME_AUTH_HEADER @pytest.fixture def client(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> TestClient: monkeypatch.setattr(server.settings, "workspace_dir", str(tmp_path)) monkeypatch.setattr(server.settings, "session_id", "session-test") + monkeypatch.setattr(server.settings, "runtime_auth_token", "runtime-secret") return TestClient(server.app) @@ -34,3 +37,25 @@ def test_health_returns_unavailable_when_workspace_marker_missing(client: TestCl assert response.status_code == 503 assert response.json()["status"] == "workspace_unavailable" + + +def test_turn_requires_runtime_auth_when_token_configured( + client: TestClient, + tmp_path: Path, +) -> None: + (tmp_path / "knowledge-graph-id").write_text("kg-1", encoding="utf-8") + + unauthorized = client.post("/v1/turn", json={"message": "hello"}) + assert unauthorized.status_code == 401 + + async def fake_stream(**_kwargs): + yield {"type": "done", "ok": True} + + with patch("kartograph_agent_runtime.server.stream_turn_events", side_effect=fake_stream): + authorized = client.post( + "/v1/turn", + json={"message": "hello"}, + headers={RUNTIME_AUTH_HEADER: "runtime-secret"}, + ) + + assert authorized.status_code == 200 diff --git a/src/api/extraction/application/sticky_session_runtime_service.py b/src/api/extraction/application/sticky_session_runtime_service.py index c740b2015..c5dded90e 100644 --- a/src/api/extraction/application/sticky_session_runtime_service.py +++ b/src/api/extraction/application/sticky_session_runtime_service.py @@ -23,6 +23,11 @@ from extraction.ports.sticky_session_bootstrap import IStickySessionBootstrapBuilder 
from shared_kernel.container_runtime.ports import ContainerRuntimeError +try: + from extraction.infrastructure.openshell.cli import OpenShellCliError +except ImportError: # pragma: no cover - defensive import ordering + OpenShellCliError = RuntimeError # type: ignore[misc,assignment] + from extraction.application.thinking_activity import thinking_event NDJSON_STREAM_HEADERS = { @@ -216,7 +221,7 @@ async def _stream_prepare_runtime( } return - if self._runtime_backend != "container": + if self._runtime_backend not in {"container", "openshell"}: lease = await asyncio.to_thread( self._sticky_runtime_manager.get_or_start_runtime, session_id=session.id, @@ -252,6 +257,7 @@ async def _stream_prepare_runtime( knowledge_graph_id=knowledge_graph_id, session_id=session.id, include_job_packages=include_job_packages, + ui_mode=ui_mode.value, ) session.runtime_context["workspace_materialization"] = { "job_package_ids": [source.package_id for source in job_packages], @@ -269,7 +275,7 @@ async def _stream_prepare_runtime( mode=mode.value, bootstrap=bootstrap, ) - except ContainerRuntimeError as exc: + except (ContainerRuntimeError, OpenShellCliError) as exc: session.runtime_context["sticky_runtime"] = { "phase": "failed", "status": "failed", @@ -326,10 +332,13 @@ async def _stream_prepare_runtime( @staticmethod def _lease_context(lease: StickySessionRuntimeLease, *, phase: str) -> dict[str, Any]: - return { + context: dict[str, Any] = { "container_id": lease.container_id, "status": lease.status, "expires_at": lease.expires_at.isoformat(), "runtime_base_url": lease.runtime_base_url, "phase": phase, } + if lease.runtime_auth_token: + context["runtime_auth_token"] = lease.runtime_auth_token + return context diff --git a/src/api/extraction/infrastructure/container_workload_runtime.py b/src/api/extraction/infrastructure/container_workload_runtime.py index c6208988a..79a586e10 100644 --- a/src/api/extraction/infrastructure/container_workload_runtime.py +++ 
b/src/api/extraction/infrastructure/container_workload_runtime.py @@ -8,6 +8,7 @@ from ulid import ULID +from extraction.infrastructure.runtime_session_auth import issue_runtime_auth_token from extraction.infrastructure.sticky_session_workspace_binds import ( build_sticky_session_workspace_binds, ) @@ -57,6 +58,15 @@ def __init__( container_run_gid: int | None = None, agent_turn_timeout_seconds: float = 1000.0, agent_max_turns: int = 500, + container_hardening_enabled: bool = True, + container_cap_drop_all: bool = True, + container_read_only_rootfs: bool = True, + container_no_new_privileges: bool = True, + container_pids_limit: int | None = 256, + container_memory_limit: str | None = "2g", + container_tmpfs_mounts: tuple[str, ...] = ( + "/tmp:rw,noexec,nosuid,size=512m", + ), ) -> None: self._container_runtime = container_runtime self._sticky_image = sticky_image @@ -74,6 +84,13 @@ def __init__( self._container_run_gid = container_run_gid self._agent_turn_timeout_seconds = agent_turn_timeout_seconds self._agent_max_turns = agent_max_turns + self._container_hardening_enabled = container_hardening_enabled + self._container_cap_drop_all = container_cap_drop_all + self._container_read_only_rootfs = container_read_only_rootfs + self._container_no_new_privileges = container_no_new_privileges + self._container_pids_limit = container_pids_limit + self._container_memory_limit = container_memory_limit + self._container_tmpfs_mounts = container_tmpfs_mounts self._leases: dict[str, StickySessionRuntimeLease] = {} def get_or_start_runtime( @@ -269,6 +286,7 @@ def _adopt_running_container_if_present( last_activity_at=now, expires_at=now + self._session_ttl, runtime_base_url=runtime_base_url, + runtime_auth_token=None, ) def _start_runtime( @@ -282,6 +300,7 @@ def _start_runtime( bootstrap: StickySessionRuntimeBootstrap | None, ) -> StickySessionRuntimeLease: container_name = _sanitize_container_name("kartograph-sticky-", session_id) + runtime_auth_token = 
issue_runtime_auth_token() env: dict[str, str] = { "KARTOGRAPH_SESSION_ID": session_id, "KARTOGRAPH_KNOWLEDGE_GRAPH_ID": knowledge_graph_id, @@ -290,6 +309,7 @@ def _start_runtime( "KARTOGRAPH_WORKSPACE_DIR": self._container_work_mount, "KARTOGRAPH_AGENT_TURN_TIMEOUT_SECONDS": str(int(self._agent_turn_timeout_seconds)), "KARTOGRAPH_AGENT_MAX_TURNS": str(int(self._agent_max_turns)), + "KARTOGRAPH_RUNTIME_AUTH_TOKEN": runtime_auth_token, } binds: list[str] = [] if bootstrap is not None: @@ -304,7 +324,6 @@ def _start_runtime( raise ValueError("sticky session credentials are expired") env.update( { - "KARTOGRAPH_WORKLOAD_TOKEN": bootstrap.credentials.token, "KARTOGRAPH_TENANT_ID": bootstrap.tenant_id, "KARTOGRAPH_API_BASE_URL": bootstrap.api_base_url, } @@ -334,6 +353,7 @@ def _start_runtime( self._remove_stale_container_name(container_name) + hardening = self._container_hardening_enabled launched = self._container_runtime.run( ContainerRunSpec( image=self._sticky_image, @@ -350,6 +370,12 @@ def _start_runtime( "kartograph.mode": mode, }, command=self._sticky_command, + cap_drop_all=hardening and self._container_cap_drop_all, + read_only_rootfs=hardening and self._container_read_only_rootfs, + no_new_privileges=hardening and self._container_no_new_privileges, + pids_limit=self._container_pids_limit if hardening else None, + memory_limit=self._container_memory_limit if hardening else None, + tmpfs_mounts=self._container_tmpfs_mounts if hardening else (), ) ) runtime_base_url = f"http://{container_name}:{self._sticky_service_port}" @@ -363,6 +389,7 @@ def _start_runtime( last_activity_at=now, expires_at=now + self._session_ttl, runtime_base_url=runtime_base_url, + runtime_auth_token=runtime_auth_token, ) def _remove_stale_container_name(self, container_name: str) -> None: diff --git a/src/api/extraction/infrastructure/extraction_job_runner_factory.py b/src/api/extraction/infrastructure/extraction_job_runner_factory.py index 69069e6ef..57f0d473c 100644 --- 
a/src/api/extraction/infrastructure/extraction_job_runner_factory.py +++ b/src/api/extraction/infrastructure/extraction_job_runner_factory.py @@ -5,6 +5,7 @@ from pathlib import Path from extraction.infrastructure.agentic_ci_extraction_job_runner import AgenticCiExtractionJobRunner +from extraction.infrastructure.openshell_extraction_job_runner import OpenShellExtractionJobRunner from extraction.infrastructure.extraction_job_workdir_materializer import ( ExtractionJobWorkdirMaterializer, ) @@ -38,7 +39,7 @@ def create_extraction_job_runner( if resolved.job_runner == "stub": return StubExtractionJobRunner() if session is None: - raise ValueError("database session is required for agentic-ci extraction jobs") + raise ValueError("database session is required for extraction job runners") prepared_reader = SqlPreparedJobPackageReader( session=session, job_package_work_dir=Path(resolved.job_package_work_dir), @@ -69,6 +70,11 @@ def create_extraction_job_runner( schema_service=schema_service, ), ) + if resolved.job_runner == "openshell": + return OpenShellExtractionJobRunner( + settings=resolved, + workdir_materializer=materializer, + ) return AgenticCiExtractionJobRunner( settings=resolved, workdir_materializer=materializer, diff --git a/src/api/extraction/infrastructure/openshell/__init__.py b/src/api/extraction/infrastructure/openshell/__init__.py new file mode 100644 index 000000000..a87544f9d --- /dev/null +++ b/src/api/extraction/infrastructure/openshell/__init__.py @@ -0,0 +1 @@ +"""OpenShell integration for extraction workload sandboxes.""" diff --git a/src/api/extraction/infrastructure/openshell/audit.py b/src/api/extraction/infrastructure/openshell/audit.py new file mode 100644 index 000000000..d68a6d6a2 --- /dev/null +++ b/src/api/extraction/infrastructure/openshell/audit.py @@ -0,0 +1,65 @@ +"""Domain observability probes for OpenShell sandbox lifecycle.""" + +from __future__ import annotations + +from dataclasses import dataclass +from typing import Any, 
Protocol + + +@dataclass(frozen=True) +class OpenShellPolicyAppliedObservation: + sandbox_name: str + policy_name: str + enforcement: str + endpoint_count: int + ui_mode: str | None = None + job_id: str | None = None + + +@dataclass(frozen=True) +class OpenShellSandboxLifecycleObservation: + sandbox_name: str + action: str + image: str | None = None + forward_port: int | None = None + session_id: str | None = None + job_id: str | None = None + + +class OpenShellRuntimeProbe(Protocol): + def policy_applied(self, observation: OpenShellPolicyAppliedObservation) -> None: + """Emit when network/L7 policy is applied to a sandbox.""" + + def sandbox_lifecycle(self, observation: OpenShellSandboxLifecycleObservation) -> None: + """Emit when a sandbox is created, started, or deleted.""" + + +class LoggingOpenShellRuntimeProbe: + """Default probe aligned with OCSF network-activity semantics.""" + + def __init__(self, *, sink: Any | None = None) -> None: + import logging + + self._logger = sink or logging.getLogger("kartograph.extraction.openshell") + + def policy_applied(self, observation: OpenShellPolicyAppliedObservation) -> None: + self._logger.info( + "openshell_policy_applied sandbox=%s policy=%s enforcement=%s endpoints=%s ui_mode=%s job_id=%s", + observation.sandbox_name, + observation.policy_name, + observation.enforcement, + observation.endpoint_count, + observation.ui_mode, + observation.job_id, + ) + + def sandbox_lifecycle(self, observation: OpenShellSandboxLifecycleObservation) -> None: + self._logger.info( + "openshell_sandbox_lifecycle sandbox=%s action=%s image=%s forward_port=%s session_id=%s job_id=%s", + observation.sandbox_name, + observation.action, + observation.image, + observation.forward_port, + observation.session_id, + observation.job_id, + ) diff --git a/src/api/extraction/infrastructure/openshell/cli.py b/src/api/extraction/infrastructure/openshell/cli.py new file mode 100644 index 000000000..3fd54c115 --- /dev/null +++ 
b/src/api/extraction/infrastructure/openshell/cli.py @@ -0,0 +1,72 @@ +"""Subprocess wrapper for OpenShell CLI commands.""" + +from __future__ import annotations + +import logging +import subprocess +from typing import Sequence + +logger = logging.getLogger("kartograph.extraction.openshell.cli") + +_SECRET_PREFIXES = ("private_key=", "GCP_SA_ACCESS_TOKEN=", "KARTOGRAPH_RUNTIME_AUTH_TOKEN=") + + +class OpenShellCliError(RuntimeError): + """Raised when an OpenShell CLI command fails.""" + + +def redact_args(args: Sequence[str]) -> list[str]: + safe: list[str] = [] + for arg in args: + if any(arg.startswith(prefix) for prefix in _SECRET_PREFIXES): + key = arg.split("=", 1)[0] + safe.append(f"{key}=<redacted>") + else: + safe.append(arg) + return safe + + +def run_openshell( + args: Sequence[str], + *, + check: bool = True, + capture_output: bool = True, + timeout: float | None = 120.0, + text: bool = True, +) -> subprocess.CompletedProcess[str]: + command = ["openshell", *args] + logger.debug("openshell_exec command=%s", " ".join(redact_args(command))) + try: + result = subprocess.run( + command, + capture_output=capture_output, + text=text, + check=False, + timeout=timeout, + ) + except FileNotFoundError as exc: + raise OpenShellCliError( + "openshell CLI not found on PATH; install OpenShell to use the openshell backend" + ) from exc + except subprocess.TimeoutExpired as exc: + raise OpenShellCliError(f"openshell command timed out: {' '.join(redact_args(command))}") from exc + if check and result.returncode != 0: + detail = (result.stderr or result.stdout or "").strip() or "unknown error" + raise OpenShellCliError(f"openshell {' '.join(redact_args(args))} failed: {detail}") + return result + + +def popen_openshell(args: Sequence[str]) -> subprocess.Popen[str]: + command = ["openshell", *args] + logger.debug("openshell_popen command=%s", " ".join(redact_args(command))) + try: + return subprocess.Popen( + command, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + ) 
+ except FileNotFoundError as exc: + raise OpenShellCliError( + "openshell CLI not found on PATH; install OpenShell to use the openshell backend" + ) from exc diff --git a/src/api/extraction/infrastructure/openshell/gateway.py b/src/api/extraction/infrastructure/openshell/gateway.py new file mode 100644 index 000000000..e272a096f --- /dev/null +++ b/src/api/extraction/infrastructure/openshell/gateway.py @@ -0,0 +1,29 @@ +"""OpenShell gateway lifecycle helpers.""" + +from __future__ import annotations + +from extraction.infrastructure.openshell.cli import run_openshell + + +def gateway_is_running() -> bool: + result = run_openshell(["status"], check=False) + if result.returncode != 0: + return False + return "No gateway configured" not in (result.stdout or "") + + +def ensure_gateway_registered(*, gateway_name: str, gateway_url: str) -> None: + """Ensure a gateway is registered without starting local podman services.""" + if gateway_is_running(): + return + run_openshell( + [ + "gateway", + "add", + gateway_url, + "--local", + "--name", + gateway_name, + ], + timeout=30.0, + ) diff --git a/src/api/extraction/infrastructure/openshell/openshell_sticky_session_runtime_manager.py b/src/api/extraction/infrastructure/openshell/openshell_sticky_session_runtime_manager.py new file mode 100644 index 000000000..8c83eace8 --- /dev/null +++ b/src/api/extraction/infrastructure/openshell/openshell_sticky_session_runtime_manager.py @@ -0,0 +1,371 @@ +"""OpenShell-backed sticky session runtime manager.""" + +from __future__ import annotations + +import re +from dataclasses import replace +from datetime import UTC, datetime, timedelta +from urllib.parse import urlparse + +from extraction.infrastructure.openshell import gateway as openshell_gateway +from extraction.infrastructure.openshell import sandbox as openshell_sandbox +from extraction.infrastructure.openshell.audit import LoggingOpenShellRuntimeProbe, OpenShellRuntimeProbe +from extraction.infrastructure.runtime_session_auth 
import issue_runtime_auth_token +from extraction.infrastructure.vertex_runtime_env import build_vertex_container_env +from extraction.ports.runtime import ( + IStickySessionRuntimeManager, + StickySessionRuntimeBootstrap, + StickySessionRuntimeLease, +) + +_CONTAINER_NAME_SAFE = re.compile(r"[^a-zA-Z0-9_.-]+") + + +def _sanitize_sandbox_name(session_id: str) -> str: + return openshell_sandbox.sanitize_sandbox_name("kartograph-gma-", session_id) + + +def _forward_port(*, session_id: str, base: int) -> int: + digest = sum(ord(char) for char in session_id) + return base + (digest % 900) + + +def _api_host_from_base_url(api_base_url: str) -> str: + parsed = urlparse(api_base_url) + if parsed.hostname: + port_suffix = f":{parsed.port}" if parsed.port else "" + return f"{parsed.hostname}{port_suffix}" + return "api:8000" + + +class OpenShellStickySessionRuntimeManager(IStickySessionRuntimeManager): + """Sticky runtime manager using OpenShell sandboxes with network policy.""" + + def __init__( + self, + *, + sticky_image: str, + session_ttl: timedelta = timedelta(minutes=60), + sticky_service_port: int = 8787, + container_work_mount: str = "/workspace", + vertex_project_id: str = "", + vertex_region: str = "us-east5", + vertex_enabled: bool = False, + agent_turn_timeout_seconds: float = 1000.0, + agent_max_turns: int = 500, + api_base_url: str = "http://api:8000", + gateway_name: str = "kartograph", + gateway_url: str = "https://localhost:17670", + provider_name: str = "kartograph-gma", + runtime_host: str = "host.docker.internal", + forward_port_base: int = 18787, + policy_dir: str | None = None, + policy_enforcement: str = "soft", + probe: OpenShellRuntimeProbe | None = None, + ) -> None: + self._sticky_image = sticky_image + self._session_ttl = session_ttl + self._sticky_service_port = sticky_service_port + self._container_work_mount = container_work_mount + self._vertex_project_id = vertex_project_id + self._vertex_region = vertex_region + self._vertex_enabled = 
vertex_enabled + self._agent_turn_timeout_seconds = agent_turn_timeout_seconds + self._agent_max_turns = agent_max_turns + self._api_base_url = api_base_url + self._gateway_name = gateway_name + self._gateway_url = gateway_url + self._provider_name = provider_name + self._runtime_host = runtime_host.rstrip("/") + self._forward_port_base = forward_port_base + self._policy_dir = policy_dir + self._policy_enforcement = policy_enforcement + self._probe = probe or LoggingOpenShellRuntimeProbe() + self._leases: dict[str, StickySessionRuntimeLease] = {} + + def get_or_start_runtime( + self, + *, + session_id: str, + user_id: str, + knowledge_graph_id: str, + mode: str, + bootstrap: StickySessionRuntimeBootstrap | None = None, + ) -> StickySessionRuntimeLease: + now = datetime.now(UTC) + existing = self._leases.get(session_id) + if existing is not None and existing.expires_at > now: + if openshell_sandbox.sandbox_exists(existing.container_id): + refreshed = replace( + existing, + last_activity_at=now, + expires_at=now + self._session_ttl, + status="active", + ) + self._leases[session_id] = refreshed + return refreshed + + if existing is not None: + self._terminate_sandbox(existing) + + lease = self._start_runtime( + session_id=session_id, + user_id=user_id, + knowledge_graph_id=knowledge_graph_id, + mode=mode, + now=now, + bootstrap=bootstrap, + ) + self._leases[session_id] = lease + return lease + + def reset_runtime( + self, + *, + session_id: str, + user_id: str, + knowledge_graph_id: str, + mode: str, + bootstrap: StickySessionRuntimeBootstrap | None = None, + ) -> StickySessionRuntimeLease: + existing = self._leases.pop(session_id, None) + if existing is not None: + self._terminate_sandbox(existing) + return self.get_or_start_runtime( + session_id=session_id, + user_id=user_id, + knowledge_graph_id=knowledge_graph_id, + mode=mode, + bootstrap=bootstrap, + ) + + def terminate_runtime( + self, + *, + session_id: str, + user_id: str, + knowledge_graph_id: str, + mode: 
str, + ) -> None: + existing = self._leases.pop(session_id, None) + if existing is not None: + self._terminate_sandbox(existing) + + def cleanup_expired(self, *, now: datetime) -> list[str]: + expired = [ + session_id + for session_id, lease in self._leases.items() + if lease.expires_at <= now + ] + terminated: list[str] = [] + for session_id in expired: + lease = self._leases.pop(session_id) + self._terminate_sandbox(lease) + terminated.append(lease.container_id) + return terminated + + def try_resolve_active_lease( + self, + *, + session_id: str, + user_id: str = "", + knowledge_graph_id: str = "", + mode: str = "", + container_id: str | None = None, + ) -> StickySessionRuntimeLease | None: + now = datetime.now(UTC) + lease = self._leases.get(session_id) + sandbox_name = container_id or _sanitize_sandbox_name(session_id) + if lease is not None and lease.expires_at > now: + if openshell_sandbox.sandbox_exists(lease.container_id): + refreshed = replace( + lease, + last_activity_at=now, + expires_at=now + self._session_ttl, + status="active", + ) + self._leases[session_id] = refreshed + return refreshed + if openshell_sandbox.sandbox_exists(sandbox_name): + forward_port = _forward_port(session_id=session_id, base=self._forward_port_base) + return StickySessionRuntimeLease( + session_id=session_id, + container_id=sandbox_name, + user_id=user_id or (lease.user_id if lease else ""), + knowledge_graph_id=knowledge_graph_id or (lease.knowledge_graph_id if lease else ""), + mode=mode or (lease.mode if lease else ""), + status="active", + last_activity_at=now, + expires_at=now + self._session_ttl, + runtime_base_url=f"http://{self._runtime_host}:{forward_port}", + runtime_auth_token=lease.runtime_auth_token if lease else None, + ) + return None + + def is_runtime_active( + self, + *, + session_id: str, + container_id: str | None = None, + user_id: str = "", + knowledge_graph_id: str = "", + mode: str = "", + ) -> bool: + return ( + self.try_resolve_active_lease( + 
session_id=session_id, + container_id=container_id, + user_id=user_id, + knowledge_graph_id=knowledge_graph_id, + mode=mode, + ) + is not None + ) + + def _start_runtime( + self, + *, + session_id: str, + user_id: str, + knowledge_graph_id: str, + mode: str, + now: datetime, + bootstrap: StickySessionRuntimeBootstrap | None, + ) -> StickySessionRuntimeLease: + openshell_gateway.ensure_gateway_registered( + gateway_name=self._gateway_name, + gateway_url=self._gateway_url, + ) + sandbox_name = _sanitize_sandbox_name(session_id) + forward_port = _forward_port(session_id=session_id, base=self._forward_port_base) + runtime_auth_token = issue_runtime_auth_token() + + if bootstrap is not None: + required_scopes = { + f"tenant:{bootstrap.tenant_id}", + f"knowledge_graph:{knowledge_graph_id}", + "workload:chat", + } + if not required_scopes.issubset(set(bootstrap.credentials.scopes)): + raise ValueError("sticky session credentials scope is invalid") + if bootstrap.credentials.expires_at <= datetime.now(UTC): + raise ValueError("sticky session credentials are expired") + + openshell_sandbox.delete_sandbox(sandbox_name) + openshell_sandbox.create_sandbox( + name=sandbox_name, + image=self._sticky_image, + provider_name=self._provider_name, + ) + openshell_sandbox.emit_lifecycle( + sandbox_name=sandbox_name, + action="created", + probe=self._probe, + image=self._sticky_image, + forward_port=forward_port, + session_id=session_id, + ) + + if bootstrap is not None: + openshell_sandbox.upload_path( + sandbox_name=sandbox_name, + local_path=bootstrap.host_session_work_dir, + dest=self._container_work_mount, + ) + + openshell_sandbox.apply_policy( + sandbox_name=sandbox_name, + ui_mode=(bootstrap.ui_mode if bootstrap else None) or mode, + workload="gma", + policy_dir=self._policy_dir, + api_host=_api_host_from_base_url(bootstrap.api_base_url if bootstrap else self._api_base_url), + policy_enforcement=self._policy_enforcement, + probe=self._probe, + ) + + env = 
self._build_runtime_env( + session_id=session_id, + user_id=user_id, + knowledge_graph_id=knowledge_graph_id, + mode=mode, + runtime_auth_token=runtime_auth_token, + bootstrap=bootstrap, + ) + openshell_sandbox.exec_background( + sandbox_name=sandbox_name, + env=env, + command=( + "/runtime/.venv/bin/python", + "-m", + "kartograph_agent_runtime", + ), + ) + openshell_sandbox.start_forward(sandbox_name=sandbox_name, port=forward_port) + openshell_sandbox.emit_lifecycle( + sandbox_name=sandbox_name, + action="started", + probe=self._probe, + forward_port=forward_port, + session_id=session_id, + ) + + runtime_base_url = f"http://{self._runtime_host}:{forward_port}" + return StickySessionRuntimeLease( + session_id=session_id, + container_id=sandbox_name, + user_id=user_id, + knowledge_graph_id=knowledge_graph_id, + mode=mode, + status="active", + last_activity_at=now, + expires_at=now + self._session_ttl, + runtime_base_url=runtime_base_url, + runtime_auth_token=runtime_auth_token, + ) + + def _build_runtime_env( + self, + *, + session_id: str, + user_id: str, + knowledge_graph_id: str, + mode: str, + runtime_auth_token: str, + bootstrap: StickySessionRuntimeBootstrap | None, + ) -> dict[str, str]: + env: dict[str, str] = { + "KARTOGRAPH_SESSION_ID": session_id, + "KARTOGRAPH_KNOWLEDGE_GRAPH_ID": knowledge_graph_id, + "KARTOGRAPH_USER_ID": user_id, + "KARTOGRAPH_SESSION_MODE": mode, + "KARTOGRAPH_WORKSPACE_DIR": self._container_work_mount, + "KARTOGRAPH_AGENT_TURN_TIMEOUT_SECONDS": str(int(self._agent_turn_timeout_seconds)), + "KARTOGRAPH_AGENT_MAX_TURNS": str(int(self._agent_max_turns)), + "KARTOGRAPH_RUNTIME_AUTH_TOKEN": runtime_auth_token, + } + if bootstrap is not None: + env.update( + { + "KARTOGRAPH_TENANT_ID": bootstrap.tenant_id, + "KARTOGRAPH_API_BASE_URL": bootstrap.api_base_url, + } + ) + if self._vertex_enabled: + env.update( + build_vertex_container_env( + project_id=self._vertex_project_id, + region=self._vertex_region, + ) + ) + return env + + def 
_terminate_sandbox(self, lease: StickySessionRuntimeLease) -> None: + forward_port = _forward_port(session_id=lease.session_id, base=self._forward_port_base) + openshell_sandbox.stop_forward(sandbox_name=lease.container_id, port=forward_port) + openshell_sandbox.delete_sandbox(lease.container_id) + openshell_sandbox.emit_lifecycle( + sandbox_name=lease.container_id, + action="deleted", + probe=self._probe, + session_id=lease.session_id, + ) diff --git a/src/api/extraction/infrastructure/openshell/policies/extraction-job.yaml b/src/api/extraction/infrastructure/openshell/policies/extraction-job.yaml new file mode 100644 index 000000000..dabc52eab --- /dev/null +++ b/src/api/extraction/infrastructure/openshell/policies/extraction-job.yaml @@ -0,0 +1,17 @@ +version: 1 +name: extraction-job +enforcement: soft +description: Network policy for batch extraction jobs executed in OpenShell sandboxes. +endpoints: + - "api:8000:read-write" + - "inference.local:443:read-write" + - "github.com:443:full" + - "*.github.com:443:full" + - "pypi.org:443:read-only" + - "files.pythonhosted.org:443:read-only" + - "aiplatform.googleapis.com:443:read-write" + - "*.aiplatform.googleapis.com:443:read-write" + - "oauth2.googleapis.com:443:read-write" + - "api.anthropic.com:443:read-write" +l7_allowed_paths: + - "/extraction/workloads/*" diff --git a/src/api/extraction/infrastructure/openshell/policies/gma-extraction-jobs.yaml b/src/api/extraction/infrastructure/openshell/policies/gma-extraction-jobs.yaml new file mode 100644 index 000000000..fa779b175 --- /dev/null +++ b/src/api/extraction/infrastructure/openshell/policies/gma-extraction-jobs.yaml @@ -0,0 +1,10 @@ +version: 1 +name: gma-extraction-jobs +enforcement: soft +description: Sticky session policy for extraction-jobs graph-management mode. 
+endpoints: + - "api:8000:read-write" + - "inference.local:443:read-write" +l7_allowed_paths: + - "/extraction/workloads/jobs/*" + - "/extraction/workloads/graph/*" diff --git a/src/api/extraction/infrastructure/openshell/policies/gma-initial-schema-design.yaml b/src/api/extraction/infrastructure/openshell/policies/gma-initial-schema-design.yaml new file mode 100644 index 000000000..b377dd7f6 --- /dev/null +++ b/src/api/extraction/infrastructure/openshell/policies/gma-initial-schema-design.yaml @@ -0,0 +1,10 @@ +version: 1 +name: gma-initial-schema-design +enforcement: soft +description: Sticky session policy for initial schema design mode. +endpoints: + - "api:8000:read-write" + - "inference.local:443:read-write" +l7_allowed_paths: + - "/extraction/workloads/schema/*" + - "/extraction/workloads/graph/*" diff --git a/src/api/extraction/infrastructure/openshell/policies/gma-one-off-mutations.yaml b/src/api/extraction/infrastructure/openshell/policies/gma-one-off-mutations.yaml new file mode 100644 index 000000000..6c8de4132 --- /dev/null +++ b/src/api/extraction/infrastructure/openshell/policies/gma-one-off-mutations.yaml @@ -0,0 +1,10 @@ +version: 1 +name: gma-one-off-mutations +enforcement: soft +description: Sticky session policy for one-off graph mutations mode. +endpoints: + - "api:8000:read-write" + - "inference.local:443:read-write" +l7_allowed_paths: + - "/extraction/workloads/mutations/*" + - "/extraction/workloads/graph/*" diff --git a/src/api/extraction/infrastructure/openshell/policies/gma-sticky-base.yaml b/src/api/extraction/infrastructure/openshell/policies/gma-sticky-base.yaml new file mode 100644 index 000000000..2f7526e05 --- /dev/null +++ b/src/api/extraction/infrastructure/openshell/policies/gma-sticky-base.yaml @@ -0,0 +1,9 @@ +version: 1 +name: gma-sticky-base +enforcement: soft +description: Base network policy for graph-management sticky sessions. 
+endpoints: + - "api:8000:read-write" + - "inference.local:443:read-write" +l7_allowed_paths: + - "/extraction/workloads/*" diff --git a/src/api/extraction/infrastructure/openshell/policy.py b/src/api/extraction/infrastructure/openshell/policy.py new file mode 100644 index 000000000..537d738ca --- /dev/null +++ b/src/api/extraction/infrastructure/openshell/policy.py @@ -0,0 +1,111 @@ +"""OpenShell network policy resolution for GMA and extraction workloads.""" + +from __future__ import annotations + +import os +from pathlib import Path +from typing import Literal + +import yaml + +from extraction.domain.value_objects import GraphManagementUiMode + +PolicyEnforcement = Literal["soft", "hard_requirement"] + +_BUNDLED_POLICY_DIR = Path(__file__).resolve().parent / "policies" + +_DEFAULT_API_ENDPOINT = "api:8000:read-write" +_DEFAULT_INFERENCE_ENDPOINT = "inference.local:443:read-write" + +_MODE_POLICY_FILES: dict[str, str] = { + GraphManagementUiMode.INITIAL_SCHEMA_DESIGN.value: "gma-initial-schema-design.yaml", + GraphManagementUiMode.EXTRACTION_JOBS.value: "gma-extraction-jobs.yaml", + GraphManagementUiMode.ONE_OFF_MUTATIONS.value: "gma-one-off-mutations.yaml", +} + + +def bundled_policy_dir() -> Path: + return _BUNDLED_POLICY_DIR + + +def resolve_policy_path( + *, + ui_mode: str | None = None, + workload: Literal["gma", "extraction_job"] = "gma", + policy_dir: str | None = None, +) -> Path: + base = Path(policy_dir) if policy_dir else _BUNDLED_POLICY_DIR + if workload == "extraction_job": + return base / "extraction-job.yaml" + if ui_mode and ui_mode in _MODE_POLICY_FILES: + return base / _MODE_POLICY_FILES[ui_mode] + return base / "gma-sticky-base.yaml" + + +def load_policy_yaml(path: Path) -> dict: + if not path.is_file(): + return {} + with path.open(encoding="utf-8") as handle: + data = yaml.safe_load(handle) + return data if isinstance(data, dict) else {} + + +def resolve_endpoints( + *, + ui_mode: str | None = None, + workload: Literal["gma", "extraction_job"] 
= "gma", + policy_dir: str | None = None, + api_host: str | None = None, +) -> tuple[str, ...]: + """Return OpenShell ``policy update --add-endpoint`` strings.""" + path = resolve_policy_path(ui_mode=ui_mode, workload=workload, policy_dir=policy_dir) + document = load_policy_yaml(path) + raw = document.get("endpoints") + if isinstance(raw, list) and raw: + endpoints = [str(item).strip() for item in raw if str(item).strip()] + else: + endpoints = [_DEFAULT_API_ENDPOINT, _DEFAULT_INFERENCE_ENDPOINT] + + if api_host: + rewritten: list[str] = [] + for endpoint in endpoints: + parts = endpoint.split(":") + if parts and parts[0] == "api" and len(parts) >= 3: + access_and_rest = ":".join(parts[2:]) + rewritten.append(f"{api_host}:{access_and_rest}") + else: + rewritten.append(endpoint) + endpoints = rewritten + return tuple(endpoints) + + +def resolve_enforcement( + *, + ui_mode: str | None = None, + workload: Literal["gma", "extraction_job"] = "gma", + policy_dir: str | None = None, + default: PolicyEnforcement = "soft", +) -> PolicyEnforcement: + path = resolve_policy_path(ui_mode=ui_mode, workload=workload, policy_dir=policy_dir) + document = load_policy_yaml(path) + configured = str(document.get("enforcement", default)).strip() + if configured in {"soft", "hard_requirement"}: + return configured # type: ignore[return-value] + env_override = os.getenv("KARTOGRAPH_OPENSHELL_POLICY_ENFORCEMENT", "").strip() + if env_override in {"soft", "hard_requirement"}: + return env_override # type: ignore[return-value] + return default + + +def resolve_l7_paths( + *, + ui_mode: str | None = None, + workload: Literal["gma", "extraction_job"] = "gma", + policy_dir: str | None = None, +) -> tuple[str, ...]: + path = resolve_policy_path(ui_mode=ui_mode, workload=workload, policy_dir=policy_dir) + document = load_policy_yaml(path) + raw = document.get("l7_allowed_paths") + if not isinstance(raw, list): + return () + return tuple(str(item).strip() for item in raw if str(item).strip()) 
diff --git a/src/api/extraction/infrastructure/openshell/sandbox.py b/src/api/extraction/infrastructure/openshell/sandbox.py new file mode 100644 index 000000000..85497d13f --- /dev/null +++ b/src/api/extraction/infrastructure/openshell/sandbox.py @@ -0,0 +1,198 @@ +"""OpenShell sandbox lifecycle operations.""" + +from __future__ import annotations + +import re +import shlex +from pathlib import Path + +from extraction.infrastructure.openshell.audit import ( + LoggingOpenShellRuntimeProbe, + OpenShellPolicyAppliedObservation, + OpenShellRuntimeProbe, + OpenShellSandboxLifecycleObservation, +) +from extraction.infrastructure.openshell.cli import popen_openshell, run_openshell +from extraction.infrastructure.openshell.policy import resolve_endpoints, resolve_enforcement + +_CONTAINER_NAME_SAFE = re.compile(r"[^a-zA-Z0-9_.-]+") + + +def sanitize_sandbox_name(prefix: str, identifier: str) -> str: + cleaned = _CONTAINER_NAME_SAFE.sub("-", identifier).strip("-") + name = f"{prefix}{cleaned}" + return name[:63].rstrip("-_.") or f"{prefix}runtime" + + +def sandbox_exists(name: str) -> bool: + result = run_openshell(["sandbox", "get", name], check=False) + return result.returncode == 0 + + +def create_sandbox( + *, + name: str, + image: str, + provider_name: str | None = None, +) -> None: + args = [ + "sandbox", + "create", + "--name", + name, + "--no-tty", + "--no-auto-providers", + ] + if provider_name: + args.extend(["--provider", provider_name]) + args.extend(["--from", image, "--", "sleep", "infinity"]) + run_openshell(args, timeout=300.0) + + +def delete_sandbox(name: str) -> None: + if not sandbox_exists(name): + return + run_openshell(["sandbox", "delete", name], check=False, timeout=120.0) + + +def upload_path(*, sandbox_name: str, local_path: str, dest: str | None = None) -> None: + args = ["sandbox", "upload", "--no-git-ignore", sandbox_name, local_path] + if dest: + args.extend(["--dest", dest]) + run_openshell(args, timeout=600.0) + + +def apply_policy( + *, + 
sandbox_name: str, + ui_mode: str | None = None, + workload: str = "gma", + policy_dir: str | None = None, + api_host: str | None = None, + policy_enforcement: str = "soft", + probe: OpenShellRuntimeProbe | None = None, +) -> None: + endpoints = resolve_endpoints( + ui_mode=ui_mode, + workload=workload, # type: ignore[arg-type] + policy_dir=policy_dir, + api_host=api_host, + ) + enforcement = resolve_enforcement( + ui_mode=ui_mode, + workload=workload, # type: ignore[arg-type] + policy_dir=policy_dir, + default=policy_enforcement, # type: ignore[arg-type] + ) + if not endpoints: + return + + args = [ + "policy", + "update", + "--wait", + "--binary", + "/runtime/.venv/bin/python", + ] + if enforcement == "hard_requirement": + args.extend(["--enforcement", "hard_requirement"]) + for endpoint in endpoints: + args.extend(["--add-endpoint", endpoint]) + args.append(sandbox_name) + run_openshell(args, timeout=120.0) + + resolved_probe = probe or LoggingOpenShellRuntimeProbe() + policy_name = ui_mode or workload + resolved_probe.policy_applied( + OpenShellPolicyAppliedObservation( + sandbox_name=sandbox_name, + policy_name=policy_name, + enforcement=enforcement, + endpoint_count=len(endpoints), + ui_mode=ui_mode, + ) + ) + + +def start_forward(*, sandbox_name: str, port: int) -> None: + run_openshell( + ["forward", "start", str(port), sandbox_name, "-d"], + timeout=30.0, + ) + + +def stop_forward(*, sandbox_name: str, port: int) -> None: + run_openshell( + ["forward", "stop", str(port), sandbox_name], + check=False, + timeout=30.0, + ) + + +def exec_background( + *, + sandbox_name: str, + env: dict[str, str], + command: tuple[str, ...], +) -> None: + exports = " ".join(f"export {key}={shlex.quote(value)};" for key, value in sorted(env.items())) + shell_command = f"{exports} exec {' '.join(shlex.quote(part) for part in command)}" + run_openshell( + [ + "sandbox", + "exec", + "--name", + sandbox_name, + "--no-tty", + "--", + "bash", + "-lc", + f"nohup {shell_command} 
>/tmp/agent-runtime.log 2>&1 &", + ], + timeout=60.0, + ) + + +def write_env_script( + *, + sandbox_name: str, + env: dict[str, str], + dest: str = "/tmp/kartograph-runtime-env.sh", +) -> None: + lines = [f"export {key}={shlex.quote(value)}" for key, value in sorted(env.items())] + script = "\n".join(lines) + "\n" + local = Path("/tmp") / f"{sandbox_name}-env.sh" + local.write_text(script, encoding="utf-8") + try: + upload_path(sandbox_name=sandbox_name, local_path=str(local), dest=dest) + finally: + local.unlink(missing_ok=True) + + +def emit_lifecycle( + *, + sandbox_name: str, + action: str, + probe: OpenShellRuntimeProbe | None = None, + image: str | None = None, + forward_port: int | None = None, + session_id: str | None = None, + job_id: str | None = None, +) -> None: + resolved_probe = probe or LoggingOpenShellRuntimeProbe() + resolved_probe.sandbox_lifecycle( + OpenShellSandboxLifecycleObservation( + sandbox_name=sandbox_name, + action=action, + image=image, + forward_port=forward_port, + session_id=session_id, + job_id=job_id, + ) + ) + + +def exec_streaming(*, sandbox_name: str, command: list[str]): + return popen_openshell( + ["sandbox", "exec", "--name", sandbox_name, "--no-tty", "--", *command] + ) diff --git a/src/api/extraction/infrastructure/openshell_extraction_job_runner.py b/src/api/extraction/infrastructure/openshell_extraction_job_runner.py new file mode 100644 index 000000000..d03523f57 --- /dev/null +++ b/src/api/extraction/infrastructure/openshell_extraction_job_runner.py @@ -0,0 +1,282 @@ +"""Run extraction jobs inside OpenShell sandboxes.""" + +from __future__ import annotations + +import json +import os +import shlex +import tempfile +import time +from datetime import UTC, datetime +from pathlib import Path +from typing import Any +from urllib.parse import urlparse + +from agentic_ci import otel +from agentic_ci.harness import create_harness + +from extraction.domain.extraction_job import ExtractionJobRecord +from 
extraction.infrastructure.extraction_job_activity import ( + activity_log_path, + append_activity_line, + append_activity_message, + format_activity_log_line, + format_claude_code_stream_line, +) +from extraction.infrastructure.extraction_job_metrics import merge_extraction_job_metrics +from extraction.infrastructure.extraction_job_prompt import ( + EXTRACTION_JOB_INVOKE_PROMPT, + build_extraction_job_prompt, + write_extraction_prompt_file, +) +from extraction.infrastructure.extraction_job_verdict import require_successful_apply +from extraction.infrastructure.extraction_job_workdir_materializer import ( + ExtractionJobWorkdirMaterializer, +) +from extraction.infrastructure.openshell import gateway as openshell_gateway +from extraction.infrastructure.openshell import sandbox as openshell_sandbox +from extraction.infrastructure.openshell.audit import LoggingOpenShellRuntimeProbe +from extraction.infrastructure.workload_runtime_factory import get_workload_credential_issuer +from extraction.infrastructure.workload_runtime_settings import ( + ExtractionWorkloadRuntimeSettings, + get_extraction_workload_runtime_settings, +) +from extraction.ports.extraction_job_runner import IExtractionJobRunner + + +def _strip_harness_binary(command: list[str]) -> list[str]: + if command and command[0] in {"claude", "opencode"}: + return command[1:] + return command + + +def _patch_job_context_api_base(workdir: Path, api_base_url: str) -> None: + context_path = workdir / "job-context.json" + context = json.loads(context_path.read_text(encoding="utf-8")) + context["api_base_url"] = api_base_url.rstrip("/") + context_path.write_text(json.dumps(context, indent=2) + "\n", encoding="utf-8") + + +def _api_host_from_base_url(api_base_url: str) -> str: + parsed = urlparse(api_base_url) + if parsed.hostname: + port_suffix = f":{parsed.port}" if parsed.port else "" + return f"{parsed.hostname}{port_suffix}" + return "api:8000" + + +class OpenShellExtractionJobRunner(IExtractionJobRunner): + 
"""Execute one extraction job inside an OpenShell sandbox with network policy.""" + + def __init__( + self, + *, + settings: ExtractionWorkloadRuntimeSettings | None = None, + workdir_materializer: ExtractionJobWorkdirMaterializer | None = None, + ) -> None: + self._settings = settings or get_extraction_workload_runtime_settings() + self._workdir_materializer = workdir_materializer + self._harness = create_harness(self._settings.agentic_ci_harness) + self._probe = LoggingOpenShellRuntimeProbe() + + async def run(self, job: ExtractionJobRecord, *, tenant_id: str) -> dict[str, Any]: + if self._workdir_materializer is None: + raise RuntimeError("OpenShellExtractionJobRunner requires a workdir materializer") + credentials = get_workload_credential_issuer().issue( + tenant_id=tenant_id, + knowledge_graph_id=job.knowledge_graph_id, + extra_scopes=("workload:chat",), + ) + workdir = await self._workdir_materializer.prepare( + job=job, + tenant_id=tenant_id, + credentials=credentials, + ) + _patch_job_context_api_base(workdir, self._settings.api_base_url) + prompt = build_extraction_job_prompt(job=job) + return await self._run_in_sandbox(job=job, workdir=workdir, prompt=prompt) + + async def _run_in_sandbox( + self, + *, + job: ExtractionJobRecord, + workdir: Path, + prompt: str, + ) -> dict[str, Any]: + import asyncio + + return await asyncio.to_thread(self._run_in_sandbox_sync, job, workdir, prompt) + + def _run_in_sandbox_sync( + self, + job: ExtractionJobRecord, + workdir: Path, + prompt: str, + ) -> dict[str, Any]: + sandbox_name = openshell_sandbox.sanitize_sandbox_name("kartograph-extract-", job.job_id) + run_dir = tempfile.mkdtemp(prefix="kartograph-openshell-") + otel_proc = None + otel_log: Path | None = None + + try: + openshell_gateway.ensure_gateway_registered( + gateway_name=self._settings.openshell_gateway_name, + gateway_url=self._settings.openshell_gateway_url, + ) + openshell_sandbox.delete_sandbox(sandbox_name) + openshell_sandbox.create_sandbox( + 
name=sandbox_name, + image=self._settings.agentic_ci_image, + provider_name=self._settings.openshell_provider_name, + ) + openshell_sandbox.emit_lifecycle( + sandbox_name=sandbox_name, + action="created", + probe=self._probe, + image=self._settings.agentic_ci_image, + job_id=job.job_id, + ) + openshell_sandbox.upload_path( + sandbox_name=sandbox_name, + local_path=str(workdir), + dest="/workspace", + ) + openshell_sandbox.apply_policy( + sandbox_name=sandbox_name, + workload="extraction_job", + policy_dir=self._settings.openshell_policy_dir or None, + api_host=_api_host_from_base_url(self._settings.api_base_url), + policy_enforcement=self._settings.openshell_policy_enforcement, + probe=self._probe, + ) + + otel_proc, otel_port, otel_log_path, _otel_rate = otel.start_collector(run_dir) + otel_log = Path(otel_log_path) + write_extraction_prompt_file(workdir=workdir, prompt=prompt) + model = self._resolve_model() + command = _strip_harness_binary( + self._harness.build_args(EXTRACTION_JOB_INVOKE_PROMPT, model) + ) + env_script = self._build_env_script(model=model, otel_port=otel_port) + log_path = activity_log_path(workdir) + append_activity_line(log_path, f"📡 Processing job {job.job_id} in OpenShell sandbox...") + rc = self._run_agent( + sandbox_name=sandbox_name, + env_script=env_script, + command=command, + timeout_seconds=self._settings.agentic_ci_timeout_seconds, + activity_log_path=log_path, + ) + append_activity_line(log_path, f"✅ OpenShell sandbox finished with exit code {rc}") + if otel_proc is not None: + otel.stop_collector(otel_proc) + otel_proc = None + metrics = merge_extraction_job_metrics( + otel_log=otel_log, + workdir=workdir, + activity_log=log_path, + ) + if rc != 0: + raise RuntimeError( + f"OpenShell extraction sandbox exited with code {rc} for job {job.job_id}" + ) + verdict = require_successful_apply(workdir) + append_activity_message( + log_path, + kind="done", + text=( + f"Applied {verdict.operations_applied} graph mutation operation(s) " + 
"via workload API." + ), + ) + metrics["operations_applied"] = verdict.operations_applied + return metrics + finally: + if otel_proc is not None: + otel.stop_collector(otel_proc) + openshell_sandbox.delete_sandbox(sandbox_name) + openshell_sandbox.emit_lifecycle( + sandbox_name=sandbox_name, + action="deleted", + probe=self._probe, + job_id=job.job_id, + ) + + def _resolve_model(self) -> str: + configured = self._settings.agentic_ci_model.strip() + if configured: + return configured + model_env = self._harness.model_env_var() + from_env = os.environ.get(model_env, "").strip() + if from_env: + return from_env + return self._harness.default_model() + + def _build_env_script(self, *, model: str, otel_port: int) -> str: + lines = self._harness.build_env_script_lines(otel_port, None) + lines.append("export CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC=1") + lines.append(f"export AGENT_MODEL={shlex.quote(model)}") + lines.append("cd /workspace") + return "\n".join(lines) + "\n" + + def _run_agent( + self, + *, + sandbox_name: str, + env_script: str, + command: list[str], + timeout_seconds: int, + activity_log_path: Path, + ) -> int: + shell = ( + f"cat > /tmp/.kartograph-env.sh <<'EOF'\n{env_script}EOF\n" + f". 
/tmp/.kartograph-env.sh && exec {' '.join(shlex.quote(part) for part in command)}" + ) + started = time.monotonic() + proc = openshell_sandbox.exec_streaming( + sandbox_name=sandbox_name, + command=["bash", "-lc", shell], + ) + captured_tail: list[str] = [] + stream_log_path = activity_log_path.parent / "agent_stream.jsonl" + try: + assert proc.stdout is not None + with activity_log_path.open("a", encoding="utf-8") as log_handle, stream_log_path.open( + "a", + encoding="utf-8", + ) as stream_handle: + for line in proc.stdout: + if time.monotonic() - started > timeout_seconds: + proc.kill() + append_activity_message( + activity_log_path, + kind="error", + text=f"OpenShell sandbox timed out after {timeout_seconds}s", + ) + raise RuntimeError( + f"OpenShell extraction sandbox timed out after {timeout_seconds}s" + ) + cleaned = line.rstrip("\n") + if not cleaned: + continue + if cleaned.startswith("{"): + stream_handle.write(cleaned + "\n") + stream_handle.flush() + parsed = format_claude_code_stream_line(cleaned) + if parsed: + ts = datetime.now(UTC).isoformat() + for kind, text in parsed: + log_handle.write(f"{ts} {format_activity_log_line(kind=kind, text=text)}\n") + captured_tail.append(text) + else: + ts = datetime.now(UTC).isoformat() + log_handle.write(f"{ts} {format_activity_log_line(kind='info', text=cleaned)}\n") + captured_tail.append(cleaned) + log_handle.flush() + if len(captured_tail) > 20: + captured_tail.pop(0) + rc = proc.wait(timeout=30) + except Exception: + proc.kill() + raise + return int(rc) diff --git a/src/api/extraction/infrastructure/remote_sticky_container_chat_agent.py b/src/api/extraction/infrastructure/remote_sticky_container_chat_agent.py index f6d78ed79..ec18ec151 100644 --- a/src/api/extraction/infrastructure/remote_sticky_container_chat_agent.py +++ b/src/api/extraction/infrastructure/remote_sticky_container_chat_agent.py @@ -11,6 +11,7 @@ from extraction.domain.entities.agent_session import ExtractionAgentSession from 
extraction.domain.value_objects import GraphManagementUiMode +from extraction.infrastructure.runtime_session_auth import RUNTIME_AUTH_HEADER from extraction.infrastructure.workload_runtime_settings import ( get_extraction_workload_runtime_settings, ) @@ -57,11 +58,15 @@ async def stream_turn( if workload_token and workload_token.strip(): payload["workload_token"] = workload_token.strip() url = f"{runtime_base_url.rstrip('/')}/v1/turn" + runtime_auth_token = sticky_runtime.get("runtime_auth_token") + headers: dict[str, str] = {} + if isinstance(runtime_auth_token, str) and runtime_auth_token.strip(): + headers[RUNTIME_AUTH_HEADER] = runtime_auth_token.strip() try: timeout = httpx.Timeout(10.0, read=self._request_timeout_seconds) async with httpx.AsyncClient(timeout=timeout) as client: - async with client.stream("POST", url, json=payload) as response: + async with client.stream("POST", url, json=payload, headers=headers) as response: if response.status_code >= 400: body = await response.aread() detail = body.decode("utf-8", errors="replace") diff --git a/src/api/extraction/infrastructure/runtime_session_auth.py b/src/api/extraction/infrastructure/runtime_session_auth.py new file mode 100644 index 000000000..d2f296962 --- /dev/null +++ b/src/api/extraction/infrastructure/runtime_session_auth.py @@ -0,0 +1,19 @@ +"""Session-bound auth tokens for sticky agent runtime HTTP endpoints.""" + +from __future__ import annotations + +import secrets + +RUNTIME_AUTH_HEADER = "X-Kartograph-Runtime-Auth" + + +def issue_runtime_auth_token() -> str: + """Return a high-entropy token bound to one sticky runtime container.""" + return secrets.token_urlsafe(32) + + +def runtime_auth_matches(*, expected: str, provided: str) -> bool: + """Constant-time comparison for runtime auth header values.""" + if not expected or not provided: + return False + return secrets.compare_digest(expected.strip(), provided.strip()) diff --git 
a/src/api/extraction/infrastructure/sticky_session_bootstrap_builder.py b/src/api/extraction/infrastructure/sticky_session_bootstrap_builder.py index ab91d3901..ede499e33 100644 --- a/src/api/extraction/infrastructure/sticky_session_bootstrap_builder.py +++ b/src/api/extraction/infrastructure/sticky_session_bootstrap_builder.py @@ -54,8 +54,9 @@ async def build( knowledge_graph_id: str, session_id: str, include_job_packages: bool, + ui_mode: str | None = None, ) -> StickySessionRuntimeBootstrap | None: - if self._runtime_settings.backend != "container": + if self._runtime_settings.backend not in {"container", "openshell"}: return None job_packages: tuple[PreparedJobPackageSource, ...] = () @@ -83,4 +84,5 @@ async def build( credentials=credentials, host_session_work_dir=str(host_session_work_dir), api_base_url=self._runtime_settings.api_base_url, + ui_mode=ui_mode, ) \ No newline at end of file diff --git a/src/api/extraction/infrastructure/workload_runtime_factory.py b/src/api/extraction/infrastructure/workload_runtime_factory.py index b1eb7d1bb..d989293f4 100644 --- a/src/api/extraction/infrastructure/workload_runtime_factory.py +++ b/src/api/extraction/infrastructure/workload_runtime_factory.py @@ -9,6 +9,9 @@ ContainerEphemeralExtractionWorkerLauncher, ContainerStickySessionRuntimeManager, ) +from extraction.infrastructure.openshell.openshell_sticky_session_runtime_manager import ( + OpenShellStickySessionRuntimeManager, +) from extraction.infrastructure.deterministic_chat_agent import DeterministicExtractionChatAgent from extraction.infrastructure.remote_sticky_container_chat_agent import ( RemoteStickyContainerChatAgent, @@ -59,7 +62,7 @@ def create_extraction_chat_agent( ) -> IExtractionChatAgent: """Build chat agent implementation for configured runtime backend.""" resolved = settings or get_extraction_workload_runtime_settings() - if resolved.backend == "container": + if resolved.backend in {"container", "openshell"}: return 
RemoteStickyContainerChatAgent() return DeterministicExtractionChatAgent() @@ -74,6 +77,27 @@ def create_sticky_session_runtime_manager( session_ttl=timedelta(minutes=resolved.session_ttl_minutes) ) + if resolved.backend == "openshell": + return OpenShellStickySessionRuntimeManager( + sticky_image=resolved.sticky_image, + session_ttl=timedelta(minutes=resolved.session_ttl_minutes), + sticky_service_port=resolved.sticky_service_port, + container_work_mount=resolved.container_work_mount, + vertex_project_id=resolved.vertex_project_id, + vertex_region=resolved.vertex_region, + vertex_enabled=resolved.vertex_enabled(), + agent_turn_timeout_seconds=resolved.sticky_turn_timeout_seconds, + agent_max_turns=resolved.sticky_max_turns, + api_base_url=resolved.api_base_url, + gateway_name=resolved.openshell_gateway_name, + gateway_url=resolved.openshell_gateway_url, + provider_name=resolved.openshell_provider_name, + runtime_host=resolved.openshell_runtime_host, + forward_port_base=resolved.openshell_forward_port_base, + policy_dir=resolved.openshell_policy_dir or None, + policy_enforcement=resolved.openshell_policy_enforcement, + ) + container_runtime = create_container_runtime(resolved.container_engine) return ContainerStickySessionRuntimeManager( container_runtime=container_runtime, @@ -92,6 +116,13 @@ def create_sticky_session_runtime_manager( container_run_gid=resolved.container_run_gid, agent_turn_timeout_seconds=resolved.sticky_turn_timeout_seconds, agent_max_turns=resolved.sticky_max_turns, + container_hardening_enabled=resolved.container_hardening_enabled, + container_cap_drop_all=resolved.container_cap_drop_all, + container_read_only_rootfs=resolved.container_read_only_rootfs, + container_no_new_privileges=resolved.container_no_new_privileges, + container_pids_limit=resolved.container_pids_limit, + container_memory_limit=resolved.container_memory_limit, + container_tmpfs_mounts=resolved.container_tmpfs_mounts, ) diff --git 
a/src/api/extraction/infrastructure/workload_runtime_settings.py b/src/api/extraction/infrastructure/workload_runtime_settings.py index bdc47537c..bb485f2b3 100644 --- a/src/api/extraction/infrastructure/workload_runtime_settings.py +++ b/src/api/extraction/infrastructure/workload_runtime_settings.py @@ -22,8 +22,8 @@ class ExtractionWorkloadRuntimeSettings(BaseSettings): extra="ignore", ) - backend: Literal["memory", "container"] = Field(default="memory") - job_runner: Literal["stub", "agentic_ci"] | None = Field(default=None) + backend: Literal["memory", "container", "openshell"] = Field(default="memory") + job_runner: Literal["stub", "agentic_ci", "openshell"] | None = Field(default=None) container_engine: Literal["auto", "docker", "podman"] = Field(default="auto") container_network: str | None = Field(default=None) sticky_image: str = Field(default="kartograph-agent-runtime:dev") @@ -70,6 +70,40 @@ class ExtractionWorkloadRuntimeSettings(BaseSettings): gcloud_config_container_path: str = Field(default="/gcloud/config") container_run_uid: int | None = Field(default=None) container_run_gid: int | None = Field(default=None) + container_hardening_enabled: bool = Field(default=True) + container_cap_drop_all: bool = Field(default=True) + container_read_only_rootfs: bool = Field(default=True) + container_no_new_privileges: bool = Field(default=True) + container_pids_limit: int | None = Field(default=256, ge=32, le=4096) + container_memory_limit: str | None = Field(default="2g") + container_tmpfs_mounts: tuple[str, ...] = Field( + default=("/tmp:rw,noexec,nosuid,size=512m",), + ) + openshell_gateway_name: str = Field(default="openshell") + openshell_gateway_url: str = Field( + default="https://127.0.0.1:17670", + description=( + "OpenShell gateway endpoint for CLI registration. " + "Use https://host.docker.internal:17670 when the API runs inside compose." 
+ ), + ) + openshell_provider_name: str = Field(default="kartograph-gma") + openshell_runtime_host: str = Field( + default="host.docker.internal", + description=( + "Host reachable from the API process for OpenShell port forwards. " + "Use host.docker.internal in compose dev; 127.0.0.1 when API runs on host." + ), + ) + openshell_forward_port_base: int = Field(default=18787, ge=1024, le=65000) + openshell_policy_dir: str = Field( + default="", + description="Directory containing OpenShell policy YAML files. Empty uses bundled defaults.", + ) + openshell_policy_enforcement: Literal["soft", "hard_requirement"] = Field( + default="soft", + description="Landlock enforcement mode for OpenShell policies (hard_requirement in prod).", + ) def vertex_enabled(self) -> bool: return vertex_enabled_from_env() @@ -77,11 +111,12 @@ def vertex_enabled(self) -> bool: @model_validator(mode="after") def _apply_vertex_env_aliases(self) -> "ExtractionWorkloadRuntimeSettings": if self.job_runner is None: - object.__setattr__( - self, - "job_runner", - "agentic_ci" if self.backend == "container" else "stub", - ) + if self.backend == "openshell": + object.__setattr__(self, "job_runner", "openshell") + elif self.backend == "container": + object.__setattr__(self, "job_runner", "agentic_ci") + else: + object.__setattr__(self, "job_runner", "stub") if not self.vertex_project_id: object.__setattr__( self, @@ -121,7 +156,7 @@ def _apply_vertex_env_aliases(self) -> "ExtractionWorkloadRuntimeSettings": break return self - @field_validator("sticky_command", "worker_command", mode="before") + @field_validator("sticky_command", "worker_command", "container_tmpfs_mounts", mode="before") @classmethod def _parse_command(cls, value: object) -> tuple[str, ...]: if isinstance(value, tuple): diff --git a/src/api/extraction/ports/runtime.py b/src/api/extraction/ports/runtime.py index a4e9c31fd..78c4fa71f 100644 --- a/src/api/extraction/ports/runtime.py +++ b/src/api/extraction/ports/runtime.py @@ -20,6 
+20,7 @@ class StickySessionRuntimeLease: last_activity_at: datetime expires_at: datetime runtime_base_url: str | None = None + runtime_auth_token: str | None = None @dataclass(frozen=True) @@ -30,6 +31,7 @@ class StickySessionRuntimeBootstrap: credentials: ScopedWorkloadCredentials host_session_work_dir: str api_base_url: str + ui_mode: str | None = None @dataclass(frozen=True) diff --git a/src/api/extraction/ports/sticky_session_bootstrap.py b/src/api/extraction/ports/sticky_session_bootstrap.py index 35fc4b0b6..cb98b9ddd 100644 --- a/src/api/extraction/ports/sticky_session_bootstrap.py +++ b/src/api/extraction/ports/sticky_session_bootstrap.py @@ -17,6 +17,7 @@ async def build( knowledge_graph_id: str, session_id: str, include_job_packages: bool, + ui_mode: str | None = None, ) -> StickySessionRuntimeBootstrap | None: """Return bootstrap payload when container runtime is enabled.""" ... diff --git a/src/api/shared_kernel/container_runtime/cli_runtime.py b/src/api/shared_kernel/container_runtime/cli_runtime.py index 400de71dc..d4e00c6d7 100644 --- a/src/api/shared_kernel/container_runtime/cli_runtime.py +++ b/src/api/shared_kernel/container_runtime/cli_runtime.py @@ -38,6 +38,20 @@ def run(self, spec: ContainerRunSpec) -> ContainerRunResult: command.extend(["--network", spec.network]) if spec.user is not None: command.extend(["--user", spec.user]) + if spec.cap_drop_all: + command.extend(["--cap-drop", "ALL"]) + if spec.read_only_rootfs: + command.append("--read-only") + if spec.no_new_privileges: + command.extend(["--security-opt", "no-new-privileges"]) + for opt in spec.security_opt: + command.extend(["--security-opt", opt]) + if spec.pids_limit is not None: + command.extend(["--pids-limit", str(spec.pids_limit)]) + if spec.memory_limit is not None: + command.extend(["--memory", spec.memory_limit]) + for mount in spec.tmpfs_mounts: + command.extend(["--tmpfs", mount]) command.append(spec.image) if spec.command: command.extend(spec.command) diff --git 
a/src/api/shared_kernel/container_runtime/ports.py b/src/api/shared_kernel/container_runtime/ports.py index 3e7e83239..f26be1829 100644 --- a/src/api/shared_kernel/container_runtime/ports.py +++ b/src/api/shared_kernel/container_runtime/ports.py @@ -24,6 +24,13 @@ class ContainerRunSpec: detach: bool = True remove_on_exit: bool = False user: str | None = None + cap_drop_all: bool = False + read_only_rootfs: bool = False + no_new_privileges: bool = False + pids_limit: int | None = None + memory_limit: str | None = None + tmpfs_mounts: tuple[str, ...] = field(default_factory=tuple) + security_opt: tuple[str, ...] = field(default_factory=tuple) @dataclass(frozen=True) diff --git a/src/api/tests/unit/extraction/infrastructure/test_openshell_policy.py b/src/api/tests/unit/extraction/infrastructure/test_openshell_policy.py new file mode 100644 index 000000000..ddbe1e2d6 --- /dev/null +++ b/src/api/tests/unit/extraction/infrastructure/test_openshell_policy.py @@ -0,0 +1,39 @@ +"""Unit tests for OpenShell policy resolution.""" + +from __future__ import annotations + +from extraction.infrastructure.openshell.policy import ( + bundled_policy_dir, + resolve_endpoints, + resolve_enforcement, + resolve_l7_paths, + resolve_policy_path, +) + + +def test_resolve_policy_path_by_ui_mode() -> None: + path = resolve_policy_path(ui_mode="initial-schema-design") + assert path.name == "gma-initial-schema-design.yaml" + + +def test_resolve_endpoints_rewrites_api_host() -> None: + endpoints = resolve_endpoints( + ui_mode="one-off-mutations", + api_host="kartograph-api:8000", + ) + assert "kartograph-api:8000:read-write" in endpoints + assert "inference.local:443:read-write" in endpoints + + +def test_resolve_enforcement_from_bundled_policy() -> None: + enforcement = resolve_enforcement(ui_mode="initial-schema-design") + assert enforcement in {"soft", "hard_requirement"} + + +def test_resolve_l7_paths_for_extraction_jobs_mode() -> None: + paths = resolve_l7_paths(ui_mode="extraction-jobs") 
+ assert any("jobs" in path for path in paths) + + +def test_bundled_policy_dir_exists() -> None: + assert bundled_policy_dir().is_dir() diff --git a/src/api/tests/unit/extraction/infrastructure/test_openshell_sticky_session_runtime_manager.py b/src/api/tests/unit/extraction/infrastructure/test_openshell_sticky_session_runtime_manager.py new file mode 100644 index 000000000..2819bf080 --- /dev/null +++ b/src/api/tests/unit/extraction/infrastructure/test_openshell_sticky_session_runtime_manager.py @@ -0,0 +1,115 @@ +"""Unit tests for OpenShell sticky session runtime manager.""" + +from __future__ import annotations + +from datetime import timedelta +from unittest.mock import MagicMock, patch + +from extraction.infrastructure.openshell.openshell_sticky_session_runtime_manager import ( + OpenShellStickySessionRuntimeManager, +) +from extraction.infrastructure.workload_runtime import ScopedWorkloadCredentialIssuer +from extraction.ports.runtime import StickySessionRuntimeBootstrap + + +class TestOpenShellStickySessionRuntimeManager: + def test_start_runtime_issues_auth_token_and_runtime_url(self) -> None: + manager = OpenShellStickySessionRuntimeManager( + sticky_image="kartograph-agent-runtime:dev", + session_ttl=timedelta(minutes=30), + runtime_host="host.docker.internal", + forward_port_base=18787, + ) + issuer = ScopedWorkloadCredentialIssuer(default_ttl=timedelta(minutes=10)) + credentials = issuer.issue_for_sticky_session( + tenant_id="tenant-1", + knowledge_graph_id="kg-1", + session_id="session-1", + ) + bootstrap = StickySessionRuntimeBootstrap( + tenant_id="tenant-1", + credentials=credentials, + host_session_work_dir="/tmp/session-work", + api_base_url="http://api:8000", + ui_mode="initial-schema-design", + ) + + with patch( + "extraction.infrastructure.openshell.openshell_sticky_session_runtime_manager.openshell_gateway.ensure_gateway_registered" + ), patch( + 
"extraction.infrastructure.openshell.openshell_sticky_session_runtime_manager.openshell_sandbox.delete_sandbox" + ), patch( + "extraction.infrastructure.openshell.openshell_sticky_session_runtime_manager.openshell_sandbox.create_sandbox" + ), patch( + "extraction.infrastructure.openshell.openshell_sticky_session_runtime_manager.openshell_sandbox.upload_path" + ), patch( + "extraction.infrastructure.openshell.openshell_sticky_session_runtime_manager.openshell_sandbox.apply_policy" + ) as apply_policy, patch( + "extraction.infrastructure.openshell.openshell_sticky_session_runtime_manager.openshell_sandbox.exec_background" + ), patch( + "extraction.infrastructure.openshell.openshell_sticky_session_runtime_manager.openshell_sandbox.start_forward" + ), patch( + "extraction.infrastructure.openshell.openshell_sticky_session_runtime_manager.openshell_sandbox.emit_lifecycle" + ): + lease = manager.get_or_start_runtime( + session_id="session-1", + user_id="user-1", + knowledge_graph_id="kg-1", + mode="graph_management", + bootstrap=bootstrap, + ) + + assert lease.runtime_auth_token + assert lease.runtime_base_url.startswith("http://host.docker.internal:") + apply_policy.assert_called_once() + assert apply_policy.call_args.kwargs["ui_mode"] == "initial-schema-design" + + def test_terminate_runtime_deletes_sandbox(self) -> None: + manager = OpenShellStickySessionRuntimeManager( + sticky_image="kartograph-agent-runtime:dev", + session_ttl=timedelta(minutes=30), + ) + probe = MagicMock() + manager._probe = probe # noqa: SLF001 + with patch( + "extraction.infrastructure.openshell.openshell_sticky_session_runtime_manager.openshell_sandbox.sandbox_exists", + return_value=True, + ), patch( + "extraction.infrastructure.openshell.openshell_sticky_session_runtime_manager.openshell_gateway.ensure_gateway_registered" + ), patch( + "extraction.infrastructure.openshell.openshell_sticky_session_runtime_manager.openshell_sandbox.delete_sandbox" + ), patch( + 
"extraction.infrastructure.openshell.openshell_sticky_session_runtime_manager.openshell_sandbox.create_sandbox" + ), patch( + "extraction.infrastructure.openshell.openshell_sticky_session_runtime_manager.openshell_sandbox.apply_policy" + ), patch( + "extraction.infrastructure.openshell.openshell_sticky_session_runtime_manager.openshell_sandbox.exec_background" + ), patch( + "extraction.infrastructure.openshell.openshell_sticky_session_runtime_manager.openshell_sandbox.start_forward" + ), patch( + "extraction.infrastructure.openshell.openshell_sticky_session_runtime_manager.openshell_sandbox.emit_lifecycle" + ): + manager.get_or_start_runtime( + session_id="session-1", + user_id="user-1", + knowledge_graph_id="kg-1", + mode="graph_management", + bootstrap=None, + ) + + with patch( + "extraction.infrastructure.openshell.openshell_sticky_session_runtime_manager.openshell_sandbox.stop_forward" + ) as stop_forward, patch( + "extraction.infrastructure.openshell.openshell_sticky_session_runtime_manager.openshell_sandbox.delete_sandbox" + ) as delete_sandbox, patch( + "extraction.infrastructure.openshell.openshell_sticky_session_runtime_manager.openshell_sandbox.emit_lifecycle" + ): + manager.terminate_runtime( + session_id="session-1", + user_id="user-1", + knowledge_graph_id="kg-1", + mode="graph_management", + ) + + stop_forward.assert_called_once() + delete_sandbox.assert_called_once() diff --git a/src/api/tests/unit/extraction/infrastructure/test_runtime_session_auth.py b/src/api/tests/unit/extraction/infrastructure/test_runtime_session_auth.py new file mode 100644 index 000000000..50b90aa04 --- /dev/null +++ b/src/api/tests/unit/extraction/infrastructure/test_runtime_session_auth.py @@ -0,0 +1,29 @@ +"""Unit tests for sticky runtime session auth tokens.""" + +from __future__ import annotations + +from extraction.infrastructure.runtime_session_auth import ( + RUNTIME_AUTH_HEADER, + issue_runtime_auth_token, + runtime_auth_matches, +) + + +def 
test_issue_runtime_auth_token_is_unique_and_non_empty() -> None: + first = issue_runtime_auth_token() + second = issue_runtime_auth_token() + assert first + assert second + assert first != second + + +def test_runtime_auth_matches_rejects_missing_or_mismatched_values() -> None: + token = issue_runtime_auth_token() + assert runtime_auth_matches(expected=token, provided=token) + assert not runtime_auth_matches(expected=token, provided="wrong") + assert not runtime_auth_matches(expected="", provided=token) + assert not runtime_auth_matches(expected=token, provided="") + + +def test_runtime_auth_header_constant() -> None: + assert RUNTIME_AUTH_HEADER == "X-Kartograph-Runtime-Auth" diff --git a/src/api/tests/unit/extraction/infrastructure/test_sticky_session_container_bootstrap.py b/src/api/tests/unit/extraction/infrastructure/test_sticky_session_container_bootstrap.py index 0476e10ef..f08fd82c9 100644 --- a/src/api/tests/unit/extraction/infrastructure/test_sticky_session_container_bootstrap.py +++ b/src/api/tests/unit/extraction/infrastructure/test_sticky_session_container_bootstrap.py @@ -54,7 +54,14 @@ def test_start_runtime_mounts_skills_workspace_and_injects_token() -> None: spec: ContainerRunSpec = runtime.run.call_args.args[0] assert spec.command == () assert spec.network == "kartograph_kartograph" - assert spec.env["KARTOGRAPH_WORKLOAD_TOKEN"] == credentials.token + assert "KARTOGRAPH_WORKLOAD_TOKEN" not in spec.env + assert spec.env["KARTOGRAPH_RUNTIME_AUTH_TOKEN"] + assert spec.cap_drop_all is True + assert spec.read_only_rootfs is True + assert spec.no_new_privileges is True + assert spec.pids_limit == 256 + assert spec.memory_limit == "2g" + assert "/tmp:rw,noexec,nosuid,size=512m" in spec.tmpfs_mounts assert "/tmp/session-work:/workspace" in spec.binds assert "/tmp/session-work/repository-files:/workspace/repository-files:ro" in spec.binds assert "/host/.config/gcloud:/gcloud/config:ro" in spec.binds @@ -66,3 +73,4 @@ def 
test_start_runtime_mounts_skills_workspace_and_injects_token() -> None: assert spec.env["KARTOGRAPH_AGENT_MAX_TURNS"] == "500" assert spec.user == "1000:1000" assert lease.runtime_base_url.startswith("http://kartograph-sticky-") + assert lease.runtime_auth_token == spec.env["KARTOGRAPH_RUNTIME_AUTH_TOKEN"] diff --git a/src/api/tests/unit/extraction/infrastructure/test_workload_runtime_factory.py b/src/api/tests/unit/extraction/infrastructure/test_workload_runtime_factory.py index c6f3afa61..8c0b90121 100644 --- a/src/api/tests/unit/extraction/infrastructure/test_workload_runtime_factory.py +++ b/src/api/tests/unit/extraction/infrastructure/test_workload_runtime_factory.py @@ -8,6 +8,9 @@ ContainerEphemeralExtractionWorkerLauncher, ContainerStickySessionRuntimeManager, ) +from extraction.infrastructure.openshell.openshell_sticky_session_runtime_manager import ( + OpenShellStickySessionRuntimeManager, +) from extraction.infrastructure.workload_runtime import ( InMemoryEphemeralExtractionWorkerLauncher, InMemoryStickySessionRuntimeManager, @@ -43,6 +46,14 @@ def test_container_backend_returns_container_adapters(self) -> None: assert isinstance(sticky, ContainerStickySessionRuntimeManager) assert isinstance(worker, ContainerEphemeralExtractionWorkerLauncher) + def test_openshell_backend_returns_openshell_sticky_manager(self) -> None: + settings = ExtractionWorkloadRuntimeSettings(backend="openshell") + + sticky = create_sticky_session_runtime_manager(settings) + + assert isinstance(sticky, OpenShellStickySessionRuntimeManager) + assert settings.job_runner == "openshell" + def test_outbox_extraction_handler_uses_runtime_factory_wiring(self) -> None: main_source = Path(__file__).resolve().parents[4] / "main.py" content = main_source.read_text(encoding="utf-8") diff --git a/src/api/tests/unit/shared_kernel/container_runtime/test_cli_runtime.py b/src/api/tests/unit/shared_kernel/container_runtime/test_cli_runtime.py index 4ecefa917..337816592 100644 --- 
a/src/api/tests/unit/shared_kernel/container_runtime/test_cli_runtime.py +++ b/src/api/tests/unit/shared_kernel/container_runtime/test_cli_runtime.py @@ -107,6 +107,35 @@ def test_is_running_returns_false_for_missing_container(self) -> None: assert runtime.is_running("abc123") is False + def test_run_applies_container_hardening_flags(self) -> None: + runtime = CliContainerRuntime(binary="docker") + + with patch("shared_kernel.container_runtime.cli_runtime.subprocess.run") as run: + run.return_value = MagicMock(returncode=0, stdout="abc123\n", stderr="") + + runtime.run( + ContainerRunSpec( + image="busybox:1.36", + cap_drop_all=True, + read_only_rootfs=True, + no_new_privileges=True, + pids_limit=128, + memory_limit="1g", + tmpfs_mounts=("/tmp:rw,noexec,nosuid,size=256m",), + ) + ) + + command = run.call_args.args[0] + assert "--cap-drop" in command + assert "ALL" in command + assert "--read-only" in command + assert "no-new-privileges" in command + assert "--pids-limit" in command + assert "128" in command + assert "--memory" in command + assert "1g" in command + assert "--tmpfs" in command + def test_remove_by_name_force_removes_existing_container(self) -> None: runtime = CliContainerRuntime(binary="docker") From 2526ed69a96bf8c8aa6ff6cb6225ed112e9a6c5a Mon Sep 17 00:00:00 2001 From: aredenba-rh <aredenba@redhat.com> Date: Mon, 15 Jun 2026 17:59:45 -0400 Subject: [PATCH 141/153] feat(extraction): OpenShell worker sandboxes, GMA inference routing, and manage UX Move batch extraction to one reusable OpenShell sandbox per worker, route GMA through inference.local with Vertex effort capping, and add maintain/archive workspace improvements plus token-efficient partial UPDATE tooling for jobs. 
Co-authored-by: Cursor <cursoragent@cursor.com> --- Makefile | 2 + compose.dev.yaml | 16 +- .../base/openshell-policies-configmap.yaml | 13 + .../policies/gma-extraction-jobs.yaml | 4 + .../policies/gma-initial-schema-design.yaml | 3 + .../policies/gma-one-off-mutations.yaml | 3 + .../openshell/policies/gma-sticky-base.yaml | 5 +- scripts/cleanup-openshell-sandboxes.sh | 29 + src/agent-runtime/Dockerfile | 23 +- .../kartograph_agent_runtime/executor.py | 9 +- .../kartograph_agent_runtime/settings.py | 10 + .../kartograph_agent_runtime/vertex.py | 13 +- src/agent-runtime/tests/test_vertex.py | 28 + .../application/agent_session_service.py | 41 +- .../archive_completed_extraction_jobs.py | 71 ++ .../application/schema_authoring_guide.py | 25 + .../extraction_job_container.py | 39 +- .../extraction_job_helpers/__init__.py | 2 +- .../workload-graph-read.sh | 141 ++++ .../extraction_job_mutation_metrics.py | 27 + .../infrastructure/extraction_job_prompt.py | 42 +- .../extraction_run_orchestrator.py | 7 + .../infrastructure/openshell/cli.py | 27 + .../openshell/extraction_sandbox_pool.py | 55 ++ .../infrastructure/openshell/gateway.py | 39 +- .../infrastructure/openshell/inference_env.py | 65 ++ ...penshell_sticky_session_runtime_manager.py | 47 +- .../policies/gma-extraction-jobs.yaml | 4 + .../policies/gma-initial-schema-design.yaml | 3 + .../policies/gma-one-off-mutations.yaml | 3 + .../openshell/policies/gma-sticky-base.yaml | 3 + .../infrastructure/openshell/policy.py | 13 + .../infrastructure/openshell/runtime_env.py | 31 + .../infrastructure/openshell/sandbox.py | 360 ++++++++- .../openshell/vertex_provider.py | 140 ++++ .../openshell_extraction_job_runner.py | 303 +++++-- .../repositories/extraction_job_repository.py | 57 +- .../sticky_session_bootstrap_builder.py | 2 +- .../infrastructure/vertex_runtime_env.py | 15 + .../workload_runtime_factory.py | 6 +- .../workload_runtime_settings.py | 61 +- src/api/extraction/presentation/routes.py | 4 +- 
.../management/extraction_jobs_service.py | 70 +- .../services/data_source_service.py | 4 +- .../domain/aggregates/data_source.py | 27 + .../extraction_baseline_updater.py | 36 + .../infrastructure/sync_lifecycle_handler.py | 1 + .../extraction_jobs_routes.py | 21 + .../scripts/smoke-openshell-extraction-job.py | 170 ++++ .../application/test_agent_session_service.py | 60 ++ .../test_archive_completed_extraction_jobs.py | 128 +++ .../test_schema_authoring_guide.py | 3 + .../test_agentic_ci_extraction_job_runner.py | 1 + .../test_extraction_job_mutation_metrics.py | 40 + .../test_extraction_job_prompt.py | 4 + .../test_extraction_job_runtime_stop.py | 44 ++ ...est_extraction_job_workdir_materializer.py | 1 + .../test_extraction_run_orchestrator.py | 56 ++ .../test_openshell_extraction_job_runner.py | 130 +++ .../test_openshell_extraction_sandbox_pool.py | 43 + .../infrastructure/test_openshell_gateway.py | 147 ++++ .../test_openshell_inference_env.py | 40 + .../infrastructure/test_openshell_policy.py | 24 + .../infrastructure/test_openshell_sandbox.py | 323 ++++++++ ...penshell_sticky_session_runtime_manager.py | 51 +- .../test_openshell_vertex_provider.py | 83 ++ .../infrastructure/test_vertex_runtime_env.py | 42 +- .../test_extraction_baseline_updater.py | 78 ++ .../test_sync_lifecycle_handler.py | 44 ++ .../tests/unit/management/test_data_source.py | 35 + .../GraphExtractionJobSetsPanel.vue | 64 +- .../GraphExtractionJobsWorkspace.vue | 123 ++- .../GraphMaintenanceWorkspace.vue | 746 ++++++++++++++++++ .../[kgId]/data-sources/index.vue | 9 +- .../pages/knowledge-graphs/[kgId]/manage.vue | 43 +- .../app/tests/kg-maintenance-schedule.test.ts | 14 + .../app/tests/kg-manage-workspace-hub.test.ts | 2 +- src/dev-ui/app/tests/kgManageState.test.ts | 7 + .../knowledge-graph-manage-workspace.test.ts | 46 +- src/dev-ui/app/utils/kgDataSourcesCommits.ts | 11 + src/dev-ui/app/utils/kgMaintenanceSchedule.ts | 60 ++ src/dev-ui/app/utils/kgManageState.ts | 13 + 
src/dev-ui/app/utils/kgManageWorkspace.ts | 12 +- src/dev-ui/app/utils/kgManageWorkspaceHub.ts | 36 +- 84 files changed, 4410 insertions(+), 273 deletions(-) create mode 100755 scripts/cleanup-openshell-sandboxes.sh create mode 100644 src/agent-runtime/tests/test_vertex.py create mode 100644 src/api/extraction/application/archive_completed_extraction_jobs.py create mode 100755 src/api/extraction/infrastructure/extraction_job_helpers/workload-graph-read.sh create mode 100644 src/api/extraction/infrastructure/openshell/extraction_sandbox_pool.py create mode 100644 src/api/extraction/infrastructure/openshell/inference_env.py create mode 100644 src/api/extraction/infrastructure/openshell/runtime_env.py create mode 100644 src/api/extraction/infrastructure/openshell/vertex_provider.py create mode 100644 src/api/management/infrastructure/extraction_baseline_updater.py create mode 100644 src/api/scripts/smoke-openshell-extraction-job.py create mode 100644 src/api/tests/unit/extraction/application/test_archive_completed_extraction_jobs.py create mode 100644 src/api/tests/unit/extraction/infrastructure/test_extraction_job_runtime_stop.py create mode 100644 src/api/tests/unit/extraction/infrastructure/test_extraction_run_orchestrator.py create mode 100644 src/api/tests/unit/extraction/infrastructure/test_openshell_extraction_job_runner.py create mode 100644 src/api/tests/unit/extraction/infrastructure/test_openshell_extraction_sandbox_pool.py create mode 100644 src/api/tests/unit/extraction/infrastructure/test_openshell_gateway.py create mode 100644 src/api/tests/unit/extraction/infrastructure/test_openshell_inference_env.py create mode 100644 src/api/tests/unit/extraction/infrastructure/test_openshell_sandbox.py create mode 100644 src/api/tests/unit/extraction/infrastructure/test_openshell_vertex_provider.py create mode 100644 src/api/tests/unit/management/infrastructure/test_extraction_baseline_updater.py create mode 100644 
src/dev-ui/app/components/graph-management/GraphMaintenanceWorkspace.vue create mode 100644 src/dev-ui/app/tests/kg-maintenance-schedule.test.ts create mode 100644 src/dev-ui/app/utils/kgMaintenanceSchedule.ts diff --git a/Makefile b/Makefile index db574a7ec..7e8871b1c 100755 --- a/Makefile +++ b/Makefile @@ -23,6 +23,7 @@ certs: .PHONY: dev dev: certs @echo "🧰 [Development] Starting application containers..." + @./scripts/cleanup-openshell-sandboxes.sh docker compose -f compose.yaml -f compose.dev.yaml --profile build-only build agent-runtime docker compose -f compose.yaml build HOST_UID=$$(id -u) HOST_GID=$$(id -g) docker compose -f compose.yaml -f compose.dev.yaml --profile ui up -d @@ -40,6 +41,7 @@ down: -@docker ps -aq --filter name=kartograph-sticky- | xargs -r docker rm -f -@docker ps -aq --filter name=kartograph-worker- | xargs -r docker rm -f -@docker ps -aq --filter name=kartograph-extract- | xargs -r docker rm -f + -@./scripts/cleanup-openshell-sandboxes.sh .PHONY: dev-backup dev-restore dev-backup-list dev-repair-age-graphs dev-backup: diff --git a/compose.dev.yaml b/compose.dev.yaml index bcdd2ab81..f687c3a13 100644 --- a/compose.dev.yaml +++ b/compose.dev.yaml @@ -33,10 +33,19 @@ services: KARTOGRAPH_EXTRACTION_RUNTIME_JOB_RUNNER: openshell ## End Track A / Track B selection KARTOGRAPH_EXTRACTION_RUNTIME_OPENSHELL_GATEWAY_URL: https://host.docker.internal:17670 - KARTOGRAPH_EXTRACTION_RUNTIME_OPENSHELL_RUNTIME_HOST: host.docker.internal + # Forwards run inside this container; bind to 127.0.0.1 here, not on the host. 
+ KARTOGRAPH_EXTRACTION_RUNTIME_OPENSHELL_RUNTIME_HOST: 127.0.0.1 + KARTOGRAPH_EXTRACTION_RUNTIME_OPENSHELL_API_BASE_URL: http://host.docker.internal:8000 KARTOGRAPH_EXTRACTION_RUNTIME_OPENSHELL_POLICY_DIR: /etc/openshell/policies KARTOGRAPH_EXTRACTION_RUNTIME_OPENSHELL_GATEWAY_NAME: openshell + # OpenShell CLI in the API container reads host gateway registration + mTLS from here + KARTOGRAPH_EXTRACTION_RUNTIME_OPENSHELL_XDG_CONFIG_HOME: /root/.config + # Host gateway must listen beyond localhost — in ~/.config/openshell/gateway.toml: + # bind_address = "0.0.0.0:17670" + # then: systemctl --user restart openshell-gateway KARTOGRAPH_EXTRACTION_RUNTIME_AGENTIC_CI_IMAGE: ghcr.io/opendatahub-io/ai-helpers:latest + # OpenShell extraction jobs use agentic-ci claude-sandbox (not ai-helpers or sticky runtime). + KARTOGRAPH_EXTRACTION_RUNTIME_OPENSHELL_EXTRACTION_IMAGE: quay.io/aipcc/agentic-ci/claude-sandbox:latest KARTOGRAPH_EXTRACTION_RUNTIME_AGENTIC_CI_HARNESS: claude-code KARTOGRAPH_EXTRACTION_RUNTIME_EXTRACTION_JOB_WORK_DIR: /tmp/kartograph/extraction_jobs # Vertex AI for Claude Agent SDK in sticky assistant containers @@ -60,6 +69,8 @@ services: # OpenShell CLI + mTLS config (host gateway; API container invokes openshell subprocess) - /usr/bin/openshell:/usr/bin/openshell:ro - ${HOME}/.config/openshell:/root/.config/openshell:ro,z + # forward start -d writes PID/state here; read-only parent mount hangs the CLI + - openshell-forwards:/root/.config/openshell/forwards # OpenShell policy templates (Phase 3) when backend=openshell - ./src/api/extraction/infrastructure/openshell/policies:/etc/openshell/policies:ro,z extra_hosts: @@ -89,3 +100,6 @@ services: ports: - "3000:3000" - "24678:24678" + +volumes: + openshell-forwards: diff --git a/deploy/apps/kartograph/base/openshell-policies-configmap.yaml b/deploy/apps/kartograph/base/openshell-policies-configmap.yaml index d39012f7d..c3f19113b 100644 --- a/deploy/apps/kartograph/base/openshell-policies-configmap.yaml +++ 
b/deploy/apps/kartograph/base/openshell-policies-configmap.yaml @@ -12,6 +12,9 @@ data: endpoints: - "kartograph-api:8000:read-write" - "inference.local:443:read-write" + - "aiplatform.googleapis.com:443:read-write" + - "*.aiplatform.googleapis.com:443:read-write" + - "oauth2.googleapis.com:443:read-write" l7_allowed_paths: - "/extraction/workloads/*" gma-initial-schema-design.yaml: | @@ -21,6 +24,9 @@ data: endpoints: - "kartograph-api:8000:read-write" - "inference.local:443:read-write" + - "aiplatform.googleapis.com:443:read-write" + - "*.aiplatform.googleapis.com:443:read-write" + - "oauth2.googleapis.com:443:read-write" l7_allowed_paths: - "/extraction/workloads/schema/*" - "/extraction/workloads/graph/*" @@ -31,8 +37,12 @@ data: endpoints: - "kartograph-api:8000:read-write" - "inference.local:443:read-write" + - "aiplatform.googleapis.com:443:read-write" + - "*.aiplatform.googleapis.com:443:read-write" + - "oauth2.googleapis.com:443:read-write" l7_allowed_paths: - "/extraction/workloads/jobs/*" + - "/extraction/workloads/schema/*" - "/extraction/workloads/graph/*" gma-one-off-mutations.yaml: | version: 1 @@ -41,6 +51,9 @@ data: endpoints: - "kartograph-api:8000:read-write" - "inference.local:443:read-write" + - "aiplatform.googleapis.com:443:read-write" + - "*.aiplatform.googleapis.com:443:read-write" + - "oauth2.googleapis.com:443:read-write" l7_allowed_paths: - "/extraction/workloads/mutations/*" - "/extraction/workloads/graph/*" diff --git a/deploy/openshell/policies/gma-extraction-jobs.yaml b/deploy/openshell/policies/gma-extraction-jobs.yaml index 6f6d53d31..c0ae98a40 100644 --- a/deploy/openshell/policies/gma-extraction-jobs.yaml +++ b/deploy/openshell/policies/gma-extraction-jobs.yaml @@ -5,6 +5,10 @@ description: Production policy for extraction-jobs graph-management mode. 
endpoints: - "kartograph-api:8000:read-write" - "inference.local:443:read-write" + - "aiplatform.googleapis.com:443:read-write" + - "*.aiplatform.googleapis.com:443:read-write" + - "oauth2.googleapis.com:443:read-write" l7_allowed_paths: - "/extraction/workloads/jobs/*" + - "/extraction/workloads/schema/*" - "/extraction/workloads/graph/*" diff --git a/deploy/openshell/policies/gma-initial-schema-design.yaml b/deploy/openshell/policies/gma-initial-schema-design.yaml index c138d4bfc..911a2fad7 100644 --- a/deploy/openshell/policies/gma-initial-schema-design.yaml +++ b/deploy/openshell/policies/gma-initial-schema-design.yaml @@ -5,6 +5,9 @@ description: Production policy for initial schema design mode. endpoints: - "kartograph-api:8000:read-write" - "inference.local:443:read-write" + - "aiplatform.googleapis.com:443:read-write" + - "*.aiplatform.googleapis.com:443:read-write" + - "oauth2.googleapis.com:443:read-write" l7_allowed_paths: - "/extraction/workloads/schema/*" - "/extraction/workloads/graph/*" diff --git a/deploy/openshell/policies/gma-one-off-mutations.yaml b/deploy/openshell/policies/gma-one-off-mutations.yaml index 28350ee57..71c712c90 100644 --- a/deploy/openshell/policies/gma-one-off-mutations.yaml +++ b/deploy/openshell/policies/gma-one-off-mutations.yaml @@ -5,6 +5,9 @@ description: Production policy for one-off graph mutations mode. 
endpoints: - "kartograph-api:8000:read-write" - "inference.local:443:read-write" + - "aiplatform.googleapis.com:443:read-write" + - "*.aiplatform.googleapis.com:443:read-write" + - "oauth2.googleapis.com:443:read-write" l7_allowed_paths: - "/extraction/workloads/mutations/*" - "/extraction/workloads/graph/*" diff --git a/deploy/openshell/policies/gma-sticky-base.yaml b/deploy/openshell/policies/gma-sticky-base.yaml index c28ad9828..8dbcbb11a 100644 --- a/deploy/openshell/policies/gma-sticky-base.yaml +++ b/deploy/openshell/policies/gma-sticky-base.yaml @@ -1,9 +1,12 @@ version: 1 name: gma-sticky-base enforcement: hard_requirement -description: Production base policy for graph-management sticky sessions. +description: Production base network policy for graph-management sticky sessions. endpoints: - "kartograph-api:8000:read-write" - "inference.local:443:read-write" + - "aiplatform.googleapis.com:443:read-write" + - "*.aiplatform.googleapis.com:443:read-write" + - "oauth2.googleapis.com:443:read-write" l7_allowed_paths: - "/extraction/workloads/*" diff --git a/scripts/cleanup-openshell-sandboxes.sh b/scripts/cleanup-openshell-sandboxes.sh new file mode 100755 index 000000000..b5375d5a2 --- /dev/null +++ b/scripts/cleanup-openshell-sandboxes.sh @@ -0,0 +1,29 @@ +#!/usr/bin/env bash +# +# Delete Kartograph-owned OpenShell sandboxes left over from sticky GMA sessions +# and extraction jobs (e.g. after make down without end-session). +# +# Safe to run when openshell is not installed or the gateway is down — exits 0. + +set -euo pipefail + +KARTOGRAPH_SANDBOX_PATTERN='^kartograph-(gma|extract)-' + +if ! 
command -v openshell >/dev/null 2>&1; then + echo "openshell not on PATH; skipping Kartograph sandbox cleanup" + exit 0 +fi + +names="$(openshell sandbox list --names 2>/dev/null | grep -E "$KARTOGRAPH_SANDBOX_PATTERN" || true)" +if [[ -z "${names// }" ]]; then + echo "No Kartograph OpenShell sandboxes to clean up" + exit 0 +fi + +echo "Cleaning up Kartograph OpenShell sandboxes..." +while IFS= read -r name; do + [[ -z "$name" ]] && continue + echo " → deleting $name" + openshell sandbox delete "$name" 2>/dev/null || echo " (delete failed or already gone: $name)" +done <<< "$names" +echo "OpenShell sandbox cleanup done." diff --git a/src/agent-runtime/Dockerfile b/src/agent-runtime/Dockerfile index 035c50698..e04e56864 100644 --- a/src/agent-runtime/Dockerfile +++ b/src/agent-runtime/Dockerfile @@ -1,20 +1,29 @@ FROM registry.access.redhat.com/ubi9/python-312:latest -WORKDIR /runtime +USER 0 + +WORKDIR /app COPY --from=ghcr.io/astral-sh/uv:0.9.18 /uv /uvx /bin/ -COPY pyproject.toml uv.lock /runtime/ -COPY kartograph_agent_runtime /runtime/kartograph_agent_runtime +COPY pyproject.toml uv.lock /app/ +COPY kartograph_agent_runtime /app/kartograph_agent_runtime RUN uv sync --frozen --no-dev -ENV PATH="/runtime/.venv/bin:$PATH" \ +# OpenShell sandboxes require a dedicated non-root identity in the image. 
+RUN groupadd -r -g 65532 sandbox \ + && useradd -r -u 65532 -g sandbox -d /sandbox -s /bin/bash sandbox \ + && mkdir -p /sandbox /workspace \ + && chown sandbox:sandbox /sandbox /workspace \ + && chmod -R a+rX /app + +ENV PATH="/app/.venv/bin:$PATH" \ PYTHONUNBUFFERED=1 EXPOSE 8787 -HEALTHCHECK --interval=15s --timeout=3s --start-period=10s --retries=5 \ - CMD /runtime/.venv/bin/python -c "import urllib.request; urllib.request.urlopen('http://127.0.0.1:8787/health').read()" || exit 1 +HEALTHCHECK --interval=15s --timeout=3s --start-period=30s --retries=5 \ + CMD pgrep -f "python -m kartograph_agent_runtime" >/dev/null || exit 1 -CMD ["/runtime/.venv/bin/python", "-m", "kartograph_agent_runtime"] +CMD ["/app/.venv/bin/python", "-m", "kartograph_agent_runtime"] diff --git a/src/agent-runtime/kartograph_agent_runtime/executor.py b/src/agent-runtime/kartograph_agent_runtime/executor.py index 7c90b8ef7..451983152 100644 --- a/src/agent-runtime/kartograph_agent_runtime/executor.py +++ b/src/agent-runtime/kartograph_agent_runtime/executor.py @@ -18,7 +18,7 @@ thinking_events_from_sdk_message, ) from kartograph_agent_runtime.tools import RuntimeTooling -from kartograph_agent_runtime.vertex import build_claude_agent_env +from kartograph_agent_runtime.vertex import VERTEX_COMPATIBLE_EFFORT, build_claude_agent_env _DEFAULT_TURN_TIMEOUT_SECONDS = 1000.0 _SDK_HEARTBEAT_SECONDS = 8.0 @@ -142,6 +142,8 @@ def _build_workspace_prompt_appendix(settings: AgentRuntimeSettings) -> str: def _apply_model_env(settings: AgentRuntimeSettings) -> str: for key, value in build_claude_agent_env(settings).items(): os.environ[key] = value + if settings.openshell_inference_enabled(): + return "OpenShell inference (Vertex)" if settings.vertex_enabled(): return "Vertex AI" if settings.anthropic_api_key.strip(): @@ -440,6 +442,11 @@ async def _stream_with_claude_sdk( "kartograph": build_kartograph_schema_mcp_server(tooling), } options_kwargs["allowed_tools"] = list(GMA_ALLOWED_TOOL_NAMES) + if 
settings.openshell_inference_enabled(): + options_kwargs["extra_args"] = {"bare": None} + options_kwargs["effort"] = VERTEX_COMPATIBLE_EFFORT + elif settings.vertex_enabled(): + options_kwargs["effort"] = VERTEX_COMPATIBLE_EFFORT options = ClaudeAgentOptions( system_prompt=system_prompt, env=sdk_env, diff --git a/src/agent-runtime/kartograph_agent_runtime/settings.py b/src/agent-runtime/kartograph_agent_runtime/settings.py index fb8fac234..41f4c1b42 100644 --- a/src/agent-runtime/kartograph_agent_runtime/settings.py +++ b/src/agent-runtime/kartograph_agent_runtime/settings.py @@ -23,6 +23,11 @@ class AgentRuntimeSettings(BaseSettings): session_id: str = Field(default="", alias="KARTOGRAPH_SESSION_ID") workspace_dir: str = Field(default="/workspace", alias="KARTOGRAPH_WORKSPACE_DIR") anthropic_api_key: str = Field(default="", alias="ANTHROPIC_API_KEY") + anthropic_base_url: str = Field(default="", alias="ANTHROPIC_BASE_URL") + claude_code_disable_experimental_betas: str = Field( + default="", + alias="CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS", + ) vertex_project_id: str = Field(default="", alias="ANTHROPIC_VERTEX_PROJECT_ID") vertex_region: str = Field(default="us-east5", alias="CLOUD_ML_REGION") gcloud_config_dir: str = Field(default="", alias="CLOUDSDK_CONFIG") @@ -34,7 +39,12 @@ class AgentRuntimeSettings(BaseSettings): def vertex_enabled(self) -> bool: return vertex_enabled_from_env() + def openshell_inference_enabled(self) -> bool: + return self.anthropic_base_url.strip().rstrip("/") == "https://inference.local" + def model_configured(self) -> bool: + if self.openshell_inference_enabled(): + return True if self.vertex_enabled(): return bool(self.vertex_project_id.strip()) return bool(self.anthropic_api_key.strip()) diff --git a/src/agent-runtime/kartograph_agent_runtime/vertex.py b/src/agent-runtime/kartograph_agent_runtime/vertex.py index dd88824c4..0c15ce844 100644 --- a/src/agent-runtime/kartograph_agent_runtime/vertex.py +++ 
b/src/agent-runtime/kartograph_agent_runtime/vertex.py @@ -3,6 +3,12 @@ from __future__ import annotations import os +from typing import Literal + +VertexEffortLevel = Literal["low", "medium", "high", "max"] + +# Vertex AI (direct or via OpenShell inference.local) rejects xhigh effort levels. +VERTEX_COMPATIBLE_EFFORT: VertexEffortLevel = "high" def is_truthy_env(value: str | None) -> bool: @@ -17,8 +23,13 @@ def vertex_enabled_from_env() -> bool: def build_claude_agent_env(settings) -> dict[str, str]: - """Build Claude Agent SDK env for Vertex or direct Anthropic API.""" + """Build Claude Agent SDK env for Vertex, OpenShell inference.local, or Anthropic API.""" env: dict[str, str] = {} + if getattr(settings, "openshell_inference_enabled", lambda: False)(): + env["ANTHROPIC_BASE_URL"] = "https://inference.local" + env["ANTHROPIC_API_KEY"] = settings.anthropic_api_key.strip() or "unused" + env["CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS"] = "1" + return env if settings.vertex_enabled(): env["CLAUDE_CODE_USE_VERTEX"] = "1" if settings.vertex_project_id.strip(): diff --git a/src/agent-runtime/tests/test_vertex.py b/src/agent-runtime/tests/test_vertex.py new file mode 100644 index 000000000..77f525d60 --- /dev/null +++ b/src/agent-runtime/tests/test_vertex.py @@ -0,0 +1,28 @@ +"""Tests for Claude Agent SDK env construction.""" + +from __future__ import annotations + +from kartograph_agent_runtime.settings import AgentRuntimeSettings +from kartograph_agent_runtime.vertex import VERTEX_COMPATIBLE_EFFORT, build_claude_agent_env + + +def test_build_claude_agent_env_uses_openshell_inference_without_vertex_adc() -> None: + settings = AgentRuntimeSettings( + ANTHROPIC_BASE_URL="https://inference.local", + ANTHROPIC_API_KEY="unused", + CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS="1", + ) + env = build_claude_agent_env(settings) + assert env["ANTHROPIC_BASE_URL"] == "https://inference.local" + assert env["ANTHROPIC_API_KEY"] == "unused" + assert "CLAUDE_CODE_USE_VERTEX" not in env + + 
+def test_openshell_inference_settings_count_as_model_configured() -> None: + settings = AgentRuntimeSettings(ANTHROPIC_BASE_URL="https://inference.local") + assert settings.openshell_inference_enabled() is True + assert settings.model_configured() is True + + +def test_vertex_compatible_effort_avoids_xhigh() -> None: + assert VERTEX_COMPATIBLE_EFFORT == "high" diff --git a/src/api/extraction/application/agent_session_service.py b/src/api/extraction/application/agent_session_service.py index f8135a9d0..e25399f28 100644 --- a/src/api/extraction/application/agent_session_service.py +++ b/src/api/extraction/application/agent_session_service.py @@ -96,6 +96,38 @@ async def _end_session_record(self, session: ExtractionAgentSession) -> None: session.archive() await self._repository.save(session) + @staticmethod + def _session_had_sticky_runtime_attempt(session: ExtractionAgentSession) -> bool: + sticky = session.runtime_context.get("sticky_runtime") + if not isinstance(sticky, dict): + return False + phase = sticky.get("phase") + return phase in {"starting", "ready", "unhealthy", "failed"} + + async def _reconcile_orphaned_sticky_session( + self, + session: ExtractionAgentSession, + ) -> ExtractionAgentSession | None: + """Archive sessions whose sticky runtime no longer exists (e.g. 
after sandbox delete).""" + if self._sticky_runtime_manager is None: + return session + if not self._session_had_sticky_runtime_attempt(session): + return session + + sticky = session.runtime_context.get("sticky_runtime", {}) + container_id = sticky.get("container_id") if isinstance(sticky, dict) else None + if self._sticky_runtime_manager.is_runtime_active( + session_id=session.id, + container_id=container_id if isinstance(container_id, str) else None, + user_id=session.user_id, + knowledge_graph_id=session.knowledge_graph_id, + mode=session.mode.value, + ): + return session + + await self._end_session_record(session) + return None + async def _create_session( self, *, @@ -147,11 +179,14 @@ async def get_active_session( ui_mode: GraphManagementUiMode, ) -> ExtractionAgentSession | None: await self._expire_idle_sessions(user_id, knowledge_graph_id) - return await self._repository.find_active_by_ui_mode( + session = await self._repository.find_active_by_ui_mode( user_id=user_id, knowledge_graph_id=knowledge_graph_id, ui_mode=ui_mode, ) + if session is None: + return None + return await self._reconcile_orphaned_sticky_session(session) async def start_session( self, @@ -166,7 +201,9 @@ async def start_session( ui_mode=ui_mode, ) if existing is not None: - return existing + existing = await self._reconcile_orphaned_sticky_session(existing) + if existing is not None: + return existing return await self._create_session( user_id=user_id, knowledge_graph_id=knowledge_graph_id, diff --git a/src/api/extraction/application/archive_completed_extraction_jobs.py b/src/api/extraction/application/archive_completed_extraction_jobs.py new file mode 100644 index 000000000..060a68c0b --- /dev/null +++ b/src/api/extraction/application/archive_completed_extraction_jobs.py @@ -0,0 +1,71 @@ +"""Promote completed extraction jobs into archived history with metric backfill.""" + +from __future__ import annotations + +from pathlib import Path +from typing import Any + +from 
extraction.domain.extraction_job import ExtractionJobRecord, ExtractionJobStatus +from extraction.infrastructure.extraction_job_activity import job_workdir +from extraction.infrastructure.extraction_job_mutation_metrics import reconcile_mutation_metrics +from extraction.infrastructure.extraction_job_verdict import load_mutation_verdict +from extraction.infrastructure.repositories.extraction_job_repository import ExtractionJobRepository +from extraction.infrastructure.workload_runtime_settings import ExtractionWorkloadRuntimeSettings + + +def backfill_archival_metrics( + job: ExtractionJobRecord, + *, + workdir: Path, +) -> dict[str, Any]: + """Recompute graph write metrics from a persisted job workdir before archival.""" + base = { + "entities_created": job.entities_created, + "entities_modified": job.entities_modified, + "relationships_created": job.relationships_created, + "relationships_modified": job.relationships_modified, + } + verdict = load_mutation_verdict(workdir) + operations_applied = verdict.operations_applied if verdict else 0 + return reconcile_mutation_metrics( + base, + workdir=workdir, + operations_applied=operations_applied, + ) + + +async def archive_completed_extraction_jobs( + *, + repository: ExtractionJobRepository, + knowledge_graph_id: str, + settings: ExtractionWorkloadRuntimeSettings, +) -> dict[str, int]: + """Move all completed jobs to archived, backfilling metrics from workdirs when possible.""" + jobs = await repository.list_jobs_by_status( + knowledge_graph_id=knowledge_graph_id, + status=ExtractionJobStatus.COMPLETED, + ) + archived_count = 0 + metrics_backfilled_count = 0 + for job in jobs: + workdir = job_workdir( + knowledge_graph_id=job.knowledge_graph_id, + job_id=job.job_id, + settings=settings, + ) + metrics = backfill_archival_metrics(job, workdir=workdir) + prior_write_ops = job.write_ops() + new_write_ops = int(metrics.get("write_ops") or 0) + if new_write_ops > prior_write_ops: + metrics_backfilled_count += 1 + promoted 
= await repository.promote_completed_job_to_archived( + knowledge_graph_id=knowledge_graph_id, + job_id=job.job_id, + metrics=metrics, + ) + if promoted: + archived_count += 1 + return { + "archived_count": archived_count, + "metrics_backfilled_count": metrics_backfilled_count, + } diff --git a/src/api/extraction/application/schema_authoring_guide.py b/src/api/extraction/application/schema_authoring_guide.py index 95b79296a..dce20cbca 100644 --- a/src/api/extraction/application/schema_authoring_guide.py +++ b/src/api/extraction/application/schema_authoring_guide.py @@ -300,4 +300,29 @@ **Do not** conflate schema design, prepopulation planning, and implementation in one turn when the user listed multiple deliverables — but **do** stop all implementation when graph tools return systemic server errors. + +## Batch extraction jobs (workload API, no MCP) + +Extraction job sandboxes use `helpers/workload-graph-read.sh` and `helpers/workload-mutations.sh` +instead of `kartograph_*` MCP tools. Credentials live in `job-context.json`. + +**Read before UPDATE:** `job-context.json` `target_instances` includes `graph_id` and +`properties_missing` (empty fields only). For populated fields you are refining — especially long +text like `description` — fetch live values first: + +```bash +bash helpers/workload-graph-read.sh search-by-slug <slug> --entity-type <Type> --out mutations/current_<slug>.json +``` + +**Partial UPDATE:** `set_properties` merges into the live node; omitted properties are preserved. +Put only changed keys in each UPDATE line. Example: + +```json +{"op":"UPDATE","type":"node","id":"componenttest:abc123def4567890","set_properties":{"description":"<edited full text>"}} +``` + +For surgical text edits: load the saved JSON, edit one property with Bash/python, emit JSONL +programmatically — do not paste full prior text into chat or resubmit unrelated properties. + +**Apply:** validate then apply via `helpers/workload-mutations.sh` (writes `mutations/result.json`). 
""".strip() diff --git a/src/api/extraction/infrastructure/extraction_job_container.py b/src/api/extraction/infrastructure/extraction_job_container.py index 52d3c4b21..e300f7bc2 100644 --- a/src/api/extraction/infrastructure/extraction_job_container.py +++ b/src/api/extraction/infrastructure/extraction_job_container.py @@ -1,4 +1,4 @@ -"""Container lifecycle helpers for agentic-ci extraction jobs.""" +"""Container and OpenShell sandbox lifecycle helpers for extraction jobs.""" from __future__ import annotations @@ -7,11 +7,12 @@ from shared_kernel.container_runtime.factory import create_container_runtime _CONTAINER_NAME_SAFE = re.compile(r"[^a-zA-Z0-9_.-]+") +_EXTRACTION_SANDBOX_PREFIX = "kartograph-extract-" def extraction_job_container_name(job_id: str) -> str: cleaned = _CONTAINER_NAME_SAFE.sub("-", job_id).strip("-") - return f"kartograph-extract-{cleaned}"[:63].rstrip("-_.") + return f"{_EXTRACTION_SANDBOX_PREFIX}{cleaned}"[:63].rstrip("-_.") def stop_extraction_job_container(*, job_id: str, container_engine: str = "auto") -> bool: @@ -32,3 +33,37 @@ def stop_extraction_job_containers( if stop_extraction_job_container(job_id=job_id, container_engine=container_engine): stopped += 1 return stopped + + +def stop_extraction_job_sandboxes( + *, + job_ids: tuple[str, ...] | list[str], + sweep_orphans: bool = False, +) -> int: + """Delete OpenShell sandboxes for extraction jobs. Returns count deleted.""" + from extraction.infrastructure.openshell import sandbox as openshell_sandbox + + stopped = openshell_sandbox.stop_extraction_job_sandboxes(job_ids=job_ids) + if sweep_orphans: + stopped += openshell_sandbox.delete_sandboxes_by_prefix(_EXTRACTION_SANDBOX_PREFIX) + return stopped + + +def stop_extraction_job_runtimes( + *, + job_ids: tuple[str, ...] 
| list[str], + container_engine: str = "auto", + openshell_backend: bool = False, +) -> tuple[int, int]: + """Stop Docker containers and/or OpenShell sandboxes for extraction jobs.""" + containers_stopped = stop_extraction_job_containers( + job_ids=job_ids, + container_engine=container_engine, + ) + sandboxes_stopped = 0 + if openshell_backend: + sandboxes_stopped = stop_extraction_job_sandboxes( + job_ids=job_ids, + sweep_orphans=True, + ) + return containers_stopped, sandboxes_stopped diff --git a/src/api/extraction/infrastructure/extraction_job_helpers/__init__.py b/src/api/extraction/infrastructure/extraction_job_helpers/__init__.py index 732ade2c0..f8d32cdfd 100644 --- a/src/api/extraction/infrastructure/extraction_job_helpers/__init__.py +++ b/src/api/extraction/infrastructure/extraction_job_helpers/__init__.py @@ -4,6 +4,6 @@ HELPERS_DIR = Path(__file__).resolve().parent HELPERS_CONTAINER_DIR = "helpers" -HELPER_SCRIPT_NAMES = ("workload-mutations.sh", "sync_instances.py") +HELPER_SCRIPT_NAMES = ("workload-mutations.sh", "workload-graph-read.sh", "sync_instances.py") HELPER_RESOURCE_NAMES = ("mutation-examples.jsonl",) HELPER_BUNDLE_NAMES = HELPER_SCRIPT_NAMES + HELPER_RESOURCE_NAMES diff --git a/src/api/extraction/infrastructure/extraction_job_helpers/workload-graph-read.sh b/src/api/extraction/infrastructure/extraction_job_helpers/workload-graph-read.sh new file mode 100755 index 000000000..c9a8e7aab --- /dev/null +++ b/src/api/extraction/infrastructure/extraction_job_helpers/workload-graph-read.sh @@ -0,0 +1,141 @@ +#!/usr/bin/env bash +# Kartograph extraction job helper — read graph state via workload API. 
+# +# Usage: +# helpers/workload-graph-read.sh search-by-slug SLUG [--entity-type TYPE] [--out FILE] +# helpers/workload-graph-read.sh instances ENTITY_TYPE [--limit N] [--offset N] [--out FILE] +# helpers/workload-graph-read.sh ontology [--out FILE] +# helpers/workload-graph-read.sh authoring-guide [--out FILE] +set -euo pipefail + +COMMAND="${1:-}" +shift || true + +WORKDIR="${KARTOGRAPH_WORKSPACE:-/workspace}" + +python3 - "${COMMAND}" "${WORKDIR}" "$@" <<'PY' +import json +import sys +import urllib.error +import urllib.request +from pathlib import Path +from urllib.parse import urlencode + +command, workdir, *args = sys.argv[1:] +workdir_path = Path(workdir) +context_path = workdir_path / "job-context.json" + + +def fail(message: str, *, http_status: int | None = None) -> None: + payload = {"error": message} + if http_status is not None: + payload["http_status"] = http_status + json.dump(payload, sys.stdout, indent=2) + print() + raise SystemExit(1) + + +def parse_flags(argv: list[str]) -> tuple[dict[str, str], str | None]: + flags: dict[str, str] = {} + out_path: str | None = None + index = 0 + while index < len(argv): + token = argv[index] + if token == "--out": + index += 1 + if index >= len(argv): + fail("--out requires a file path") + out_path = argv[index] + elif token == "--entity-type": + index += 1 + if index >= len(argv): + fail("--entity-type requires a value") + flags["entity_type"] = argv[index] + elif token == "--limit": + index += 1 + if index >= len(argv): + fail("--limit requires a value") + flags["limit"] = argv[index] + elif token == "--offset": + index += 1 + if index >= len(argv): + fail("--offset requires a value") + flags["offset"] = argv[index] + else: + fail(f"unexpected argument: {token}") + index += 1 + return flags, out_path + + +def write_payload(payload: dict, out_path: str | None) -> None: + text = json.dumps(payload, indent=2) + "\n" + if out_path: + path = Path(out_path) + path.parent.mkdir(parents=True, exist_ok=True) + 
path.write_text(text, encoding="utf-8") + print(text, end="") + + +if command not in { + "search-by-slug", + "instances", + "ontology", + "authoring-guide", +}: + fail("first argument must be search-by-slug, instances, ontology, or authoring-guide") +if not context_path.is_file(): + fail("missing job-context.json in workspace") + +context = json.loads(context_path.read_text(encoding="utf-8")) +api_base = str(context["api_base_url"]).rstrip("/") +token = str(context["workload_token"]) + +if command == "search-by-slug": + if not args or args[0].startswith("--"): + fail("search-by-slug requires SLUG") + slug = args[0] + flags, out_path = parse_flags(args[1:]) + query = {"slug": slug} + entity_type = flags.get("entity_type") + if entity_type: + query["entity_type"] = entity_type + endpoint = f"{api_base}/extraction/workloads/graph/search-by-slug?{urlencode(query)}" +elif command == "instances": + if not args or args[0].startswith("--"): + fail("instances requires ENTITY_TYPE") + entity_type = args[0] + flags, out_path = parse_flags(args[1:]) + query = {"entity_type": entity_type} + if "limit" in flags: + query["limit"] = flags["limit"] + if "offset" in flags: + query["offset"] = flags["offset"] + endpoint = f"{api_base}/extraction/workloads/graph/instances?{urlencode(query)}" +elif command == "ontology": + flags, out_path = parse_flags(args) + endpoint = f"{api_base}/extraction/workloads/schema/ontology" +else: + flags, out_path = parse_flags(args) + endpoint = f"{api_base}/extraction/workloads/schema/authoring-guide" + +request = urllib.request.Request( + endpoint, + method="GET", + headers={"X-Workload-Token": token}, +) + +try: + with urllib.request.urlopen(request, timeout=120) as response: + payload = json.loads(response.read().decode("utf-8")) +except urllib.error.HTTPError as exc: + try: + payload = json.loads(exc.read().decode("utf-8")) + detail = payload.get("detail") or payload + except json.JSONDecodeError: + detail = f"HTTP {exc.code}" + fail(str(detail), 
http_status=exc.code) +except urllib.error.URLError as exc: + fail(f"workload API request failed: {exc.reason}") + +write_payload(payload, out_path) +PY diff --git a/src/api/extraction/infrastructure/extraction_job_mutation_metrics.py b/src/api/extraction/infrastructure/extraction_job_mutation_metrics.py index f3e1e3846..ab106c2ac 100644 --- a/src/api/extraction/infrastructure/extraction_job_mutation_metrics.py +++ b/src/api/extraction/infrastructure/extraction_job_mutation_metrics.py @@ -3,6 +3,7 @@ from __future__ import annotations from pathlib import Path +from typing import Any from extraction.domain.mutation_jsonl_metrics import metrics_from_mutation_jsonl @@ -10,6 +11,7 @@ "applied_mutation_jsonl_from_workdir", "metrics_from_mutation_jsonl", "metrics_from_mutation_workdir", + "reconcile_mutation_metrics", ] @@ -44,6 +46,31 @@ def applied_mutation_jsonl_from_workdir(job_root: Path) -> str | None: return content or None +def reconcile_mutation_metrics( + metrics: dict[str, Any], + *, + workdir: Path, + operations_applied: int, +) -> dict[str, Any]: + """Ensure graph write counters align with applied mutation batches.""" + merged = dict(metrics) + if int(merged.get("write_ops", 0)) > 0: + return merged + + workdir_metrics = metrics_from_mutation_workdir(workdir) + if int(workdir_metrics.get("write_ops", 0)) > 0: + merged.update(workdir_metrics) + applied_jsonl = applied_mutation_jsonl_from_workdir(workdir) + if applied_jsonl: + merged["applied_mutations_jsonl"] = applied_jsonl + return merged + + if operations_applied > 0: + merged["entities_modified"] = operations_applied + merged["write_ops"] = operations_applied + return merged + + def _empty_metrics() -> dict[str, int]: return { "entities_created": 0, diff --git a/src/api/extraction/infrastructure/extraction_job_prompt.py b/src/api/extraction/infrastructure/extraction_job_prompt.py index be76a8f2d..c733384ca 100644 --- a/src/api/extraction/infrastructure/extraction_job_prompt.py +++ 
b/src/api/extraction/infrastructure/extraction_job_prompt.py @@ -8,13 +8,16 @@ EXTRACTION_PROMPT_FILENAME = "extraction_prompt.md" MUTATIONS_HELPER = "helpers/workload-mutations.sh" +GRAPH_READ_HELPER = "helpers/workload-graph-read.sh" MUTATION_EXAMPLES = "helpers/mutation-examples.jsonl" EXTRACTION_JOB_INVOKE_PROMPT = ( "You are running a Kartograph extraction job in /workspace. " f"Read {EXTRACTION_PROMPT_FILENAME}, job-context.json, and sources-index.json, then follow " "the instructions completely. Read job-context.json target_instances for graph_id and " - "properties_missing before querying the graph API. Copy JSONL shapes from " + "properties_missing before querying the graph. For existing instances, fetch live properties " + f"with `bash {GRAPH_READ_HELPER} search-by-slug <slug> --entity-type <Type> --out " + "mutations/current_<slug>.json` before editing. Copy JSONL shapes from " f"{MUTATION_EXAMPLES} when writing mutations. Write JSONL batches under mutations/, validate with " f"`bash {MUTATIONS_HELPER} validate mutations/<batch>.jsonl`, then apply with " f"`bash {MUTATIONS_HELPER} apply mutations/<batch>.jsonl`. Do not finish until apply " @@ -22,6 +25,14 @@ ) +def build_extraction_job_invoke_prompt(*, workspace_dir: str = "/workspace") -> str: + """Return the one-shot claude-code -p prompt for one extraction job run.""" + prompt = EXTRACTION_JOB_INVOKE_PROMPT + if workspace_dir != "/workspace": + prompt = prompt.replace("/workspace", workspace_dir.rstrip("/")) + return prompt + + def write_extraction_prompt_file(*, workdir: Path, prompt: str) -> Path: """Materialize the full job instructions for the agent to read from disk.""" path = workdir / EXTRACTION_PROMPT_FILENAME @@ -93,6 +104,27 @@ def build_extraction_job_prompt(*, job: ExtractionJobRecord) -> str: "Use set_properties (not properties). 
UPDATE and DELETE require top-level id.", "Existing instances must use UPDATE with graph_id from job-context.json target_instances.", "", + "## Editing existing instances (token-efficient)", + "UPDATE merges `set_properties` into the live node — properties you omit are preserved.", + "Include only the fields you are changing in each UPDATE line; never resubmit every", + "property when a subset changed.", + "`properties_missing` lists empty ontology fields only. Populated fields you refine", + "(for example a long `description`) are not listed — fetch current values before editing.", + f"- Fetch one instance: `bash {GRAPH_READ_HELPER} search-by-slug <slug> --entity-type <Type> " + "--out mutations/current_<slug>.json`", + f"- List by type: `bash {GRAPH_READ_HELPER} instances <EntityType> --limit 100 --offset 0`", + "For surgical edits to long text: load the saved JSON, edit the target property with", + "Bash/python, then write one UPDATE line with only the changed keys in `set_properties`.", + "Generate JSONL programmatically (Write + python3); do not paste full prior text into chat.", + "Prefer UPDATE over CREATE when graph_id is present in job-context.json.", + "", + "## Graph read workflow (required before UPDATE)", + f"Use `bash {GRAPH_READ_HELPER}` (reads api_base_url and workload_token from job-context.json):", + f"- `bash {GRAPH_READ_HELPER} search-by-slug <slug> [--entity-type <Type>] [--out FILE]`", + f"- `bash {GRAPH_READ_HELPER} instances <EntityType> [--limit N] [--offset N] [--out FILE]`", + f"- `bash {GRAPH_READ_HELPER} ontology [--out FILE]`", + f"- `bash {GRAPH_READ_HELPER} authoring-guide [--out FILE]`", + "", "## Mutations workflow (required)", "This container has no Kartograph MCP tools. 
Use the bundled helper script:", f"- Validate: `bash {MUTATIONS_HELPER} validate mutations/<batch>.jsonl`", @@ -101,15 +133,15 @@ def build_extraction_job_prompt(*, job: ExtractionJobRecord) -> str: "workload API, and writes mutations/result.json (the CI verdict artifact).", "Always validate before apply. Do not finish until apply succeeds.", "", - "Manual curl (only if helper fails): base `{api_base_url}/extraction/workloads`,", + "Manual curl (only if helpers fail): base `{api_base_url}/extraction/workloads`,", "header `X-Workload-Token: <workload_token>`, POST `/mutations/validate` or", "`/mutations/apply` with JSON body `{\"jsonl\": \"<file contents>\"}`.", "", - "Other useful GET endpoints:", + "Other useful GET endpoints (prefer workload-graph-read.sh):", "- `/schema/authoring-guide` — JSONL mutation shapes and rules", "- `/schema/ontology` — current graph schema", - "- `/graph/search?q=...` — search existing nodes", - "- `/graph/instances?entity_type=...` — list instances by type", + "- `/graph/search-by-slug?slug=...&entity_type=...` — one instance with full properties", + "- `/graph/instances?entity_type=...` — paginated instances by type", "", "## Completion", "When finished, mutations/result.json must show action=apply and operations_applied > 0.", diff --git a/src/api/extraction/infrastructure/extraction_run_orchestrator.py b/src/api/extraction/infrastructure/extraction_run_orchestrator.py index e5c256167..922ac38f3 100644 --- a/src/api/extraction/infrastructure/extraction_run_orchestrator.py +++ b/src/api/extraction/infrastructure/extraction_run_orchestrator.py @@ -13,6 +13,9 @@ from extraction.infrastructure.extraction_job_executor import ExtractionJobExecutor from extraction.domain.extraction_job import ExtractionRunStatus from extraction.infrastructure.repositories.extraction_job_repository import ExtractionJobRepository +from management.infrastructure.extraction_baseline_updater import ( + advance_extraction_baselines_for_knowledge_graph, +) logger 
= logging.getLogger(__name__) @@ -207,6 +210,10 @@ async def _maybe_finish_run(self, state: _OrchestratorState) -> None: pause_requested=False, completed_at=datetime.now(UTC), ) + await advance_extraction_baselines_for_knowledge_graph( + session=session, + knowledge_graph_id=state.knowledge_graph_id, + ) await session.commit() state.stop_event.set() self._active.pop(state.knowledge_graph_id, None) diff --git a/src/api/extraction/infrastructure/openshell/cli.py b/src/api/extraction/infrastructure/openshell/cli.py index 3fd54c115..fae8071ba 100644 --- a/src/api/extraction/infrastructure/openshell/cli.py +++ b/src/api/extraction/infrastructure/openshell/cli.py @@ -3,6 +3,7 @@ from __future__ import annotations import logging +import os import subprocess from typing import Sequence @@ -15,6 +16,30 @@ class OpenShellCliError(RuntimeError): """Raised when an OpenShell CLI command fails.""" +def openshell_subprocess_env() -> dict[str, str]: + """Build subprocess env so openshell uses the mounted host gateway config.""" + env = os.environ.copy() + config_home = ( + os.environ.get("KARTOGRAPH_EXTRACTION_RUNTIME_OPENSHELL_XDG_CONFIG_HOME", "").strip() + or os.environ.get("XDG_CONFIG_HOME", "").strip() + ) + if config_home: + env["XDG_CONFIG_HOME"] = config_home + gateway_name = ( + os.environ.get("KARTOGRAPH_EXTRACTION_RUNTIME_OPENSHELL_GATEWAY_NAME", "").strip() + or os.environ.get("OPENSHELL_GATEWAY", "").strip() + ) + if gateway_name: + env["OPENSHELL_GATEWAY"] = gateway_name + gateway_url = ( + os.environ.get("KARTOGRAPH_EXTRACTION_RUNTIME_OPENSHELL_GATEWAY_URL", "").strip() + or os.environ.get("OPENSHELL_GATEWAY_ENDPOINT", "").strip() + ) + if gateway_url: + env["OPENSHELL_GATEWAY_ENDPOINT"] = gateway_url + return env + + def redact_args(args: Sequence[str]) -> list[str]: safe: list[str] = [] for arg in args: @@ -43,6 +68,7 @@ def run_openshell( text=text, check=False, timeout=timeout, + env=openshell_subprocess_env(), ) except FileNotFoundError as exc: raise 
OpenShellCliError( @@ -65,6 +91,7 @@ def popen_openshell(args: Sequence[str]) -> subprocess.Popen[str]: stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, + env=openshell_subprocess_env(), ) except FileNotFoundError as exc: raise OpenShellCliError( diff --git a/src/api/extraction/infrastructure/openshell/extraction_sandbox_pool.py b/src/api/extraction/infrastructure/openshell/extraction_sandbox_pool.py new file mode 100644 index 000000000..e33227c66 --- /dev/null +++ b/src/api/extraction/infrastructure/openshell/extraction_sandbox_pool.py @@ -0,0 +1,55 @@ +"""Dedicated OpenShell sandboxes per extraction worker.""" + +from __future__ import annotations + +import re +from dataclasses import dataclass + +from extraction.domain.extraction_job import ExtractionJobRecord +from extraction.infrastructure.openshell.sandbox import sanitize_sandbox_name +from extraction.infrastructure.workload_runtime_settings import ExtractionWorkloadRuntimeSettings + +_WORKER_INDEX_RE = re.compile(r"(\d+)\s*$") + + +@dataclass(frozen=True) +class ExtractionSandboxAssignment: + """Resolved sandbox identity for one extraction job run.""" + + sandbox_name: str + slot: int | None + reuse: bool + + +def worker_index(worker_id: str | None) -> int: + """Parse worker-01 style identifiers into a 1-based index.""" + if not worker_id: + return 1 + match = _WORKER_INDEX_RE.search(worker_id.strip()) + if not match: + return 1 + return max(1, int(match.group(1))) + + +def resolve_extraction_sandbox_assignment( + job: ExtractionJobRecord, + settings: ExtractionWorkloadRuntimeSettings, +) -> ExtractionSandboxAssignment: + """One reusable OpenShell sandbox per extraction worker for the run.""" + _ = settings + worker_num = worker_index(job.worker_id) + kg_token = _knowledge_graph_token(job.knowledge_graph_id) + name = sanitize_sandbox_name( + "kartograph-extract-", + f"{kg_token}-w{worker_num:02d}", + ) + return ExtractionSandboxAssignment( + sandbox_name=name, + slot=worker_num, + reuse=True, + ) + 
+ +def _knowledge_graph_token(knowledge_graph_id: str) -> str: + cleaned = re.sub(r"[^a-zA-Z0-9]+", "", knowledge_graph_id).lower() + return cleaned[-10:] or "kg" diff --git a/src/api/extraction/infrastructure/openshell/gateway.py b/src/api/extraction/infrastructure/openshell/gateway.py index e272a096f..e21072590 100644 --- a/src/api/extraction/infrastructure/openshell/gateway.py +++ b/src/api/extraction/infrastructure/openshell/gateway.py @@ -2,20 +2,49 @@ from __future__ import annotations -from extraction.infrastructure.openshell.cli import run_openshell +from extraction.infrastructure.openshell.cli import OpenShellCliError, run_openshell -def gateway_is_running() -> bool: +def gateway_is_registered(*, gateway_name: str) -> bool: + result = run_openshell( + ["gateway", "--gateway", gateway_name, "info"], + check=False, + ) + return result.returncode == 0 + + +def gateway_is_connected() -> bool: result = run_openshell(["status"], check=False) if result.returncode != 0: return False - return "No gateway configured" not in (result.stdout or "") + output = f"{result.stdout or ''}\n{result.stderr or ''}" + if "No gateway configured" in output: + return False + return "Connected" in output def ensure_gateway_registered(*, gateway_name: str, gateway_url: str) -> None: - """Ensure a gateway is registered without starting local podman services.""" - if gateway_is_running(): + """Verify the OpenShell gateway is registered and reachable. + + Registration and mTLS material are expected to be provisioned on the host + (systemd user service + `openshell gateway add`). Kartograph does not run + `gateway add` when a registration already exists — that path fails inside + compose when config is bind-mounted read-only or HOME differs from the host. + """ + if gateway_is_registered(gateway_name=gateway_name): + if gateway_is_connected(): + return + raise OpenShellCliError( + f"OpenShell gateway '{gateway_name}' is registered but not reachable at " + f"{gateway_url}. 
Ensure openshell-gateway is running on the host. For " + "compose dev, gateway.toml should include " + 'bind_address = "0.0.0.0:17670" and the API container needs ' + "XDG_CONFIG_HOME=/root/.config with the host ~/.config/openshell mount." + ) + + if gateway_is_connected(): return + run_openshell( [ "gateway", diff --git a/src/api/extraction/infrastructure/openshell/inference_env.py b/src/api/extraction/infrastructure/openshell/inference_env.py new file mode 100644 index 000000000..3758d0feb --- /dev/null +++ b/src/api/extraction/infrastructure/openshell/inference_env.py @@ -0,0 +1,65 @@ +"""OpenShell inference.local environment for sandbox agents.""" + +from __future__ import annotations + + +def build_openshell_inference_env_script_lines( + *, + workspace_dir: str = "/sandbox", + otel_port: int | None = None, + otel_rate_file: str | None = None, +) -> list[str]: + """Return env exports for Claude Code via inference.local inside OpenShell sandboxes. + + Do not set ``CLAUDE_CODE_USE_VERTEX`` here — that makes Claude Code perform ADC + discovery inside the sandbox, which fails without mounted GCP credentials. + """ + lines = [ + "export ANTHROPIC_BASE_URL=https://inference.local", + "export ANTHROPIC_API_KEY=unused", + # Vertex via inference.local rejects beta-only fields (context_management, etc.) + # when the gateway does not forward anthropic-beta headers. 
+ "export CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS=1", + f"export KARTOGRAPH_WORKSPACE={workspace_dir.rstrip('/')}", + "export DISABLE_AUTOUPDATER=1", + ] + if otel_port: + lines.extend( + [ + "export CLAUDE_CODE_ENABLE_TELEMETRY=1", + "export OTEL_METRICS_EXPORTER=otlp", + "export OTEL_LOGS_EXPORTER=otlp", + "export OTEL_EXPORTER_OTLP_PROTOCOL=http/json", + f"export OTEL_EXPORTER_OTLP_ENDPOINT=http://10.200.0.1:{otel_port}", + "export OTEL_METRIC_EXPORT_INTERVAL=10000", + ] + ) + if otel_rate_file: + from shlex import quote + + lines.append(f"export OTEL_RATE_FILE={quote(otel_rate_file)}") + return lines + + +def insert_claude_bare_flag(agent_args: list[str]) -> list[str]: + """Insert ``--bare`` after the claude binary for OpenShell inference.local auth.""" + if not agent_args or agent_args[0] != "claude" or "--bare" in agent_args: + return agent_args + return [agent_args[0], "--bare", *agent_args[1:]] + + +def insert_vertex_compatible_effort(agent_args: list[str]) -> list[str]: + """Ensure batch extraction CLI uses a Vertex-supported effort level.""" + if not agent_args or agent_args[0] != "claude": + return agent_args + if any(arg == "--effort" for arg in agent_args): + return agent_args + bare_index = 1 + if len(agent_args) > 1 and agent_args[1] == "--bare": + bare_index = 2 + return [ + *agent_args[:bare_index], + "--effort", + "high", + *agent_args[bare_index:], + ] diff --git a/src/api/extraction/infrastructure/openshell/openshell_sticky_session_runtime_manager.py b/src/api/extraction/infrastructure/openshell/openshell_sticky_session_runtime_manager.py index 8c83eace8..f75af1c03 100644 --- a/src/api/extraction/infrastructure/openshell/openshell_sticky_session_runtime_manager.py +++ b/src/api/extraction/infrastructure/openshell/openshell_sticky_session_runtime_manager.py @@ -10,8 +10,13 @@ from extraction.infrastructure.openshell import gateway as openshell_gateway from extraction.infrastructure.openshell import sandbox as openshell_sandbox from 
extraction.infrastructure.openshell.audit import LoggingOpenShellRuntimeProbe, OpenShellRuntimeProbe +from extraction.infrastructure.openshell.runtime_env import apply_openshell_gateway_env +from extraction.infrastructure.openshell.vertex_provider import ensure_vertex_provider from extraction.infrastructure.runtime_session_auth import issue_runtime_auth_token -from extraction.infrastructure.vertex_runtime_env import build_vertex_container_env +from extraction.infrastructure.vertex_runtime_env import ( + OPENSHELL_GCLOUD_CONTAINER_PATH, + build_openshell_inference_container_env, +) from extraction.ports.runtime import ( IStickySessionRuntimeManager, StickySessionRuntimeBootstrap, @@ -47,17 +52,19 @@ def __init__( sticky_image: str, session_ttl: timedelta = timedelta(minutes=60), sticky_service_port: int = 8787, - container_work_mount: str = "/workspace", + container_work_mount: str = "/sandbox", vertex_project_id: str = "", vertex_region: str = "us-east5", vertex_enabled: bool = False, + gcloud_config_mount: str | None = None, + gcloud_config_container_path: str = OPENSHELL_GCLOUD_CONTAINER_PATH, agent_turn_timeout_seconds: float = 1000.0, agent_max_turns: int = 500, api_base_url: str = "http://api:8000", gateway_name: str = "kartograph", gateway_url: str = "https://localhost:17670", provider_name: str = "kartograph-gma", - runtime_host: str = "host.docker.internal", + runtime_host: str = "127.0.0.1", forward_port_base: int = 18787, policy_dir: str | None = None, policy_enforcement: str = "soft", @@ -70,6 +77,8 @@ def __init__( self._vertex_project_id = vertex_project_id self._vertex_region = vertex_region self._vertex_enabled = vertex_enabled + self._gcloud_config_mount = gcloud_config_mount + self._gcloud_config_container_path = gcloud_config_container_path.rstrip("/") self._agent_turn_timeout_seconds = agent_turn_timeout_seconds self._agent_max_turns = agent_max_turns self._api_base_url = api_base_url @@ -236,6 +245,18 @@ def _start_runtime( 
gateway_name=self._gateway_name, gateway_url=self._gateway_url, ) + apply_openshell_gateway_env( + gateway_name=self._gateway_name, + gateway_url=self._gateway_url, + ) + if self._vertex_enabled: + ensure_vertex_provider( + provider_name=self._provider_name, + project_id=self._vertex_project_id, + region=self._vertex_region, + gcloud_config_mount=self._gcloud_config_mount, + auth_mode="vertex", + ) sandbox_name = _sanitize_sandbox_name(session_id) forward_port = _forward_port(session_id=session_id, base=self._forward_port_base) runtime_auth_token = issue_runtime_auth_token() @@ -267,9 +288,9 @@ def _start_runtime( ) if bootstrap is not None: - openshell_sandbox.upload_path( + openshell_sandbox.upload_directory_contents( sandbox_name=sandbox_name, - local_path=bootstrap.host_session_work_dir, + local_dir=bootstrap.host_session_work_dir, dest=self._container_work_mount, ) @@ -279,6 +300,7 @@ def _start_runtime( workload="gma", policy_dir=self._policy_dir, api_host=_api_host_from_base_url(bootstrap.api_base_url if bootstrap else self._api_base_url), + vertex_region=self._vertex_region if self._vertex_enabled else None, policy_enforcement=self._policy_enforcement, probe=self._probe, ) @@ -295,12 +317,16 @@ def _start_runtime( sandbox_name=sandbox_name, env=env, command=( - "/runtime/.venv/bin/python", + "/app/.venv/bin/python", "-m", "kartograph_agent_runtime", ), ) - openshell_sandbox.start_forward(sandbox_name=sandbox_name, port=forward_port) + openshell_sandbox.start_forward( + sandbox_name=sandbox_name, + port=forward_port, + target_port=self._sticky_service_port, + ) openshell_sandbox.emit_lifecycle( sandbox_name=sandbox_name, action="started", @@ -351,12 +377,7 @@ def _build_runtime_env( } ) if self._vertex_enabled: - env.update( - build_vertex_container_env( - project_id=self._vertex_project_id, - region=self._vertex_region, - ) - ) + env.update(build_openshell_inference_container_env()) return env def _terminate_sandbox(self, lease: StickySessionRuntimeLease) 
-> None: diff --git a/src/api/extraction/infrastructure/openshell/policies/gma-extraction-jobs.yaml b/src/api/extraction/infrastructure/openshell/policies/gma-extraction-jobs.yaml index fa779b175..513600755 100644 --- a/src/api/extraction/infrastructure/openshell/policies/gma-extraction-jobs.yaml +++ b/src/api/extraction/infrastructure/openshell/policies/gma-extraction-jobs.yaml @@ -5,6 +5,10 @@ description: Sticky session policy for extraction-jobs graph-management mode. endpoints: - "api:8000:read-write" - "inference.local:443:read-write" + - "aiplatform.googleapis.com:443:read-write" + - "*.aiplatform.googleapis.com:443:read-write" + - "oauth2.googleapis.com:443:read-write" l7_allowed_paths: - "/extraction/workloads/jobs/*" + - "/extraction/workloads/schema/*" - "/extraction/workloads/graph/*" diff --git a/src/api/extraction/infrastructure/openshell/policies/gma-initial-schema-design.yaml b/src/api/extraction/infrastructure/openshell/policies/gma-initial-schema-design.yaml index b377dd7f6..06c584f46 100644 --- a/src/api/extraction/infrastructure/openshell/policies/gma-initial-schema-design.yaml +++ b/src/api/extraction/infrastructure/openshell/policies/gma-initial-schema-design.yaml @@ -5,6 +5,9 @@ description: Sticky session policy for initial schema design mode. 
endpoints: - "api:8000:read-write" - "inference.local:443:read-write" + - "aiplatform.googleapis.com:443:read-write" + - "*.aiplatform.googleapis.com:443:read-write" + - "oauth2.googleapis.com:443:read-write" l7_allowed_paths: - "/extraction/workloads/schema/*" - "/extraction/workloads/graph/*" diff --git a/src/api/extraction/infrastructure/openshell/policies/gma-one-off-mutations.yaml b/src/api/extraction/infrastructure/openshell/policies/gma-one-off-mutations.yaml index 6c8de4132..7c754b182 100644 --- a/src/api/extraction/infrastructure/openshell/policies/gma-one-off-mutations.yaml +++ b/src/api/extraction/infrastructure/openshell/policies/gma-one-off-mutations.yaml @@ -5,6 +5,9 @@ description: Sticky session policy for one-off graph mutations mode. endpoints: - "api:8000:read-write" - "inference.local:443:read-write" + - "aiplatform.googleapis.com:443:read-write" + - "*.aiplatform.googleapis.com:443:read-write" + - "oauth2.googleapis.com:443:read-write" l7_allowed_paths: - "/extraction/workloads/mutations/*" - "/extraction/workloads/graph/*" diff --git a/src/api/extraction/infrastructure/openshell/policies/gma-sticky-base.yaml b/src/api/extraction/infrastructure/openshell/policies/gma-sticky-base.yaml index 2f7526e05..dc619327e 100644 --- a/src/api/extraction/infrastructure/openshell/policies/gma-sticky-base.yaml +++ b/src/api/extraction/infrastructure/openshell/policies/gma-sticky-base.yaml @@ -5,5 +5,8 @@ description: Base network policy for graph-management sticky sessions. 
endpoints: - "api:8000:read-write" - "inference.local:443:read-write" + - "aiplatform.googleapis.com:443:read-write" + - "*.aiplatform.googleapis.com:443:read-write" + - "oauth2.googleapis.com:443:read-write" l7_allowed_paths: - "/extraction/workloads/*" diff --git a/src/api/extraction/infrastructure/openshell/policy.py b/src/api/extraction/infrastructure/openshell/policy.py index 537d738ca..70fe5c6c4 100644 --- a/src/api/extraction/infrastructure/openshell/policy.py +++ b/src/api/extraction/infrastructure/openshell/policy.py @@ -24,6 +24,14 @@ } +def regional_vertex_ai_endpoint(*, vertex_region: str) -> str: + """OpenShell endpoint for a regional Vertex AI hostname (e.g. us-east5-aiplatform.googleapis.com).""" + region = vertex_region.strip() + if not region: + raise ValueError("vertex_region must not be empty") + return f"{region}-aiplatform.googleapis.com:443:read-write" + + def bundled_policy_dir() -> Path: return _BUNDLED_POLICY_DIR @@ -56,6 +64,7 @@ def resolve_endpoints( workload: Literal["gma", "extraction_job"] = "gma", policy_dir: str | None = None, api_host: str | None = None, + vertex_region: str | None = None, ) -> tuple[str, ...]: """Return OpenShell ``policy update --add-endpoint`` strings.""" path = resolve_policy_path(ui_mode=ui_mode, workload=workload, policy_dir=policy_dir) @@ -76,6 +85,10 @@ def resolve_endpoints( else: rewritten.append(endpoint) endpoints = rewritten + if vertex_region and vertex_region.strip(): + regional = regional_vertex_ai_endpoint(vertex_region=vertex_region) + if regional not in endpoints: + endpoints.append(regional) return tuple(endpoints) diff --git a/src/api/extraction/infrastructure/openshell/runtime_env.py b/src/api/extraction/infrastructure/openshell/runtime_env.py new file mode 100644 index 000000000..c1d69fc38 --- /dev/null +++ b/src/api/extraction/infrastructure/openshell/runtime_env.py @@ -0,0 +1,31 @@ +"""Shared OpenShell CLI environment for Kartograph subprocess invocations.""" + +from __future__ import 
annotations + +import os + +from extraction.infrastructure.workload_runtime_settings import ExtractionWorkloadRuntimeSettings + + +def apply_openshell_gateway_env( + *, + gateway_name: str = "", + gateway_url: str = "", + xdg_config_home: str = "", +) -> None: + """Ensure openshell subprocesses use the Kartograph gateway registration.""" + if xdg_config_home.strip(): + os.environ["XDG_CONFIG_HOME"] = xdg_config_home.strip() + if gateway_name.strip(): + os.environ["OPENSHELL_GATEWAY"] = gateway_name.strip() + if gateway_url.strip(): + os.environ["OPENSHELL_GATEWAY_ENDPOINT"] = gateway_url.strip() + + +def apply_openshell_cli_env(settings: ExtractionWorkloadRuntimeSettings) -> None: + """Apply OpenShell CLI env from workload runtime settings.""" + apply_openshell_gateway_env( + gateway_name=settings.openshell_gateway_name, + gateway_url=settings.openshell_gateway_url, + xdg_config_home=settings.openshell_xdg_config_home, + ) diff --git a/src/api/extraction/infrastructure/openshell/sandbox.py b/src/api/extraction/infrastructure/openshell/sandbox.py index 85497d13f..fcc9d3e9b 100644 --- a/src/api/extraction/infrastructure/openshell/sandbox.py +++ b/src/api/extraction/infrastructure/openshell/sandbox.py @@ -2,8 +2,14 @@ from __future__ import annotations +import json +import os import re import shlex +import subprocess +import tarfile +import tempfile +import time from pathlib import Path from extraction.infrastructure.openshell.audit import ( @@ -12,10 +18,12 @@ OpenShellRuntimeProbe, OpenShellSandboxLifecycleObservation, ) -from extraction.infrastructure.openshell.cli import popen_openshell, run_openshell +from extraction.infrastructure.openshell.cli import OpenShellCliError, openshell_subprocess_env, popen_openshell, run_openshell from extraction.infrastructure.openshell.policy import resolve_endpoints, resolve_enforcement +from extraction.infrastructure.vertex_runtime_env import GCLOUD_ADC_FILENAME _CONTAINER_NAME_SAFE = re.compile(r"[^a-zA-Z0-9_.-]+") 
+_FAILURE_PHASES = frozenset({"Error", "Failed", "Terminating"}) def sanitize_sandbox_name(prefix: str, identifier: str) -> str: @@ -29,6 +37,48 @@ def sandbox_exists(name: str) -> bool: return result.returncode == 0 +def sandbox_phase(name: str) -> str | None: + result = run_openshell(["sandbox", "list", "-o", "json"], check=False) + if result.returncode != 0: + return None + try: + sandboxes = json.loads(result.stdout or "[]") + except json.JSONDecodeError: + return None + if not isinstance(sandboxes, list): + return None + for item in sandboxes: + if isinstance(item, dict) and item.get("name") == name: + phase = item.get("phase") + return str(phase) if phase is not None else None + return None + + +def _wait_for_sandbox_ready(*, name: str, timeout: float) -> None: + deadline = time.monotonic() + timeout + while time.monotonic() < deadline: + phase = sandbox_phase(name) + if phase == "Ready": + return + if phase in _FAILURE_PHASES: + detail = run_openshell(["sandbox", "get", name], check=False) + message = (detail.stderr or detail.stdout or "").strip() or f"phase={phase}" + raise OpenShellCliError(f"sandbox {name} entered {phase}: {message}") + time.sleep(0.5) + raise OpenShellCliError(f"timed out waiting for sandbox {name} to become Ready") + + +def _terminate_create_process(proc) -> None: + if proc.poll() is not None: + return + proc.terminate() + try: + proc.wait(timeout=5.0) + except Exception: + proc.kill() + proc.wait(timeout=5.0) + + def create_sandbox( *, name: str, @@ -46,7 +96,11 @@ def create_sandbox( if provider_name: args.extend(["--provider", provider_name]) args.extend(["--from", image, "--", "sleep", "infinity"]) - run_openshell(args, timeout=300.0) + proc = popen_openshell(args) + try: + _wait_for_sandbox_ready(name=name, timeout=300.0) + finally: + _terminate_create_process(proc) def delete_sandbox(name: str) -> None: @@ -55,13 +109,244 @@ def delete_sandbox(name: str) -> None: run_openshell(["sandbox", "delete", name], check=False, 
timeout=120.0) +def list_sandbox_names() -> list[str]: + """Return sandbox names from the active OpenShell gateway.""" + result = run_openshell(["sandbox", "list", "-o", "json"], check=False, timeout=30.0) + if result.returncode != 0: + return [] + try: + sandboxes = json.loads(result.stdout or "[]") + except json.JSONDecodeError: + return [] + if not isinstance(sandboxes, list): + return [] + names: list[str] = [] + for item in sandboxes: + if isinstance(item, dict) and item.get("name"): + names.append(str(item["name"])) + return names + + +def delete_sandboxes_by_prefix(prefix: str) -> int: + """Delete all sandboxes whose names start with prefix. Returns count deleted.""" + deleted = 0 + for name in list_sandbox_names(): + if name.startswith(prefix): + delete_sandbox(name) + deleted += 1 + return deleted + + +def extraction_job_sandbox_name(job_id: str) -> str: + return sanitize_sandbox_name("kartograph-extract-", job_id) + + +def stop_extraction_job_sandbox(*, job_id: str) -> bool: + """Delete the OpenShell sandbox for one extraction job, if it exists.""" + name = extraction_job_sandbox_name(job_id) + if not sandbox_exists(name): + return False + delete_sandbox(name) + return True + + +def stop_extraction_job_sandboxes( + *, + job_ids: tuple[str, ...] | list[str], +) -> int: + """Delete OpenShell sandboxes for extraction jobs. 
Returns count deleted.""" + stopped = 0 + for job_id in job_ids: + if stop_extraction_job_sandbox(job_id=job_id): + stopped += 1 + return stopped + + def upload_path(*, sandbox_name: str, local_path: str, dest: str | None = None) -> None: args = ["sandbox", "upload", "--no-git-ignore", sandbox_name, local_path] if dest: - args.extend(["--dest", dest]) + args.append(dest) run_openshell(args, timeout=600.0) +def download_path(*, sandbox_name: str, sandbox_path: str, local_path: str) -> None: + """Download a sandbox file into a local path (parent directory is created).""" + destination = Path(local_path) + destination.parent.mkdir(parents=True, exist_ok=True) + run_openshell( + ["sandbox", "download", sandbox_name, sandbox_path, str(destination.parent)], + timeout=120.0, + ) + # OpenShell writes basename(sandbox_path) into the destination directory. + downloaded = destination.parent / Path(sandbox_path).name + if downloaded != destination: + if not downloaded.is_file(): + raise OpenShellCliError( + f"openshell download did not create expected file at {downloaded}" + ) + if destination.exists(): + destination.unlink() + downloaded.rename(destination) + + +def upload_directory_contents(*, sandbox_name: str, local_dir: str, dest: str) -> None: + """Upload directory contents into dest without nesting under the directory basename. + + ``sandbox upload`` places a directory at ``dest/<basename(local_dir)>``. Agent + runtimes expect job package files at the workspace root (``KARTOGRAPH_WORKSPACE_DIR``). 
+ """ + source = Path(local_dir) + if not source.is_dir(): + raise ValueError(f"local_dir must be a directory: {local_dir}") + + remote_tar = f"/tmp/kartograph-upload-{sandbox_name}.tar" + with tempfile.NamedTemporaryFile(suffix=".tar", delete=False) as handle: + local_tar = Path(handle.name) + try: + with tarfile.open(local_tar, "w") as archive: + for entry in sorted(source.iterdir()): + archive.add(entry, arcname=entry.name) + upload_path(sandbox_name=sandbox_name, local_path=str(local_tar), dest=remote_tar) + extract_cmd = ( + f"mkdir -p {shlex.quote(dest)} && " + f"tar -xf {shlex.quote(remote_tar)} -C {shlex.quote(dest)} && " + f"rm -f {shlex.quote(remote_tar)}" + ) + run_openshell( + [ + "sandbox", + "exec", + "--name", + sandbox_name, + "--no-tty", + "--", + "bash", + "-lc", + extract_cmd, + ], + timeout=600.0, + ) + finally: + local_tar.unlink(missing_ok=True) + + +def download_directory_contents( + *, + sandbox_name: str, + remote_dir: str, + local_dir: Path, +) -> None: + """Download a sandbox directory tree into a local directory.""" + remote = remote_dir.rstrip("/") + remote_name = Path(remote).name + remote_parent = str(Path(remote).parent) or "/" + remote_tar = f"/tmp/kartograph-download-{sandbox_name}.tar" + + run_openshell( + [ + "sandbox", + "exec", + "--name", + sandbox_name, + "--no-tty", + "--", + "bash", + "-lc", + ( + f"test -d {shlex.quote(remote)} && " + f"tar -cf {shlex.quote(remote_tar)} -C {shlex.quote(remote_parent)} " + f"{shlex.quote(remote_name)}" + ), + ], + timeout=120.0, + ) + + with tempfile.NamedTemporaryFile(suffix=".tar", delete=False) as handle: + local_tar = Path(handle.name) + try: + download_path( + sandbox_name=sandbox_name, + sandbox_path=remote_tar, + local_path=str(local_tar), + ) + local_dir.parent.mkdir(parents=True, exist_ok=True) + with tarfile.open(local_tar, "r") as archive: + archive.extractall(local_dir) + run_openshell( + [ + "sandbox", + "exec", + "--name", + sandbox_name, + "--no-tty", + "--", + "rm", + "-f", 
+ remote_tar, + ], + check=False, + timeout=30.0, + ) + finally: + local_tar.unlink(missing_ok=True) + + +def upload_gcloud_adc( + *, + sandbox_name: str, + host_gcloud_config_dir: str, + container_config_path: str, +) -> None: + """Upload host Application Default Credentials into an OpenShell sandbox.""" + host_adc = Path(host_gcloud_config_dir).expanduser() / GCLOUD_ADC_FILENAME + if not host_adc.is_file(): + raise OpenShellCliError( + f"Google ADC not found at {host_adc}. " + "Run `gcloud auth application-default login` on the host, or set " + "KARTOGRAPH_GCLOUD_CONFIG_MOUNT to your ~/.config/gcloud directory." + ) + + remote_dir = container_config_path.rstrip("/") + remote_adc = f"{remote_dir}/{GCLOUD_ADC_FILENAME}" + prepare_cmd = ( + f"mkdir -p {shlex.quote(remote_dir)} && " + f"chmod 755 {shlex.quote(remote_dir)}" + ) + run_openshell( + [ + "sandbox", + "exec", + "--name", + sandbox_name, + "--no-tty", + "--", + "bash", + "-lc", + prepare_cmd, + ], + timeout=60.0, + ) + upload_path(sandbox_name=sandbox_name, local_path=str(host_adc), dest=remote_adc) + run_openshell( + [ + "sandbox", + "exec", + "--name", + sandbox_name, + "--no-tty", + "--", + "bash", + "-lc", + f"chmod a+r {shlex.quote(remote_adc)}", + ], + timeout=60.0, + ) + + +_GMA_POLICY_BINARIES = ("/app/.venv/bin/python",) +_EXTRACTION_POLICY_BINARIES = ("/usr/local/bin/claude", "/usr/bin/opencode") + + def apply_policy( *, sandbox_name: str, @@ -69,7 +354,9 @@ def apply_policy( workload: str = "gma", policy_dir: str | None = None, api_host: str | None = None, + vertex_region: str | None = None, policy_enforcement: str = "soft", + policy_binaries: tuple[str, ...] 
| None = None, probe: OpenShellRuntimeProbe | None = None, ) -> None: endpoints = resolve_endpoints( @@ -77,6 +364,7 @@ def apply_policy( workload=workload, # type: ignore[arg-type] policy_dir=policy_dir, api_host=api_host, + vertex_region=vertex_region, ) enforcement = resolve_enforcement( ui_mode=ui_mode, @@ -87,13 +375,17 @@ def apply_policy( if not endpoints: return + binaries = policy_binaries + if binaries is None: + binaries = _EXTRACTION_POLICY_BINARIES if workload == "extraction_job" else _GMA_POLICY_BINARIES + args = [ "policy", "update", "--wait", - "--binary", - "/runtime/.venv/bin/python", ] + for binary in binaries: + args.extend(["--binary", binary]) if enforcement == "hard_requirement": args.extend(["--enforcement", "hard_requirement"]) for endpoint in endpoints: @@ -114,10 +406,52 @@ def apply_policy( ) -def start_forward(*, sandbox_name: str, port: int) -> None: - run_openshell( - ["forward", "start", str(port), sandbox_name, "-d"], - timeout=30.0, +def _forwards_state_dir() -> Path: + config_home = ( + os.environ.get("KARTOGRAPH_EXTRACTION_RUNTIME_OPENSHELL_XDG_CONFIG_HOME", "").strip() + or os.environ.get("XDG_CONFIG_HOME", "").strip() + or str(Path.home() / ".config") + ) + return Path(config_home) / "openshell" / "forwards" + + +def _ensure_forwards_state_dir_writable() -> None: + forwards_dir = _forwards_state_dir() + try: + forwards_dir.mkdir(parents=True, exist_ok=True) + except OSError as exc: + raise OpenShellCliError( + f"OpenShell forwards state directory is not writable: {forwards_dir}. " + "When the API runs in compose, mount a writable volume at " + "/root/.config/openshell/forwards (see compose.dev.yaml)." + ) from exc + if not os.access(forwards_dir, os.W_OK): + raise OpenShellCliError( + f"OpenShell forwards state directory is read-only: {forwards_dir}. " + "openshell forward start -d hangs when it cannot write PID files. " + "Mount a writable volume at /root/.config/openshell/forwards." 
+ ) + + +def start_forward(*, sandbox_name: str, port: int, target_port: int = 8787) -> None: + """Forward a local port to the agent runtime listening inside the sandbox.""" + _ensure_forwards_state_dir_writable() + command = [ + "openshell", + "forward", + "service", + sandbox_name, + "--target-port", + str(target_port), + "--local", + str(port), + ] + subprocess.Popen( + command, + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + env=openshell_subprocess_env(), + start_new_session=True, ) @@ -196,3 +530,11 @@ def exec_streaming(*, sandbox_name: str, command: list[str]): return popen_openshell( ["sandbox", "exec", "--name", sandbox_name, "--no-tty", "--", *command] ) + + +def run_sandbox_exec(*, sandbox_name: str, command: list[str]) -> None: + """Run a command inside a sandbox and raise on failure.""" + run_openshell( + ["sandbox", "exec", "--name", sandbox_name, "--no-tty", "--", *command], + timeout=120.0, + ) diff --git a/src/api/extraction/infrastructure/openshell/vertex_provider.py b/src/api/extraction/infrastructure/openshell/vertex_provider.py new file mode 100644 index 000000000..ee165b118 --- /dev/null +++ b/src/api/extraction/infrastructure/openshell/vertex_provider.py @@ -0,0 +1,140 @@ +"""Ensure OpenShell google-vertex-ai provider for Vertex-backed agent sandboxes.""" + +from __future__ import annotations + +import json +import os +from contextlib import contextmanager +from pathlib import Path +from typing import Iterator, Literal + +from extraction.infrastructure.openshell.cli import OpenShellCliError, run_openshell + +AuthMode = Literal["vertex", "api-key"] + + +def _adc_path(*, gcloud_config_mount: str | None) -> Path: + if gcloud_config_mount: + return Path(gcloud_config_mount).expanduser() / "application_default_credentials.json" + return Path.home() / ".config" / "gcloud" / "application_default_credentials.json" + + +@contextmanager +def _home_for_adc(*, gcloud_config_mount: str | None) -> Iterator[None]: + """Point HOME at the mounted 
gcloud directory so ``--from-gcloud-adc`` resolves ADC.""" + if not gcloud_config_mount: + yield + return + home = str(Path(gcloud_config_mount).expanduser().resolve().parent.parent) + previous = os.environ.get("HOME") + os.environ["HOME"] = home + try: + yield + finally: + if previous is None: + os.environ.pop("HOME", None) + else: + os.environ["HOME"] = previous + + +def provider_exists(*, provider_name: str) -> bool: + result = run_openshell(["provider", "get", provider_name], check=False, timeout=15.0) + return result.returncode == 0 + + +def _adc_credential_type(*, gcloud_config_mount: str | None) -> str: + adc = _adc_path(gcloud_config_mount=gcloud_config_mount) + if not adc.is_file(): + return "unknown" + try: + data = json.loads(adc.read_text(encoding="utf-8")) + except (OSError, json.JSONDecodeError): + return "unknown" + return str(data.get("type", "unknown")) + + +def ensure_vertex_provider( + *, + provider_name: str, + project_id: str, + region: str, + gcloud_config_mount: str | None = None, + auth_mode: AuthMode = "vertex", + model: str = "", +) -> None: + """Create or reuse an OpenShell provider that injects Vertex credentials into sandboxes.""" + if provider_exists(provider_name=provider_name): + if auth_mode == "vertex": + ensure_inference_routing(provider_name=provider_name, model=model) + return + + if auth_mode == "api-key": + api_key = os.environ.get("ANTHROPIC_API_KEY", "").strip() + if not api_key: + raise OpenShellCliError( + "ANTHROPIC_API_KEY is required for OpenShell anthropic provider auth mode" + ) + run_openshell( + [ + "provider", + "create", + "--name", + provider_name, + "--type", + "anthropic", + "--credential", + "ANTHROPIC_API_KEY", + ], + timeout=60.0, + ) + return + + adc = _adc_path(gcloud_config_mount=gcloud_config_mount) + if not adc.is_file(): + raise OpenShellCliError( + f"Google ADC not found at {adc}. Run `gcloud auth application-default login` " + "on the host or set KARTOGRAPH_GCLOUD_CONFIG_MOUNT." 
+ ) + + cred_type = _adc_credential_type(gcloud_config_mount=gcloud_config_mount) + if cred_type not in {"authorized_user", "service_account"}: + raise OpenShellCliError( + f"Unsupported ADC credential type {cred_type!r} at {adc}. " + "Expected authorized_user or service_account." + ) + + args = [ + "provider", + "create", + "--name", + provider_name, + "--type", + "google-vertex-ai", + "--from-gcloud-adc", + ] + if project_id.strip(): + args.extend(["--config", f"VERTEX_AI_PROJECT_ID={project_id.strip()}"]) + if region.strip(): + args.extend(["--config", f"VERTEX_AI_REGION={region.strip()}"]) + + with _home_for_adc(gcloud_config_mount=gcloud_config_mount): + run_openshell(args, timeout=60.0) + if auth_mode == "vertex": + ensure_inference_routing(provider_name=provider_name, model=model) + + +def ensure_inference_routing(*, provider_name: str, model: str) -> None: + """Point sandbox inference.local at the configured Vertex provider.""" + resolved_model = model.strip() or "claude-opus-4-6" + run_openshell( + [ + "inference", + "set", + "--provider", + provider_name, + "--model", + resolved_model, + "--no-verify", + ], + timeout=60.0, + ) diff --git a/src/api/extraction/infrastructure/openshell_extraction_job_runner.py b/src/api/extraction/infrastructure/openshell_extraction_job_runner.py index d03523f57..ac525359d 100644 --- a/src/api/extraction/infrastructure/openshell_extraction_job_runner.py +++ b/src/api/extraction/infrastructure/openshell_extraction_job_runner.py @@ -1,4 +1,4 @@ -"""Run extraction jobs inside OpenShell sandboxes.""" +"""Run extraction jobs inside OpenShell sandboxes via agentic-ci patterns.""" from __future__ import annotations @@ -24,18 +24,32 @@ format_claude_code_stream_line, ) from extraction.infrastructure.extraction_job_metrics import merge_extraction_job_metrics +from extraction.infrastructure.extraction_job_mutation_metrics import ( + reconcile_mutation_metrics, +) from extraction.infrastructure.extraction_job_prompt import ( - 
EXTRACTION_JOB_INVOKE_PROMPT, + build_extraction_job_invoke_prompt, build_extraction_job_prompt, write_extraction_prompt_file, ) from extraction.infrastructure.extraction_job_verdict import require_successful_apply +from extraction.infrastructure.extraction_job_workdir_layout import mutation_result_path from extraction.infrastructure.extraction_job_workdir_materializer import ( ExtractionJobWorkdirMaterializer, ) -from extraction.infrastructure.openshell import gateway as openshell_gateway +from extraction.infrastructure.openshell.extraction_sandbox_pool import ( + resolve_extraction_sandbox_assignment, +) from extraction.infrastructure.openshell import sandbox as openshell_sandbox from extraction.infrastructure.openshell.audit import LoggingOpenShellRuntimeProbe +from extraction.infrastructure.openshell.inference_env import ( + build_openshell_inference_env_script_lines, + insert_claude_bare_flag, + insert_vertex_compatible_effort, +) +from extraction.infrastructure.openshell.cli import OpenShellCliError +from extraction.infrastructure.openshell.runtime_env import apply_openshell_cli_env +from extraction.infrastructure.openshell.vertex_provider import ensure_vertex_provider from extraction.infrastructure.workload_runtime_factory import get_workload_credential_issuer from extraction.infrastructure.workload_runtime_settings import ( ExtractionWorkloadRuntimeSettings, @@ -43,11 +57,7 @@ ) from extraction.ports.extraction_job_runner import IExtractionJobRunner - -def _strip_harness_binary(command: list[str]) -> list[str]: - if command and command[0] in {"claude", "opencode"}: - return command[1:] - return command +_AGENTIC_CI_ENV_SCRIPT = "/tmp/.agentic-ci-env.sh" def _patch_job_context_api_base(workdir: Path, api_base_url: str) -> None: @@ -92,7 +102,7 @@ async def run(self, job: ExtractionJobRecord, *, tenant_id: str) -> dict[str, An tenant_id=tenant_id, credentials=credentials, ) - _patch_job_context_api_base(workdir, self._settings.api_base_url) + 
_patch_job_context_api_base(workdir, self._settings.sandbox_reachable_api_base_url()) prompt = build_extraction_job_prompt(job=job) return await self._run_in_sandbox(job=job, workdir=workdir, prompt=prompt) @@ -113,7 +123,8 @@ def _run_in_sandbox_sync( workdir: Path, prompt: str, ) -> dict[str, Any]: - sandbox_name = openshell_sandbox.sanitize_sandbox_name("kartograph-extract-", job.job_id) + assignment = resolve_extraction_sandbox_assignment(job, self._settings) + sandbox_name = assignment.sandbox_name run_dir = tempfile.mkdtemp(prefix="kartograph-openshell-") otel_proc = None otel_log: Path | None = None @@ -123,47 +134,89 @@ def _run_in_sandbox_sync( gateway_name=self._settings.openshell_gateway_name, gateway_url=self._settings.openshell_gateway_url, ) - openshell_sandbox.delete_sandbox(sandbox_name) - openshell_sandbox.create_sandbox( - name=sandbox_name, - image=self._settings.agentic_ci_image, - provider_name=self._settings.openshell_provider_name, - ) - openshell_sandbox.emit_lifecycle( - sandbox_name=sandbox_name, - action="created", - probe=self._probe, - image=self._settings.agentic_ci_image, - job_id=job.job_id, - ) - openshell_sandbox.upload_path( - sandbox_name=sandbox_name, - local_path=str(workdir), - dest="/workspace", - ) - openshell_sandbox.apply_policy( + apply_openshell_cli_env(self._settings) + if self._settings.vertex_enabled(): + ensure_vertex_provider( + provider_name=self._settings.openshell_provider_name, + project_id=self._settings.vertex_project_id, + region=self._settings.vertex_region, + gcloud_config_mount=self._settings.gcloud_config_mount, + auth_mode="vertex", + model=self._resolve_model(), + ) + sandbox_image = self._settings.openshell_extraction_sandbox_image() + created = False + if assignment.reuse and openshell_sandbox.sandbox_exists(sandbox_name): + openshell_sandbox.emit_lifecycle( + sandbox_name=sandbox_name, + action="reused", + probe=self._probe, + image=sandbox_image, + job_id=job.job_id, + ) + else: + 
openshell_sandbox.delete_sandbox(sandbox_name) + openshell_sandbox.create_sandbox( + name=sandbox_name, + image=sandbox_image, + provider_name=self._settings.openshell_provider_name, + ) + openshell_sandbox.emit_lifecycle( + sandbox_name=sandbox_name, + action="created", + probe=self._probe, + image=sandbox_image, + job_id=job.job_id, + ) + created = True + work_mount = self._settings.openshell_container_work_mount + if assignment.reuse and not created: + self._reset_sandbox_workspace( + sandbox_name=sandbox_name, + work_mount=work_mount, + ) + write_extraction_prompt_file(workdir=workdir, prompt=prompt) + openshell_sandbox.upload_directory_contents( sandbox_name=sandbox_name, - workload="extraction_job", - policy_dir=self._settings.openshell_policy_dir or None, - api_host=_api_host_from_base_url(self._settings.api_base_url), - policy_enforcement=self._settings.openshell_policy_enforcement, - probe=self._probe, + local_dir=str(workdir), + dest=work_mount, ) + if created or not assignment.reuse: + openshell_sandbox.apply_policy( + sandbox_name=sandbox_name, + workload="extraction_job", + policy_dir=self._settings.openshell_policy_dir or None, + api_host=_api_host_from_base_url(self._settings.sandbox_reachable_api_base_url()), + vertex_region=( + self._settings.vertex_region + if self._settings.vertex_enabled() + else None + ), + policy_enforcement=self._settings.openshell_policy_enforcement, + probe=self._probe, + ) - otel_proc, otel_port, otel_log_path, _otel_rate = otel.start_collector(run_dir) + otel_proc, otel_port, otel_log_path, otel_rate_file = otel.start_collector(run_dir) otel_log = Path(otel_log_path) - write_extraction_prompt_file(workdir=workdir, prompt=prompt) model = self._resolve_model() - command = _strip_harness_binary( - self._harness.build_args(EXTRACTION_JOB_INVOKE_PROMPT, model) - ) - env_script = self._build_env_script(model=model, otel_port=otel_port) + invoke_prompt = build_extraction_job_invoke_prompt(workspace_dir=work_mount) log_path = 
activity_log_path(workdir) - append_activity_line(log_path, f"📡 Processing job {job.job_id} in OpenShell sandbox...") + slot_note = ( + f" (worker sandbox {assignment.slot})" + if assignment.slot is not None + else "" + ) + append_activity_line( + log_path, + f"📡 Processing job {job.job_id} on {job.worker_id or 'worker'} " + f"in OpenShell sandbox {sandbox_name}{slot_note}...", + ) rc = self._run_agent( sandbox_name=sandbox_name, - env_script=env_script, - command=command, + model=model, + otel_port=otel_port, + otel_rate_file=otel_rate_file, + invoke_prompt=invoke_prompt, timeout_seconds=self._settings.agentic_ci_timeout_seconds, activity_log_path=log_path, ) @@ -171,15 +224,18 @@ def _run_in_sandbox_sync( if otel_proc is not None: otel.stop_collector(otel_proc) otel_proc = None - metrics = merge_extraction_job_metrics( - otel_log=otel_log, - workdir=workdir, - activity_log=log_path, - ) if rc != 0: + detail = self._read_activity_log_tail(log_path) raise RuntimeError( - f"OpenShell extraction sandbox exited with code {rc} for job {job.job_id}" + "OpenShell extraction sandbox exited with code " + f"{rc} for job {job.job_id}" + + (f": {detail}" if detail else "") ) + self._sync_mutation_artifacts_from_sandbox( + sandbox_name=sandbox_name, + workdir=workdir, + work_mount=work_mount, + ) verdict = require_successful_apply(workdir) append_activity_message( log_path, @@ -189,18 +245,32 @@ def _run_in_sandbox_sync( "via workload API." 
), ) + metrics = merge_extraction_job_metrics( + otel_log=otel_log, + workdir=workdir, + activity_log=log_path, + ) + metrics = reconcile_mutation_metrics( + metrics, + workdir=workdir, + operations_applied=verdict.operations_applied, + ) metrics["operations_applied"] = verdict.operations_applied + if assignment.slot is not None: + metrics["sandbox_slot"] = assignment.slot + metrics["sandbox_name"] = sandbox_name return metrics finally: if otel_proc is not None: otel.stop_collector(otel_proc) - openshell_sandbox.delete_sandbox(sandbox_name) - openshell_sandbox.emit_lifecycle( - sandbox_name=sandbox_name, - action="deleted", - probe=self._probe, - job_id=job.job_id, - ) + if not assignment.reuse: + openshell_sandbox.delete_sandbox(sandbox_name) + openshell_sandbox.emit_lifecycle( + sandbox_name=sandbox_name, + action="deleted", + probe=self._probe, + job_id=job.job_id, + ) def _resolve_model(self) -> str: configured = self._settings.agentic_ci_model.strip() @@ -212,30 +282,131 @@ def _resolve_model(self) -> str: return from_env return self._harness.default_model() - def _build_env_script(self, *, model: str, otel_port: int) -> str: - lines = self._harness.build_env_script_lines(otel_port, None) + @staticmethod + def _reset_sandbox_workspace(*, sandbox_name: str, work_mount: str) -> None: + """Clear the prior job workspace before uploading the next job package.""" + quoted = shlex.quote(work_mount.rstrip("/")) + openshell_sandbox.run_sandbox_exec( + sandbox_name=sandbox_name, + command=[ + "bash", + "-lc", + f"mkdir -p {quoted} && find {quoted} -mindepth 1 -maxdepth 1 -exec rm -rf -- {{}} +", + ], + ) + + @staticmethod + def _read_activity_log_tail(log_path: Path, *, max_lines: int = 8) -> str: + if not log_path.is_file(): + return "" + lines = log_path.read_text(encoding="utf-8", errors="replace").splitlines() + tail = [line.strip() for line in lines[-max_lines:] if line.strip()] + return " | ".join(tail) + + @staticmethod + def 
_sync_mutation_artifacts_from_sandbox( + *, + sandbox_name: str, + workdir: Path, + work_mount: str, + ) -> None: + """Copy mutations/ artifacts from the sandbox back to the host workdir.""" + remote_mutations = f"{work_mount.rstrip('/')}/mutations" + try: + openshell_sandbox.download_directory_contents( + sandbox_name=sandbox_name, + remote_dir=remote_mutations, + local_dir=workdir, + ) + except OpenShellCliError: + if mutation_result_path(workdir).is_file(): + return + openshell_sandbox.download_path( + sandbox_name=sandbox_name, + sandbox_path=f"{remote_mutations}/result.json", + local_path=str(mutation_result_path(workdir)), + ) + + def _write_env_script_in_sandbox( + self, + *, + sandbox_name: str, + model: str, + otel_port: int, + otel_rate_file: str | None, + ) -> None: + """Upload env script into the sandbox (agentic-ci OpenShellBackend pattern).""" + if self._settings.vertex_enabled() and self._harness.auth_mode == "vertex": + lines = build_openshell_inference_env_script_lines( + workspace_dir=self._settings.openshell_container_work_mount, + otel_port=otel_port, + otel_rate_file=otel_rate_file, + ) + else: + lines = self._harness.build_env_script_lines(otel_port, otel_rate_file) lines.append("export CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC=1") lines.append(f"export AGENT_MODEL={shlex.quote(model)}") - lines.append("cd /workspace") - return "\n".join(lines) + "\n" + script = "\n".join(lines) + "\n" + + with tempfile.NamedTemporaryFile( + mode="w", + prefix="agentic-ci-env-", + suffix=".sh", + delete=False, + ) as handle: + handle.write(script) + local_path = handle.name + + try: + openshell_sandbox.upload_path( + sandbox_name=sandbox_name, + local_path=local_path, + ) + remote_name = Path(local_path).name + openshell_sandbox.run_sandbox_exec( + sandbox_name=sandbox_name, + command=[ + "bash", + "-c", + f"mv {shlex.quote(remote_name)} {_AGENTIC_CI_ENV_SCRIPT}", + ], + ) + finally: + Path(local_path).unlink(missing_ok=True) def _run_agent( self, *, 
sandbox_name: str, - env_script: str, - command: list[str], + model: str, + otel_port: int, + otel_rate_file: str | None, + invoke_prompt: str, timeout_seconds: int, activity_log_path: Path, ) -> int: - shell = ( - f"cat > /tmp/.kartograph-env.sh <<'EOF'\n{env_script}EOF\n" - f". /tmp/.kartograph-env.sh && exec {' '.join(shlex.quote(part) for part in command)}" + self._write_env_script_in_sandbox( + sandbox_name=sandbox_name, + model=model, + otel_port=otel_port, + otel_rate_file=otel_rate_file, ) + agent_args = self._harness.build_args(invoke_prompt, model) + if self._settings.vertex_enabled() and self._harness.auth_mode == "vertex": + agent_args = insert_claude_bare_flag(agent_args) + agent_args = insert_vertex_compatible_effort(agent_args) + work_mount = shlex.quote(self._settings.openshell_container_work_mount) + cmd = [ + "bash", + "-c", + f". {_AGENTIC_CI_ENV_SCRIPT} && cd {work_mount} && exec \"$@\"", + "--", + *agent_args, + ] started = time.monotonic() proc = openshell_sandbox.exec_streaming( sandbox_name=sandbox_name, - command=["bash", "-lc", shell], + command=cmd, ) captured_tail: list[str] = [] stream_log_path = activity_log_path.parent / "agent_stream.jsonl" diff --git a/src/api/extraction/infrastructure/repositories/extraction_job_repository.py b/src/api/extraction/infrastructure/repositories/extraction_job_repository.py index cc45adcc4..876b3fcd0 100644 --- a/src/api/extraction/infrastructure/repositories/extraction_job_repository.py +++ b/src/api/extraction/infrastructure/repositories/extraction_job_repository.py @@ -324,7 +324,10 @@ async def list_recent_jobs( ) -> list[ExtractionJobRecord]: stmt = ( select(ExtractionJobModel) - .where(ExtractionJobModel.knowledge_graph_id == knowledge_graph_id) + .where( + ExtractionJobModel.knowledge_graph_id == knowledge_graph_id, + ExtractionJobModel.status != ExtractionJobStatus.ARCHIVED.value, + ) .order_by( ExtractionJobModel.updated_at.desc(), ExtractionJobModel.order_index.asc(), @@ -334,6 +337,58 @@ 
async def list_recent_jobs( result = await self._session.execute(stmt) return [_job_model_to_record(model) for model in result.scalars().all()] + async def list_jobs_by_status( + self, + *, + knowledge_graph_id: str, + status: ExtractionJobStatus, + limit: int = 10_000, + ) -> list[ExtractionJobRecord]: + stmt = ( + select(ExtractionJobModel) + .where( + ExtractionJobModel.knowledge_graph_id == knowledge_graph_id, + ExtractionJobModel.status == status.value, + ) + .order_by( + ExtractionJobModel.completed_at.desc().nullslast(), + ExtractionJobModel.order_index.asc(), + ) + .limit(limit) + ) + result = await self._session.execute(stmt) + return [_job_model_to_record(model) for model in result.scalars().all()] + + async def promote_completed_job_to_archived( + self, + *, + knowledge_graph_id: str, + job_id: str, + metrics: dict[str, Any], + ) -> bool: + now = datetime.now(UTC) + values: dict[str, Any] = { + "status": ExtractionJobStatus.ARCHIVED.value, + "archived_at": now, + "entities_created": int(metrics.get("entities_created", 0)), + "entities_modified": int(metrics.get("entities_modified", 0)), + "relationships_created": int(metrics.get("relationships_created", 0)), + "relationships_modified": int(metrics.get("relationships_modified", 0)), + } + applied_jsonl = metrics.get("applied_mutations_jsonl") + if isinstance(applied_jsonl, str) and applied_jsonl.strip(): + values["applied_mutations_jsonl"] = applied_jsonl + result = await self._session.execute( + update(ExtractionJobModel) + .where( + ExtractionJobModel.knowledge_graph_id == knowledge_graph_id, + ExtractionJobModel.job_id == job_id, + ExtractionJobModel.status == ExtractionJobStatus.COMPLETED.value, + ) + .values(**values) + ) + return int(result.rowcount or 0) > 0 + async def list_active_workers(self, *, knowledge_graph_id: str) -> list[dict[str, Any]]: stmt = select(ExtractionJobModel).where( ExtractionJobModel.knowledge_graph_id == knowledge_graph_id, diff --git 
a/src/api/extraction/infrastructure/sticky_session_bootstrap_builder.py b/src/api/extraction/infrastructure/sticky_session_bootstrap_builder.py index ede499e33..a53984dd4 100644 --- a/src/api/extraction/infrastructure/sticky_session_bootstrap_builder.py +++ b/src/api/extraction/infrastructure/sticky_session_bootstrap_builder.py @@ -83,6 +83,6 @@ async def build( tenant_id=tenant_id, credentials=credentials, host_session_work_dir=str(host_session_work_dir), - api_base_url=self._runtime_settings.api_base_url, + api_base_url=self._runtime_settings.sandbox_reachable_api_base_url(), ui_mode=ui_mode, ) \ No newline at end of file diff --git a/src/api/extraction/infrastructure/vertex_runtime_env.py b/src/api/extraction/infrastructure/vertex_runtime_env.py index 82d0431b7..2c7b4c646 100644 --- a/src/api/extraction/infrastructure/vertex_runtime_env.py +++ b/src/api/extraction/infrastructure/vertex_runtime_env.py @@ -8,6 +8,8 @@ GCLOUD_ADC_FILENAME = "application_default_credentials.json" DEFAULT_GCLOUD_CONTAINER_PATH = "/gcloud/config" +# OpenShell sandboxes cannot bind-mount host gcloud config; upload ADC under /tmp instead. +OPENSHELL_GCLOUD_CONTAINER_PATH = "/tmp/kartograph-gcloud" def is_truthy_env(value: str | None) -> bool: @@ -36,6 +38,19 @@ def build_vertex_container_env( return env +def build_openshell_inference_container_env() -> dict[str, str]: + """Route Claude Code through OpenShell inference.local (host holds Vertex ADC). + + Do not set ``CLAUDE_CODE_USE_VERTEX`` — that triggers metadata-server ADC + inside the sandbox, which OpenShell blocks for SSRF hardening. 
+ """ + return { + "ANTHROPIC_BASE_URL": "https://inference.local", + "ANTHROPIC_API_KEY": "unused", + "CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS": "1", + } + + def build_gcloud_adc_env(*, container_config_path: str) -> dict[str, str]: """Env vars so Google client libraries find ADC inside extraction containers.""" base = container_config_path.rstrip("/") diff --git a/src/api/extraction/infrastructure/workload_runtime_factory.py b/src/api/extraction/infrastructure/workload_runtime_factory.py index d989293f4..970f947af 100644 --- a/src/api/extraction/infrastructure/workload_runtime_factory.py +++ b/src/api/extraction/infrastructure/workload_runtime_factory.py @@ -82,13 +82,15 @@ def create_sticky_session_runtime_manager( sticky_image=resolved.sticky_image, session_ttl=timedelta(minutes=resolved.session_ttl_minutes), sticky_service_port=resolved.sticky_service_port, - container_work_mount=resolved.container_work_mount, + container_work_mount=resolved.openshell_container_work_mount, vertex_project_id=resolved.vertex_project_id, vertex_region=resolved.vertex_region, vertex_enabled=resolved.vertex_enabled(), + gcloud_config_mount=resolved.gcloud_config_mount, + gcloud_config_container_path=resolved.openshell_gcloud_container_path, agent_turn_timeout_seconds=resolved.sticky_turn_timeout_seconds, agent_max_turns=resolved.sticky_max_turns, - api_base_url=resolved.api_base_url, + api_base_url=resolved.sandbox_reachable_api_base_url(), gateway_name=resolved.openshell_gateway_name, gateway_url=resolved.openshell_gateway_url, provider_name=resolved.openshell_provider_name, diff --git a/src/api/extraction/infrastructure/workload_runtime_settings.py b/src/api/extraction/infrastructure/workload_runtime_settings.py index bb485f2b3..37b0721b9 100644 --- a/src/api/extraction/infrastructure/workload_runtime_settings.py +++ b/src/api/extraction/infrastructure/workload_runtime_settings.py @@ -9,7 +9,10 @@ from pydantic import Field, field_validator, model_validator from pydantic_settings 
import BaseSettings, SettingsConfigDict -from extraction.infrastructure.vertex_runtime_env import vertex_enabled_from_env +from extraction.infrastructure.vertex_runtime_env import ( + OPENSHELL_GCLOUD_CONTAINER_PATH, + vertex_enabled_from_env, +) class ExtractionWorkloadRuntimeSettings(BaseSettings): @@ -50,9 +53,29 @@ class ExtractionWorkloadRuntimeSettings(BaseSettings): worker_command: tuple[str, ...] = Field(default=("sleep", "3600")) sticky_service_port: int = Field(default=8787, ge=1024, le=65535) container_work_mount: str = Field(default="/workspace") + openshell_container_work_mount: str = Field( + default="/sandbox", + description=( + "In-sandbox workspace path for OpenShell backends. " + "Must be writable under OpenShell Landlock defaults (/sandbox, /tmp)." + ), + ) + openshell_gcloud_container_path: str = Field( + default=OPENSHELL_GCLOUD_CONTAINER_PATH, + description=( + "In-sandbox path for uploaded gcloud ADC when using OpenShell backends." + ), + ) session_ttl_minutes: int = Field(default=60, ge=1, le=24 * 60) job_package_work_dir: str = Field(default="/tmp/kartograph/job_packages") api_base_url: str = Field(default="http://api:8000") + openshell_api_base_url: str = Field( + default="http://host.docker.internal:8000", + description=( + "API base URL reachable from OpenShell sandboxes on the host. " + "Docker service names like api:8000 do not resolve outside the compose network." + ), + ) workload_token_signing_key: str = Field( default="", description=( @@ -80,6 +103,13 @@ class ExtractionWorkloadRuntimeSettings(BaseSettings): default=("/tmp:rw,noexec,nosuid,size=512m",), ) openshell_gateway_name: str = Field(default="openshell") + openshell_xdg_config_home: str = Field( + default="", + description=( + "XDG config root for openshell CLI (compose dev: /root/.config when " + "host ~/.config/openshell is mounted there)." 
+ ), + ) openshell_gateway_url: str = Field( default="https://127.0.0.1:17670", description=( @@ -87,12 +117,25 @@ class ExtractionWorkloadRuntimeSettings(BaseSettings): "Use https://host.docker.internal:17670 when the API runs inside compose." ), ) - openshell_provider_name: str = Field(default="kartograph-gma") + openshell_provider_name: str = Field( + default="kartograph-gma", + description=( + "OpenShell google-vertex-ai provider shared by GMA sticky sessions and " + "batch extraction jobs. Injects Vertex credentials into sandboxes." + ), + ) + openshell_extraction_image: str = Field( + default="quay.io/aipcc/agentic-ci/claude-sandbox:latest", + description=( + "OpenShell sandbox image for batch extraction jobs. " + "Must include the sandbox user and /usr/local/bin/claude (agentic-ci claude-sandbox)." + ), + ) openshell_runtime_host: str = Field( - default="host.docker.internal", + default="127.0.0.1", description=( "Host reachable from the API process for OpenShell port forwards. " - "Use host.docker.internal in compose dev; 127.0.0.1 when API runs on host." + "Use 127.0.0.1 when the OpenShell CLI runs in the same process/container as the API." 
), ) openshell_forward_port_base: int = Field(default=18787, ge=1024, le=65000) @@ -108,6 +151,16 @@ class ExtractionWorkloadRuntimeSettings(BaseSettings): def vertex_enabled(self) -> bool: return vertex_enabled_from_env() + def sandbox_reachable_api_base_url(self) -> str: + """API URL workload sandboxes use for Kartograph workload endpoints.""" + if self.backend == "openshell" or self.job_runner == "openshell": + return self.openshell_api_base_url.rstrip("/") + return self.api_base_url.rstrip("/") + + def openshell_extraction_sandbox_image(self) -> str: + """Container image for OpenShell batch extraction sandboxes (agentic-ci claude-sandbox).""" + return self.openshell_extraction_image + @model_validator(mode="after") def _apply_vertex_env_aliases(self) -> "ExtractionWorkloadRuntimeSettings": if self.job_runner is None: diff --git a/src/api/extraction/presentation/routes.py b/src/api/extraction/presentation/routes.py index 8d3be0186..3ace82ae8 100644 --- a/src/api/extraction/presentation/routes.py +++ b/src/api/extraction/presentation/routes.py @@ -85,7 +85,7 @@ async def get_active_session( graph_management_ui_mode: GraphManagementUiMode, current_user: Annotated[CurrentUser, Depends(get_current_user)], service: Annotated[ - ExtractionAgentSessionService, Depends(get_extraction_agent_session_service) + ExtractionAgentSessionService, Depends(get_extraction_agent_session_service_with_runtime) ], authz: Annotated[AuthorizationProvider, Depends(get_spicedb_client)], ) -> ExtractionSessionResponse: @@ -118,7 +118,7 @@ async def start_session( request: GraphManagementSessionRequest, current_user: Annotated[CurrentUser, Depends(get_current_user)], service: Annotated[ - ExtractionAgentSessionService, Depends(get_extraction_agent_session_service) + ExtractionAgentSessionService, Depends(get_extraction_agent_session_service_with_runtime) ], authz: Annotated[AuthorizationProvider, Depends(get_spicedb_client)], ) -> ExtractionSessionResponse: diff --git 
a/src/api/infrastructure/management/extraction_jobs_service.py b/src/api/infrastructure/management/extraction_jobs_service.py index 9f37304e4..371c989c3 100644 --- a/src/api/infrastructure/management/extraction_jobs_service.py +++ b/src/api/infrastructure/management/extraction_jobs_service.py @@ -16,8 +16,7 @@ projected_job_count, ) from extraction.infrastructure.extraction_job_container import ( - stop_extraction_job_container, - stop_extraction_job_containers, + stop_extraction_job_runtimes, ) from extraction.infrastructure.extraction_run_orchestrator import get_extraction_run_orchestrator from extraction.domain.extraction_job import ExtractionJobStatus, ExtractionRunStatus @@ -257,10 +256,12 @@ async def _stop_in_progress_containers(self, *, kg_id: str) -> int: ) if not job_ids: return 0 - return stop_extraction_job_containers( + containers_stopped, sandboxes_stopped = stop_extraction_job_runtimes( job_ids=job_ids, container_engine=runtime_settings.container_engine, + openshell_backend=runtime_settings.backend == "openshell", ) + return containers_stopped + sandboxes_stopped async def cancel_job( self, @@ -300,9 +301,11 @@ async def cancel_job( "Use Reset Failed or Reset All Jobs to re-queue finished jobs." 
) - stop_extraction_job_container( - job_id=job_id, + runtime_settings = get_extraction_workload_runtime_settings() + containers_stopped, sandboxes_stopped = stop_extraction_job_runtimes( + job_ids=(job_id,), container_engine=runtime_settings.container_engine, + openshell_backend=runtime_settings.backend == "openshell", ) await self._extraction_job_repository.mark_job_failed( knowledge_graph_id=kg_id, @@ -310,9 +313,17 @@ async def cancel_job( error_message="Cancelled by operator", ) await self._session.commit() + runtime_bits: list[str] = [] + if containers_stopped: + runtime_bits.append(f"{containers_stopped} container(s)") + if sandboxes_stopped: + runtime_bits.append(f"{sandboxes_stopped} OpenShell sandbox(es)") + runtime_detail = ( + " and ".join(runtime_bits) if runtime_bits else "no active runtime resources" + ) return { "success": True, - "message": f"Cancelled running job {job_id} and stopped its container.", + "message": f"Cancelled running job {job_id} and stopped {runtime_detail}.", } async def get_database_status( @@ -439,7 +450,8 @@ async def get_extraction_run_state( "workerCount": 0, "pauseRequested": False, } - return { + runtime_settings = get_extraction_workload_runtime_settings() + payload = { "live": live or run.status in {ExtractionRunStatus.RUNNING, ExtractionRunStatus.PAUSING}, "status": run.status.value, "workerCount": run.worker_count, @@ -448,6 +460,9 @@ async def get_extraction_run_state( "completedAt": run.completed_at.isoformat() if run.completed_at else None, "orchestratorPid": run.orchestrator_pid, } + if runtime_settings.job_runner == "openshell" and run.worker_count > 0: + payload["sandboxSlotCount"] = run.worker_count + return payload async def get_extraction_plan_summary( self, @@ -533,16 +548,25 @@ async def halt_extraction(self, *, user_id: str, kg_id: str) -> dict[str, Any]: orchestrator = get_extraction_run_orchestrator(session_factory=self._session_factory) await orchestrator.halt(knowledge_graph_id=kg_id) runtime_settings = 
get_extraction_workload_runtime_settings() - stopped = stop_extraction_job_containers( + containers_stopped, sandboxes_stopped = stop_extraction_job_runtimes( job_ids=job_ids, container_engine=runtime_settings.container_engine, + openshell_backend=runtime_settings.backend == "openshell", ) await self._session.commit() + runtime_bits: list[str] = [] + if containers_stopped: + runtime_bits.append(f"{containers_stopped} container(s)") + if sandboxes_stopped: + runtime_bits.append(f"{sandboxes_stopped} OpenShell sandbox(es)") + runtime_detail = ( + " and ".join(runtime_bits) if runtime_bits else "no active runtime resources" + ) return { "success": True, "message": ( "Extraction halted, incomplete jobs marked failed, and " - f"{stopped} extraction container(s) stopped." + f"{runtime_detail} stopped." ), } @@ -621,6 +645,34 @@ async def reset_completed_jobs(self, *, user_id: str, kg_id: str) -> dict[str, A await self._session.commit() return {"success": True, "reset_count": reset} + async def archive_completed_jobs(self, *, user_id: str, kg_id: str) -> dict[str, Any]: + from extraction.application.archive_completed_extraction_jobs import ( + archive_completed_extraction_jobs, + ) + + _ = await self._knowledge_graph_service.get(user_id=user_id, kg_id=kg_id) + runtime_settings = get_extraction_workload_runtime_settings() + result = await archive_completed_extraction_jobs( + repository=self._extraction_job_repository, + knowledge_graph_id=kg_id, + settings=runtime_settings, + ) + await self._session.commit() + archived_count = int(result.get("archived_count") or 0) + backfilled_count = int(result.get("metrics_backfilled_count") or 0) + message = f"Archived {archived_count} completed job(s)." + if backfilled_count: + message = ( + f"Archived {archived_count} completed job(s); " + f"backfilled graph write metrics for {backfilled_count}." 
+ ) + return { + "success": True, + "archived_count": archived_count, + "metrics_backfilled_count": backfilled_count, + "message": message, + } + async def reset_failed_jobs(self, *, user_id: str, kg_id: str) -> dict[str, Any]: _ = await self._knowledge_graph_service.get(user_id=user_id, kg_id=kg_id) reset = await self._extraction_job_repository.reset_jobs_by_status( diff --git a/src/api/management/application/services/data_source_service.py b/src/api/management/application/services/data_source_service.py index 87d8efecf..30adc6355 100644 --- a/src/api/management/application/services/data_source_service.py +++ b/src/api/management/application/services/data_source_service.py @@ -474,8 +474,8 @@ async def refresh_commit_references( """Persist the latest tracked branch head for a Git-backed data source. Requires MANAGE permission. Updates only ``tracked_branch_head_commit``; - extraction baseline is advanced on successful sync completion or via - ``adopt_tracked_head_as_baseline``. + extraction baseline is seeded on first prepare, advanced when extraction + jobs finish or maintain sync applies mutations. 
""" has_manage = await self._check_permission( user_id=user_id, diff --git a/src/api/management/domain/aggregates/data_source.py b/src/api/management/domain/aggregates/data_source.py index 075ecf049..1c3888d14 100644 --- a/src/api/management/domain/aggregates/data_source.py +++ b/src/api/management/domain/aggregates/data_source.py @@ -380,6 +380,33 @@ def advance_extraction_baseline_to_tracked_head(self) -> None: if self.tracked_branch_head_commit: self.last_extraction_baseline_commit = self.tracked_branch_head_commit + def maybe_seed_extraction_baseline_from_prepare( + self, + *, + prepared_commit: str | None, + ) -> None: + """Set baseline from the first successful prepare when still unset.""" + if self._deleted: + raise AggregateDeletedError( + "Cannot update extraction baseline on a deleted data source" + ) + if self.last_extraction_baseline_commit is not None: + return + if prepared_commit: + self.last_extraction_baseline_commit = prepared_commit + + def advance_extraction_baseline_to_ingested_head(self) -> None: + """Move extraction baseline to the ingested/prepared commit on disk.""" + if self._deleted: + raise AggregateDeletedError( + "Cannot update extraction baseline on a deleted data source" + ) + from management.domain.commit_pull_state import resolve_ingested_head_commit + + commit = resolve_ingested_head_commit(self) + if commit: + self.last_extraction_baseline_commit = commit + def record_ingestion_prepared( self, *, diff --git a/src/api/management/infrastructure/extraction_baseline_updater.py b/src/api/management/infrastructure/extraction_baseline_updater.py new file mode 100644 index 000000000..18d7ab83d --- /dev/null +++ b/src/api/management/infrastructure/extraction_baseline_updater.py @@ -0,0 +1,36 @@ +"""Update data-source extraction baselines after graph extraction runs.""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from sqlalchemy.ext.asyncio import AsyncSession + + from 
management.ports.repositories import IDataSourceRepository + + +async def advance_extraction_baselines_for_knowledge_graph( + *, + session: AsyncSession, + knowledge_graph_id: str, + data_source_repository: IDataSourceRepository | None = None, +) -> int: + """Advance extraction baselines for every prepared source on a knowledge graph.""" + if data_source_repository is None: + from management.infrastructure.repositories.data_source_repository import ( + DataSourceRepository, + ) + + data_source_repository = DataSourceRepository(session) + + data_sources = await data_source_repository.find_by_knowledge_graph(knowledge_graph_id) + updated = 0 + for data_source in data_sources: + before = data_source.last_extraction_baseline_commit + data_source.advance_extraction_baseline_to_ingested_head() + if data_source.last_extraction_baseline_commit == before: + continue + await data_source_repository.save(data_source) + updated += 1 + return updated diff --git a/src/api/management/infrastructure/sync_lifecycle_handler.py b/src/api/management/infrastructure/sync_lifecycle_handler.py index 97ebe2927..05950919d 100644 --- a/src/api/management/infrastructure/sync_lifecycle_handler.py +++ b/src/api/management/infrastructure/sync_lifecycle_handler.py @@ -261,4 +261,5 @@ async def _update_data_source_ingestion_prepared( prepared_commit=commit, prepared_file_count=file_count, ) + ds.maybe_seed_extraction_baseline_from_prepare(prepared_commit=commit) await self._ds_repo.save(ds) diff --git a/src/api/management/presentation/knowledge_graphs/extraction_jobs_routes.py b/src/api/management/presentation/knowledge_graphs/extraction_jobs_routes.py index 087391ffd..d1dc5d44c 100644 --- a/src/api/management/presentation/knowledge_graphs/extraction_jobs_routes.py +++ b/src/api/management/presentation/knowledge_graphs/extraction_jobs_routes.py @@ -47,6 +47,7 @@ class ActionResponse(BaseModel): message: str | None = None generated_jobs: int | None = None reset_count: int | None = None + 
archived_count: int | None = None warnings: list[str] = Field(default_factory=list) @@ -380,6 +381,26 @@ async def reset_completed_jobs( return ActionResponse(success=True, reset_count=int(result.get("reset_count") or 0)) +@router.post("/knowledge-graphs/{kg_id}/extraction-jobs/archive-completed", response_model=ActionResponse) +async def archive_completed_jobs( + kg_id: str, + service: Annotated[ExtractionJobsService, Depends(get_extraction_jobs_service)], + current_user: Annotated[CurrentUser, Depends(get_current_user)], +) -> ActionResponse: + try: + result = await service.archive_completed_jobs( + user_id=current_user.user_id.value, + kg_id=kg_id, + ) + except UnauthorizedError: + raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Forbidden") + return ActionResponse( + success=True, + message=result.get("message"), + archived_count=int(result.get("archived_count") or 0), + ) + + @router.post("/knowledge-graphs/{kg_id}/extraction-jobs/reset-failed", response_model=ActionResponse) async def reset_failed_jobs( kg_id: str, diff --git a/src/api/scripts/smoke-openshell-extraction-job.py b/src/api/scripts/smoke-openshell-extraction-job.py new file mode 100644 index 000000000..e83d006fe --- /dev/null +++ b/src/api/scripts/smoke-openshell-extraction-job.py @@ -0,0 +1,170 @@ +#!/usr/bin/env python3 +"""Smoke-test OpenShell extraction job sandbox bootstrap and claude launch.""" + +from __future__ import annotations + +import json +import shlex +import sys +import tempfile +from pathlib import Path + +from agentic_ci.harness import create_harness + +from extraction.infrastructure.extraction_job_prompt import ( + build_extraction_job_invoke_prompt, + write_extraction_prompt_file, +) +from extraction.infrastructure.extraction_job_workdir_layout import prepare_agentic_ci_workspace +from extraction.infrastructure.openshell import gateway as openshell_gateway +from extraction.infrastructure.openshell import sandbox as openshell_sandbox +from 
extraction.infrastructure.openshell.cli import run_openshell +from extraction.infrastructure.openshell.inference_env import insert_claude_bare_flag +from extraction.infrastructure.openshell.runtime_env import apply_openshell_cli_env +from extraction.infrastructure.openshell.vertex_provider import ensure_vertex_provider +from extraction.infrastructure.openshell_extraction_job_runner import OpenShellExtractionJobRunner +from extraction.infrastructure.workload_runtime_settings import get_extraction_workload_runtime_settings + +_AGENTIC_CI_ENV_SCRIPT = "/tmp/.agentic-ci-env.sh" + + +def main() -> int: + settings = get_extraction_workload_runtime_settings() + harness = create_harness(settings.agentic_ci_harness) + sandbox_name = "kartograph-extract-smoke-test" + work_mount = settings.openshell_container_work_mount + workdir = Path(tempfile.mkdtemp(prefix="kartograph-extract-smoke-")) + runner = OpenShellExtractionJobRunner(settings=settings) + + prepare_agentic_ci_workspace(workdir, container_run_uid=None, container_run_gid=None) + (workdir / "job-context.json").write_text( + json.dumps( + { + "api_base_url": settings.sandbox_reachable_api_base_url(), + "workload_token": "smoke", + } + ) + + "\n", + encoding="utf-8", + ) + (workdir / "sources-index.json").write_text("{}\n", encoding="utf-8") + write_extraction_prompt_file(workdir=workdir, prompt="Smoke test job.") + + print("workdir:", workdir) + print("image:", settings.openshell_extraction_sandbox_image()) + + openshell_gateway.ensure_gateway_registered( + gateway_name=settings.openshell_gateway_name, + gateway_url=settings.openshell_gateway_url, + ) + apply_openshell_cli_env(settings) + if settings.vertex_enabled(): + ensure_vertex_provider( + provider_name=settings.openshell_provider_name, + project_id=settings.vertex_project_id, + region=settings.vertex_region, + gcloud_config_mount=settings.gcloud_config_mount, + auth_mode="vertex", + model=runner._resolve_model(), + ) + 
openshell_sandbox.delete_sandbox(sandbox_name) + try: + openshell_sandbox.create_sandbox( + name=sandbox_name, + image=settings.openshell_extraction_sandbox_image(), + provider_name=settings.openshell_provider_name, + ) + write_extraction_prompt_file(workdir=workdir, prompt="Smoke test job.") + openshell_sandbox.upload_directory_contents( + sandbox_name=sandbox_name, + local_dir=str(workdir), + dest=work_mount, + ) + openshell_sandbox.apply_policy( + sandbox_name=sandbox_name, + workload="extraction_job", + policy_dir=settings.openshell_policy_dir or None, + api_host="host.docker.internal:8000", + vertex_region=settings.vertex_region if settings.vertex_enabled() else None, + policy_enforcement=settings.openshell_policy_enforcement, + ) + + version = run_openshell( + [ + "sandbox", + "exec", + "--name", + sandbox_name, + "--no-tty", + "--", + "/usr/local/bin/claude", + "--version", + ], + timeout=60.0, + ) + print("claude_version_rc:", version.returncode) + print("claude_version:", (version.stdout or version.stderr or "").strip()[:200]) + if version.returncode != 0: + return 1 + + listing = run_openshell( + [ + "sandbox", + "exec", + "--name", + sandbox_name, + "--no-tty", + "--", + "bash", + "-c", + f"ls -la {shlex.quote(work_mount)}", + ], + timeout=60.0, + ) + print("sandbox_listing:\n", listing.stdout) + if "extraction_prompt.md" not in (listing.stdout or ""): + print("FAIL: extraction_prompt.md missing in sandbox") + return 1 + + model = runner._resolve_model() + invoke_prompt = build_extraction_job_invoke_prompt(workspace_dir=work_mount) + runner._write_env_script_in_sandbox( + sandbox_name=sandbox_name, + model=model, + otel_port=4318, + otel_rate_file=None, + ) + agent_args = harness.build_args( + "Reply with exactly the word OK and nothing else.", + model, + ) + if settings.vertex_enabled() and harness.auth_mode == "vertex": + agent_args = insert_claude_bare_flag(agent_args) + shell_cmd = [ + "bash", + "-c", + f". 
{_AGENTIC_CI_ENV_SCRIPT} && cd {work_mount} && exec \"$@\"", + "--", + *agent_args, + ] + print("running_short_claude_invoke...") + res = run_openshell( + ["sandbox", "exec", "--name", sandbox_name, "--no-tty", "--", *shell_cmd], + timeout=180.0, + check=False, + ) + print("claude_invoke_rc:", res.returncode) + combined = ((res.stdout or "") + (res.stderr or "")).strip() + if combined: + print("claude_output:", combined[:3000]) + if res.returncode != 0: + return 1 + finally: + openshell_sandbox.delete_sandbox(sandbox_name) + + print("SMOKE_TEST_PASSED") + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/src/api/tests/unit/extraction/application/test_agent_session_service.py b/src/api/tests/unit/extraction/application/test_agent_session_service.py index 763546682..f04d8f1af 100644 --- a/src/api/tests/unit/extraction/application/test_agent_session_service.py +++ b/src/api/tests/unit/extraction/application/test_agent_session_service.py @@ -420,6 +420,66 @@ async def test_select_bootstrap_intake_path_persists_choice_for_continuity(self) assert updated.id == session.id +class _InactiveStickyRuntimeManager(InMemoryStickySessionRuntimeManager): + def is_runtime_active(self, **kwargs) -> bool: + return False + + +@pytest.mark.asyncio +class TestOrphanedStickySessionReconciliation: + async def test_get_active_session_archives_session_when_runtime_is_gone(self): + repo = _InMemoryAgentSessionRepository() + runtime = _InactiveStickyRuntimeManager() + service = ExtractionAgentSessionService( + repository=repo, + sticky_runtime_manager=runtime, + ) + session = await service.start_session( + user_id="user-1", + knowledge_graph_id="kg-1", + ui_mode=GraphManagementUiMode.EXTRACTION_JOBS, + ) + session.runtime_context["sticky_runtime"] = { + "phase": "ready", + "status": "active", + "container_id": "kartograph-gma-deadbeef", + } + await repo.save(session) + + active = await service.get_active_session( + user_id="user-1", + knowledge_graph_id="kg-1", + 
ui_mode=GraphManagementUiMode.EXTRACTION_JOBS, + ) + + assert active is None + stored = await repo.get_by_id(session.id) + assert stored is not None + assert stored.archived_at is not None + + async def test_get_active_session_keeps_session_without_runtime_attempt(self): + repo = _InMemoryAgentSessionRepository() + runtime = _InactiveStickyRuntimeManager() + service = ExtractionAgentSessionService( + repository=repo, + sticky_runtime_manager=runtime, + ) + session = await service.start_session( + user_id="user-1", + knowledge_graph_id="kg-1", + ui_mode=GraphManagementUiMode.EXTRACTION_JOBS, + ) + + active = await service.get_active_session( + user_id="user-1", + knowledge_graph_id="kg-1", + ui_mode=GraphManagementUiMode.EXTRACTION_JOBS, + ) + + assert active is not None + assert active.id == session.id + + def test_resolve_backend_session_mode_maps_ui_modes() -> None: assert resolve_backend_session_mode(GraphManagementUiMode.INITIAL_SCHEMA_DESIGN) == ( ExtractionSessionMode.SCHEMA_BOOTSTRAP diff --git a/src/api/tests/unit/extraction/application/test_archive_completed_extraction_jobs.py b/src/api/tests/unit/extraction/application/test_archive_completed_extraction_jobs.py new file mode 100644 index 000000000..88afdb9e5 --- /dev/null +++ b/src/api/tests/unit/extraction/application/test_archive_completed_extraction_jobs.py @@ -0,0 +1,128 @@ +"""Unit tests for promoting completed extraction jobs into archived history.""" + +from __future__ import annotations + +import json +from pathlib import Path + +import pytest + +from extraction.application.archive_completed_extraction_jobs import ( + archive_completed_extraction_jobs, + backfill_archival_metrics, +) +from extraction.domain.extraction_job import ExtractionJobRecord, ExtractionJobStatus +from extraction.infrastructure.workload_runtime_settings import ExtractionWorkloadRuntimeSettings + + +def _completed_job(*, job_id: str = "job-1") -> ExtractionJobRecord: + return ExtractionJobRecord( + id="01JOB", + 
knowledge_graph_id="kg-1", + job_id=job_id, + job_set_name="adapters", + strategy="per_instance", + status=ExtractionJobStatus.COMPLETED, + order_index=0, + description="test job", + ) + + +def test_backfill_archival_metrics_reads_jsonl_and_result_json(tmp_path: Path) -> None: + job = _completed_job() + mutations = tmp_path / "mutations" + mutations.mkdir() + (mutations / "batch.jsonl").write_text( + json.dumps( + { + "op": "UPDATE", + "type": "node", + "id": "adapter:1", + "label": "Adapter", + "set_properties": {"description": "updated"}, + } + ) + + "\n", + encoding="utf-8", + ) + (mutations / "result.json").write_text( + json.dumps({"action": "apply", "applied": True, "operations_applied": 3}), + encoding="utf-8", + ) + + metrics = backfill_archival_metrics(job, workdir=tmp_path) + + assert metrics["entities_modified"] == 1 + assert metrics["write_ops"] == 1 + assert metrics["applied_mutations_jsonl"] + + +def test_backfill_archival_metrics_falls_back_to_operations_applied(tmp_path: Path) -> None: + job = _completed_job() + mutations = tmp_path / "mutations" + mutations.mkdir() + (mutations / "result.json").write_text( + json.dumps({"action": "apply", "applied": True, "operations_applied": 3}), + encoding="utf-8", + ) + + metrics = backfill_archival_metrics(job, workdir=tmp_path) + + assert metrics["entities_modified"] == 3 + assert metrics["write_ops"] == 3 + + +class _FakeRepository: + def __init__(self, jobs: list[ExtractionJobRecord]) -> None: + self._jobs = list(jobs) + self.promoted: list[tuple[str, dict]] = [] + + async def list_jobs_by_status( + self, + *, + knowledge_graph_id: str, + status: ExtractionJobStatus, + limit: int = 10_000, + ) -> list[ExtractionJobRecord]: + assert knowledge_graph_id == "kg-1" + assert status == ExtractionJobStatus.COMPLETED + return list(self._jobs) + + async def promote_completed_job_to_archived( + self, + *, + knowledge_graph_id: str, + job_id: str, + metrics: dict, + ) -> bool: + self.promoted.append((job_id, metrics)) 
+ return True + + +@pytest.mark.asyncio +async def test_archive_completed_extraction_jobs_promotes_all_completed(tmp_path: Path) -> None: + job = _completed_job() + work_root = tmp_path / "kg-1" / job.job_id + mutations = work_root / "mutations" + mutations.mkdir(parents=True) + (mutations / "result.json").write_text( + json.dumps({"action": "apply", "applied": True, "operations_applied": 2}), + encoding="utf-8", + ) + repo = _FakeRepository([job]) + settings = ExtractionWorkloadRuntimeSettings( + backend="openshell", + container_engine="docker", + extraction_job_work_dir=str(tmp_path), + ) + + result = await archive_completed_extraction_jobs( + repository=repo, + knowledge_graph_id="kg-1", + settings=settings, + ) + + assert result["archived_count"] == 1 + assert result["metrics_backfilled_count"] == 1 + assert repo.promoted[0][0] == "job-1" + assert repo.promoted[0][1]["write_ops"] == 2 diff --git a/src/api/tests/unit/extraction/application/test_schema_authoring_guide.py b/src/api/tests/unit/extraction/application/test_schema_authoring_guide.py index 623a238c9..a22337035 100644 --- a/src/api/tests/unit/extraction/application/test_schema_authoring_guide.py +++ b/src/api/tests/unit/extraction/application/test_schema_authoring_guide.py @@ -33,3 +33,6 @@ def test_authoring_guide_documents_bootstrap_and_modeling_guidance() -> None: assert "duplicate labels are rejected" in SCHEMA_AUTHORING_GUIDE assert "tests_ct_api" in SCHEMA_AUTHORING_GUIDE assert "eight primary" in SCHEMA_AUTHORING_GUIDE + assert "## Batch extraction jobs" in SCHEMA_AUTHORING_GUIDE + assert "workload-graph-read.sh" in SCHEMA_AUTHORING_GUIDE + assert "Partial UPDATE" in SCHEMA_AUTHORING_GUIDE diff --git a/src/api/tests/unit/extraction/infrastructure/test_agentic_ci_extraction_job_runner.py b/src/api/tests/unit/extraction/infrastructure/test_agentic_ci_extraction_job_runner.py index b4c28dc43..36eb36d51 100644 --- a/src/api/tests/unit/extraction/infrastructure/test_agentic_ci_extraction_job_runner.py 
+++ b/src/api/tests/unit/extraction/infrastructure/test_agentic_ci_extraction_job_runner.py @@ -45,6 +45,7 @@ def test_extraction_job_invoke_prompt_references_materialized_file() -> None: assert "extraction_prompt.md" in EXTRACTION_JOB_INVOKE_PROMPT assert "job-context.json" in EXTRACTION_JOB_INVOKE_PROMPT assert "helpers/workload-mutations.sh" in EXTRACTION_JOB_INVOKE_PROMPT + assert "helpers/workload-graph-read.sh" in EXTRACTION_JOB_INVOKE_PROMPT assert "mutations/result.json" in EXTRACTION_JOB_INVOKE_PROMPT diff --git a/src/api/tests/unit/extraction/infrastructure/test_extraction_job_mutation_metrics.py b/src/api/tests/unit/extraction/infrastructure/test_extraction_job_mutation_metrics.py index f8f7f32fc..df40c8031 100644 --- a/src/api/tests/unit/extraction/infrastructure/test_extraction_job_mutation_metrics.py +++ b/src/api/tests/unit/extraction/infrastructure/test_extraction_job_mutation_metrics.py @@ -8,6 +8,7 @@ from extraction.infrastructure.extraction_job_mutation_metrics import ( metrics_from_mutation_jsonl, metrics_from_mutation_workdir, + reconcile_mutation_metrics, ) @@ -81,3 +82,42 @@ def test_metrics_from_mutation_workdir_reads_latest_jsonl(tmp_path: Path) -> Non assert metrics["relationships_modified"] == 1 assert metrics["write_ops"] == 1 + + +def test_reconcile_mutation_metrics_prefers_workdir_jsonl(tmp_path: Path) -> None: + mutations = tmp_path / "mutations" + mutations.mkdir() + (mutations / "batch.jsonl").write_text( + json.dumps( + { + "op": "UPDATE", + "type": "node", + "id": "adapter:1", + "label": "Adapter", + "set_properties": {"description": "updated"}, + } + ) + + "\n", + encoding="utf-8", + ) + + metrics = reconcile_mutation_metrics( + {"write_ops": 0}, + workdir=tmp_path, + operations_applied=5, + ) + + assert metrics["entities_modified"] == 1 + assert metrics["write_ops"] == 1 + assert metrics["applied_mutations_jsonl"] + + +def test_reconcile_mutation_metrics_falls_back_to_operations_applied(tmp_path: Path) -> None: + metrics = 
reconcile_mutation_metrics( + {"write_ops": 0}, + workdir=tmp_path, + operations_applied=3, + ) + + assert metrics["entities_modified"] == 3 + assert metrics["write_ops"] == 3 diff --git a/src/api/tests/unit/extraction/infrastructure/test_extraction_job_prompt.py b/src/api/tests/unit/extraction/infrastructure/test_extraction_job_prompt.py index 68759a926..6afa5c161 100644 --- a/src/api/tests/unit/extraction/infrastructure/test_extraction_job_prompt.py +++ b/src/api/tests/unit/extraction/infrastructure/test_extraction_job_prompt.py @@ -42,6 +42,10 @@ def test_build_extraction_job_prompt_includes_instances_and_files() -> None: assert "mutation-examples.jsonl" in prompt assert "properties_missing" in prompt assert "paths_not_found" in prompt + assert "workload-graph-read.sh" in prompt + assert "token-efficient" in prompt.lower() or "token-efficient" in prompt + assert "set_properties" in prompt + assert "properties you omit are preserved" in prompt def test_build_extraction_job_prompt_mentions_graph_id_in_job_context() -> None: diff --git a/src/api/tests/unit/extraction/infrastructure/test_extraction_job_runtime_stop.py b/src/api/tests/unit/extraction/infrastructure/test_extraction_job_runtime_stop.py new file mode 100644 index 000000000..732f18cb9 --- /dev/null +++ b/src/api/tests/unit/extraction/infrastructure/test_extraction_job_runtime_stop.py @@ -0,0 +1,44 @@ +"""Unit tests for extraction job runtime teardown (containers + OpenShell sandboxes).""" + +from __future__ import annotations + +from unittest.mock import MagicMock, patch + +from extraction.infrastructure.extraction_job_container import stop_extraction_job_runtimes + + +@patch("extraction.infrastructure.extraction_job_container.stop_extraction_job_sandboxes") +@patch("extraction.infrastructure.extraction_job_container.stop_extraction_job_containers") +def test_stop_extraction_job_runtimes_stops_sandboxes_on_openshell_backend( + mock_stop_containers: MagicMock, + mock_stop_sandboxes: MagicMock, +) -> None: 
+ mock_stop_containers.return_value = 0 + mock_stop_sandboxes.return_value = 3 + + containers, sandboxes = stop_extraction_job_runtimes( + job_ids=("job-a", "job-b"), + openshell_backend=True, + ) + + assert containers == 0 + assert sandboxes == 3 + mock_stop_sandboxes.assert_called_once_with(job_ids=("job-a", "job-b"), sweep_orphans=True) + + +@patch("extraction.infrastructure.extraction_job_container.stop_extraction_job_sandboxes") +@patch("extraction.infrastructure.extraction_job_container.stop_extraction_job_containers") +def test_stop_extraction_job_runtimes_skips_sandboxes_for_container_backend( + mock_stop_containers: MagicMock, + mock_stop_sandboxes: MagicMock, +) -> None: + mock_stop_containers.return_value = 2 + + containers, sandboxes = stop_extraction_job_runtimes( + job_ids=("job-a",), + openshell_backend=False, + ) + + assert containers == 2 + assert sandboxes == 0 + mock_stop_sandboxes.assert_not_called() diff --git a/src/api/tests/unit/extraction/infrastructure/test_extraction_job_workdir_materializer.py b/src/api/tests/unit/extraction/infrastructure/test_extraction_job_workdir_materializer.py index 57ecfb553..7e6539360 100644 --- a/src/api/tests/unit/extraction/infrastructure/test_extraction_job_workdir_materializer.py +++ b/src/api/tests/unit/extraction/infrastructure/test_extraction_job_workdir_materializer.py @@ -133,6 +133,7 @@ async def test_prepare_materializes_instance_referenced_paths_and_workspace_layo assert repo_file.is_file() assert (job_root / "mutations").is_dir() assert (job_root / "helpers" / "workload-mutations.sh").is_file() + assert (job_root / "helpers" / "workload-graph-read.sh").is_file() assert (job_root / "helpers" / "mutation-examples.jsonl").is_file() assert (job_root / "helpers" / "sync_instances.py").is_file() context = json.loads((job_root / "job-context.json").read_text(encoding="utf-8")) diff --git a/src/api/tests/unit/extraction/infrastructure/test_extraction_run_orchestrator.py 
b/src/api/tests/unit/extraction/infrastructure/test_extraction_run_orchestrator.py new file mode 100644 index 000000000..ef3aad249 --- /dev/null +++ b/src/api/tests/unit/extraction/infrastructure/test_extraction_run_orchestrator.py @@ -0,0 +1,56 @@ +"""Tests for extraction run orchestrator baseline updates.""" + +from __future__ import annotations + +from contextlib import asynccontextmanager +from unittest.mock import AsyncMock, patch + +import pytest + +from extraction.domain.extraction_job import ExtractionRunStatus +from extraction.infrastructure.extraction_run_orchestrator import ( + ExtractionRunOrchestrator, + _OrchestratorState, +) + + +@pytest.mark.asyncio +async def test_maybe_finish_run_advances_extraction_baselines_for_kg() -> None: + session = AsyncMock() + session.commit = AsyncMock() + + @asynccontextmanager + async def session_context(): + yield session + + session_factory = lambda: session_context() + + orchestrator = ExtractionRunOrchestrator(session_factory=session_factory) + state = _OrchestratorState( + knowledge_graph_id="kg-001", + tenant_id="tenant-001", + worker_count=2, + ) + + repo = AsyncMock() + repo.count_by_status.return_value = {"pending": 0, "in_progress": 0} + + with ( + patch( + "extraction.infrastructure.extraction_run_orchestrator.ExtractionJobRepository", + return_value=repo, + ), + patch( + "extraction.infrastructure.extraction_run_orchestrator.advance_extraction_baselines_for_knowledge_graph", + new_callable=AsyncMock, + ) as advance_baselines, + ): + await orchestrator._maybe_finish_run(state) + + repo.upsert_run.assert_awaited_once() + assert repo.upsert_run.await_args.kwargs["status"] == ExtractionRunStatus.IDLE + advance_baselines.assert_awaited_once_with( + session=session, + knowledge_graph_id="kg-001", + ) + session.commit.assert_awaited_once() diff --git a/src/api/tests/unit/extraction/infrastructure/test_openshell_extraction_job_runner.py 
b/src/api/tests/unit/extraction/infrastructure/test_openshell_extraction_job_runner.py new file mode 100644 index 000000000..708502703 --- /dev/null +++ b/src/api/tests/unit/extraction/infrastructure/test_openshell_extraction_job_runner.py @@ -0,0 +1,130 @@ +"""Unit tests for OpenShell extraction job runner sandbox wiring.""" + +from __future__ import annotations + +from pathlib import Path +from unittest.mock import patch + +from extraction.infrastructure.openshell.cli import OpenShellCliError +from extraction.infrastructure.openshell_extraction_job_runner import OpenShellExtractionJobRunner +from extraction.infrastructure.workload_runtime_settings import ExtractionWorkloadRuntimeSettings + + +def test_openshell_extraction_sandbox_image_uses_agentic_ci_claude_sandbox() -> None: + settings = ExtractionWorkloadRuntimeSettings( + sticky_image="kartograph-agent-runtime:dev", + openshell_extraction_image="quay.io/aipcc/agentic-ci/claude-sandbox:latest", + agentic_ci_image="ghcr.io/opendatahub-io/ai-helpers:latest", + ) + + assert ( + settings.openshell_extraction_sandbox_image() + == "quay.io/aipcc/agentic-ci/claude-sandbox:latest" + ) + assert settings.openshell_extraction_sandbox_image() != settings.sticky_image + assert settings.openshell_extraction_sandbox_image() != settings.agentic_ci_image + + +def test_run_agent_uses_harness_claude_binary() -> None: + runner = OpenShellExtractionJobRunner() + + command = runner._harness.build_args("Extract entities.", "claude-opus-4-6") + + assert command[0] == "claude" + assert "-p" in command + + +def test_build_extraction_job_invoke_prompt_uses_openshell_workspace() -> None: + from extraction.infrastructure.extraction_job_prompt import build_extraction_job_invoke_prompt + + prompt = build_extraction_job_invoke_prompt(workspace_dir="/sandbox") + + assert "in /sandbox." 
in prompt + assert "/workspace" not in prompt + + +def test_extraction_provider_defaults_to_kartograph_gma() -> None: + settings = ExtractionWorkloadRuntimeSettings() + + assert settings.openshell_provider_name == "kartograph-gma" + + +def test_run_agent_uses_inference_local_bare_for_vertex(monkeypatch) -> None: + monkeypatch.setenv("CLAUDE_CODE_USE_VERTEX", "1") + monkeypatch.setenv("ANTHROPIC_VERTEX_PROJECT_ID", "my-project") + monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False) + runner = OpenShellExtractionJobRunner() + + agent_args = runner._harness.build_args("Extract entities.", "claude-opus-4-6") + from extraction.infrastructure.openshell.inference_env import insert_claude_bare_flag + + if runner._settings.vertex_enabled() and runner._harness.auth_mode == "vertex": + agent_args = insert_claude_bare_flag(agent_args) + + assert agent_args[0] == "claude" + assert agent_args[1] == "--bare" + + +def test_sync_mutation_artifacts_downloads_mutations_directory(tmp_path: Path) -> None: + runner = OpenShellExtractionJobRunner() + workdir = tmp_path / "job" + + with patch( + "extraction.infrastructure.openshell_extraction_job_runner.openshell_sandbox.download_directory_contents", + ) as download_dir: + runner._sync_mutation_artifacts_from_sandbox( + sandbox_name="kartograph-extract-job-1", + workdir=workdir, + work_mount="/sandbox", + ) + + download_dir.assert_called_once_with( + sandbox_name="kartograph-extract-job-1", + remote_dir="/sandbox/mutations", + local_dir=workdir, + ) + + +def test_sync_mutation_artifacts_falls_back_to_result_json(tmp_path: Path) -> None: + runner = OpenShellExtractionJobRunner() + workdir = tmp_path / "job" + + with patch( + "extraction.infrastructure.openshell_extraction_job_runner.openshell_sandbox.download_directory_contents", + side_effect=OpenShellCliError("sandbox missing"), + ), patch( + "extraction.infrastructure.openshell_extraction_job_runner.openshell_sandbox.download_path", + ) as download_file: + 
runner._sync_mutation_artifacts_from_sandbox( + sandbox_name="kartograph-extract-job-1", + workdir=workdir, + work_mount="/sandbox", + ) + + download_file.assert_called_once_with( + sandbox_name="kartograph-extract-job-1", + sandbox_path="/sandbox/mutations/result.json", + local_path=str(workdir / "mutations" / "result.json"), + ) + + +def test_sync_mutation_artifacts_skips_fallback_when_result_exists(tmp_path: Path) -> None: + runner = OpenShellExtractionJobRunner() + workdir = tmp_path / "job" + result = workdir / "mutations" / "result.json" + result.parent.mkdir(parents=True) + result.write_text('{"action":"apply","applied":true,"operations_applied":1,"errors":[]}\n') + + with patch( + "extraction.infrastructure.openshell_extraction_job_runner.openshell_sandbox.download_directory_contents", + side_effect=OpenShellCliError("sandbox missing"), + ), patch( + "extraction.infrastructure.openshell_extraction_job_runner.openshell_sandbox.download_path", + ) as download_file: + runner._sync_mutation_artifacts_from_sandbox( + sandbox_name="kartograph-extract-job-1", + workdir=workdir, + work_mount="/sandbox", + ) + + download_file.assert_not_called() diff --git a/src/api/tests/unit/extraction/infrastructure/test_openshell_extraction_sandbox_pool.py b/src/api/tests/unit/extraction/infrastructure/test_openshell_extraction_sandbox_pool.py new file mode 100644 index 000000000..6e0970517 --- /dev/null +++ b/src/api/tests/unit/extraction/infrastructure/test_openshell_extraction_sandbox_pool.py @@ -0,0 +1,43 @@ +"""Unit tests for OpenShell per-worker extraction sandboxes.""" + +from __future__ import annotations + +from extraction.domain.extraction_job import ExtractionJobRecord, ExtractionJobStatus +from extraction.infrastructure.openshell.extraction_sandbox_pool import ( + resolve_extraction_sandbox_assignment, + worker_index, +) +from extraction.infrastructure.workload_runtime_settings import ExtractionWorkloadRuntimeSettings + + +def _job(*, job_id: str = "job-a", 
worker_id: str | None = "worker-03") -> ExtractionJobRecord: + return ExtractionJobRecord( + id="01JOB", + knowledge_graph_id="01KG1234567890", + job_id=job_id, + job_set_name="adapters", + strategy="by_files", + status=ExtractionJobStatus.IN_PROGRESS, + order_index=0, + description="test", + worker_id=worker_id, + ) + + +def test_worker_index_parses_worker_ids() -> None: + assert worker_index("worker-01") == 1 + assert worker_index("worker-12") == 12 + assert worker_index(None) == 1 + + +def test_resolve_assignment_uses_one_sandbox_per_worker() -> None: + settings = ExtractionWorkloadRuntimeSettings(job_runner="openshell") + + worker_03 = resolve_extraction_sandbox_assignment(_job(worker_id="worker-03"), settings) + worker_07 = resolve_extraction_sandbox_assignment(_job(job_id="job-b", worker_id="worker-07"), settings) + + assert worker_03.reuse is True + assert worker_03.slot == 3 + assert worker_03.sandbox_name.endswith("-w03") + assert worker_07.sandbox_name.endswith("-w07") + assert worker_03.sandbox_name != worker_07.sandbox_name diff --git a/src/api/tests/unit/extraction/infrastructure/test_openshell_gateway.py b/src/api/tests/unit/extraction/infrastructure/test_openshell_gateway.py new file mode 100644 index 000000000..c2f363541 --- /dev/null +++ b/src/api/tests/unit/extraction/infrastructure/test_openshell_gateway.py @@ -0,0 +1,147 @@ +"""Unit tests for OpenShell gateway helpers.""" + +from __future__ import annotations + +from unittest.mock import MagicMock, patch + +import pytest + +from extraction.infrastructure.openshell.cli import OpenShellCliError, openshell_subprocess_env +from extraction.infrastructure.openshell.gateway import ( + ensure_gateway_registered, + gateway_is_connected, + gateway_is_registered, +) + + +class TestOpenShellSubprocessEnv: + def test_maps_kartograph_openshell_settings_to_cli_env(self, monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.delenv("XDG_CONFIG_HOME", raising=False) + monkeypatch.delenv("OPENSHELL_GATEWAY", 
raising=False) + monkeypatch.delenv("OPENSHELL_GATEWAY_ENDPOINT", raising=False) + monkeypatch.setenv("KARTOGRAPH_EXTRACTION_RUNTIME_OPENSHELL_XDG_CONFIG_HOME", "/root/.config") + monkeypatch.setenv("KARTOGRAPH_EXTRACTION_RUNTIME_OPENSHELL_GATEWAY_NAME", "openshell") + monkeypatch.setenv( + "KARTOGRAPH_EXTRACTION_RUNTIME_OPENSHELL_GATEWAY_URL", + "https://host.docker.internal:17670", + ) + + env = openshell_subprocess_env() + + assert env["XDG_CONFIG_HOME"] == "/root/.config" + assert env["OPENSHELL_GATEWAY"] == "openshell" + assert env["OPENSHELL_GATEWAY_ENDPOINT"] == "https://host.docker.internal:17670" + + +class TestGatewayIsRegistered: + def test_returns_true_when_gateway_get_succeeds(self) -> None: + with patch( + "extraction.infrastructure.openshell.gateway.run_openshell", + return_value=MagicMock(returncode=0), + ) as run: + assert gateway_is_registered(gateway_name="openshell") is True + run.assert_called_once_with( + ["gateway", "--gateway", "openshell", "info"], + check=False, + ) + + def test_returns_false_when_gateway_get_fails(self) -> None: + with patch( + "extraction.infrastructure.openshell.gateway.run_openshell", + return_value=MagicMock(returncode=1), + ): + assert gateway_is_registered(gateway_name="openshell") is False + + +class TestGatewayIsConnected: + def test_returns_true_when_status_shows_connected(self) -> None: + with patch( + "extraction.infrastructure.openshell.gateway.run_openshell", + return_value=MagicMock(returncode=0, stdout="Connected", stderr=""), + ): + assert gateway_is_connected() is True + + def test_returns_false_when_no_gateway_configured(self) -> None: + with patch( + "extraction.infrastructure.openshell.gateway.run_openshell", + return_value=MagicMock( + returncode=1, + stdout="", + stderr="No gateway configured", + ), + ): + assert gateway_is_connected() is False + + +class TestEnsureGatewayRegistered: + def test_skips_gateway_add_when_registered_and_connected(self) -> None: + with patch( + 
"extraction.infrastructure.openshell.gateway.gateway_is_registered", + return_value=True, + ), patch( + "extraction.infrastructure.openshell.gateway.gateway_is_connected", + return_value=True, + ), patch( + "extraction.infrastructure.openshell.gateway.run_openshell" + ) as run: + ensure_gateway_registered( + gateway_name="openshell", + gateway_url="https://host.docker.internal:17670", + ) + run.assert_not_called() + + def test_raises_when_registered_but_unreachable(self) -> None: + with patch( + "extraction.infrastructure.openshell.gateway.gateway_is_registered", + return_value=True, + ), patch( + "extraction.infrastructure.openshell.gateway.gateway_is_connected", + return_value=False, + ): + with pytest.raises(OpenShellCliError, match="not reachable"): + ensure_gateway_registered( + gateway_name="openshell", + gateway_url="https://host.docker.internal:17670", + ) + + def test_skips_gateway_add_when_connected_without_registration(self) -> None: + with patch( + "extraction.infrastructure.openshell.gateway.gateway_is_registered", + return_value=False, + ), patch( + "extraction.infrastructure.openshell.gateway.gateway_is_connected", + return_value=True, + ), patch( + "extraction.infrastructure.openshell.gateway.run_openshell" + ) as run: + ensure_gateway_registered( + gateway_name="openshell", + gateway_url="https://127.0.0.1:17670", + ) + run.assert_not_called() + + def test_registers_gateway_when_not_configured(self) -> None: + with patch( + "extraction.infrastructure.openshell.gateway.gateway_is_registered", + return_value=False, + ), patch( + "extraction.infrastructure.openshell.gateway.gateway_is_connected", + return_value=False, + ), patch( + "extraction.infrastructure.openshell.gateway.run_openshell" + ) as run: + ensure_gateway_registered( + gateway_name="openshell", + gateway_url="https://127.0.0.1:17670", + ) + run.assert_called_once_with( + [ + "gateway", + "add", + "https://127.0.0.1:17670", + "--local", + "--name", + "openshell", + ], + timeout=30.0, + ) 
diff --git a/src/api/tests/unit/extraction/infrastructure/test_openshell_inference_env.py b/src/api/tests/unit/extraction/infrastructure/test_openshell_inference_env.py new file mode 100644 index 000000000..e8445f17e --- /dev/null +++ b/src/api/tests/unit/extraction/infrastructure/test_openshell_inference_env.py @@ -0,0 +1,40 @@ +"""Unit tests for OpenShell inference.local sandbox env.""" + +from __future__ import annotations + +from extraction.infrastructure.openshell.inference_env import ( + build_openshell_inference_env_script_lines, + insert_claude_bare_flag, + insert_vertex_compatible_effort, +) + + +def test_inference_env_script_uses_inference_local_not_vertex_adc() -> None: + lines = build_openshell_inference_env_script_lines() + + joined = "\n".join(lines) + assert "ANTHROPIC_BASE_URL=https://inference.local" in joined + assert "ANTHROPIC_API_KEY=unused" in joined + assert "CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS=1" in joined + assert "KARTOGRAPH_WORKSPACE=/sandbox" in joined + assert "CLAUDE_CODE_USE_VERTEX" not in joined + + +def test_insert_claude_bare_flag_adds_bare_after_binary() -> None: + args = insert_claude_bare_flag(["claude", "--model", "claude-opus-4-6", "-p", "hi"]) + + assert args[:3] == ["claude", "--bare", "--model"] + + +def test_insert_claude_bare_flag_is_idempotent() -> None: + args = ["claude", "--bare", "-p", "hi"] + + assert insert_claude_bare_flag(args) == args + + +def test_insert_vertex_compatible_effort_adds_high_after_bare() -> None: + args = insert_vertex_compatible_effort( + insert_claude_bare_flag(["claude", "--model", "claude-opus-4-6", "-p", "hi"]), + ) + + assert args[:5] == ["claude", "--bare", "--effort", "high", "--model"] diff --git a/src/api/tests/unit/extraction/infrastructure/test_openshell_policy.py b/src/api/tests/unit/extraction/infrastructure/test_openshell_policy.py index ddbe1e2d6..15bed7e9b 100644 --- a/src/api/tests/unit/extraction/infrastructure/test_openshell_policy.py +++ 
b/src/api/tests/unit/extraction/infrastructure/test_openshell_policy.py @@ -4,6 +4,7 @@ from extraction.infrastructure.openshell.policy import ( bundled_policy_dir, + regional_vertex_ai_endpoint, resolve_endpoints, resolve_enforcement, resolve_l7_paths, @@ -30,6 +31,29 @@ def test_resolve_enforcement_from_bundled_policy() -> None: assert enforcement in {"soft", "hard_requirement"} +def test_resolve_endpoints_includes_vertex_oauth_for_gma_modes() -> None: + endpoints = resolve_endpoints(ui_mode="extraction-jobs") + assert "oauth2.googleapis.com:443:read-write" in endpoints + assert "aiplatform.googleapis.com:443:read-write" in endpoints + + +def test_resolve_endpoints_adds_regional_vertex_hostname() -> None: + endpoints = resolve_endpoints( + ui_mode="extraction-jobs", + api_host="host.docker.internal:8000", + vertex_region="us-east5", + ) + assert "us-east5-aiplatform.googleapis.com:443:read-write" in endpoints + assert "host.docker.internal:8000:read-write" in endpoints + + +def test_regional_vertex_ai_endpoint() -> None: + assert ( + regional_vertex_ai_endpoint(vertex_region="us-east5") + == "us-east5-aiplatform.googleapis.com:443:read-write" + ) + + def test_resolve_l7_paths_for_extraction_jobs_mode() -> None: paths = resolve_l7_paths(ui_mode="extraction-jobs") assert any("jobs" in path for path in paths) diff --git a/src/api/tests/unit/extraction/infrastructure/test_openshell_sandbox.py b/src/api/tests/unit/extraction/infrastructure/test_openshell_sandbox.py new file mode 100644 index 000000000..1838804f8 --- /dev/null +++ b/src/api/tests/unit/extraction/infrastructure/test_openshell_sandbox.py @@ -0,0 +1,323 @@ +"""Unit tests for OpenShell sandbox helpers.""" + +from __future__ import annotations + +from unittest.mock import MagicMock, patch + +import pytest + +from extraction.infrastructure.openshell.cli import OpenShellCliError +from extraction.infrastructure.openshell.sandbox import ( + create_sandbox, + delete_sandboxes_by_prefix, + 
download_directory_contents, + download_path, + sandbox_phase, + start_forward, + stop_extraction_job_sandbox, + upload_directory_contents, + upload_path, +) + + +class TestSandboxPhase: + def test_returns_phase_for_named_sandbox(self) -> None: + with patch( + "extraction.infrastructure.openshell.sandbox.run_openshell", + return_value=MagicMock( + returncode=0, + stdout='[{"name":"sb-1","phase":"Ready"}]', + ), + ): + assert sandbox_phase("sb-1") == "Ready" + + def test_returns_none_when_sandbox_missing(self) -> None: + with patch( + "extraction.infrastructure.openshell.sandbox.run_openshell", + return_value=MagicMock(returncode=0, stdout="[]"), + ): + assert sandbox_phase("missing") is None + + +class TestCreateSandbox: + def test_waits_for_ready_then_terminates_create_process(self) -> None: + proc = MagicMock() + proc.poll.return_value = None + with patch( + "extraction.infrastructure.openshell.sandbox.popen_openshell", + return_value=proc, + ), patch( + "extraction.infrastructure.openshell.sandbox._wait_for_sandbox_ready", + ) as wait_ready, patch( + "extraction.infrastructure.openshell.sandbox._terminate_create_process", + ) as terminate: + create_sandbox( + name="sb-1", + image="kartograph-agent-runtime:dev", + provider_name="kartograph-gma", + ) + wait_ready.assert_called_once_with(name="sb-1", timeout=300.0) + terminate.assert_called_once_with(proc) + + def test_raises_when_sandbox_enters_error_phase(self) -> None: + with patch( + "extraction.infrastructure.openshell.sandbox.popen_openshell", + return_value=MagicMock(poll=MagicMock(return_value=None)), + ), patch( + "extraction.infrastructure.openshell.sandbox.sandbox_phase", + side_effect=[None, "Error"], + ), patch( + "extraction.infrastructure.openshell.sandbox.run_openshell", + return_value=MagicMock(returncode=0, stdout="phase=Error", stderr=""), + ), patch( + "extraction.infrastructure.openshell.sandbox.time.sleep", + ): + with pytest.raises(OpenShellCliError, match="entered Error"): + 
create_sandbox(name="sb-1", image="img:dev") + + +class TestUploadPath: + def test_passes_dest_as_positional_argument(self) -> None: + with patch( + "extraction.infrastructure.openshell.sandbox.run_openshell", + ) as run: + upload_path( + sandbox_name="sb-1", + local_path="/tmp/work", + dest="/workspace", + ) + run.assert_called_once_with( + [ + "sandbox", + "upload", + "--no-git-ignore", + "sb-1", + "/tmp/work", + "/workspace", + ], + timeout=600.0, + ) + + +class TestDownloadPath: + def test_downloads_into_parent_directory(self, tmp_path) -> None: + local_file = tmp_path / "mutations" / "result.json" + + def _simulate_openshell_download(args, **kwargs) -> None: + # OpenShell writes basename(remote path) into the destination directory. + local_file.parent.mkdir(parents=True, exist_ok=True) + (local_file.parent / "result.json").write_text("{}", encoding="utf-8") + + with patch( + "extraction.infrastructure.openshell.sandbox.run_openshell", + side_effect=_simulate_openshell_download, + ) as run_mock: + download_path( + sandbox_name="sb-1", + sandbox_path="/sandbox/mutations/result.json", + local_path=str(local_file), + ) + + assert local_file.is_file() + run_mock.assert_called_once_with( + [ + "sandbox", + "download", + "sb-1", + "/sandbox/mutations/result.json", + str(local_file.parent), + ], + timeout=120.0, + ) + + def test_renames_downloaded_tar_to_requested_local_path(self, tmp_path) -> None: + local_tar = tmp_path / "nested" / "archive.tar" + + def _simulate_openshell_download(args, **kwargs) -> None: + local_tar.parent.mkdir(parents=True, exist_ok=True) + (local_tar.parent / "kartograph-download-sb-1.tar").write_bytes(b"tar-bytes") + + with patch( + "extraction.infrastructure.openshell.sandbox.run_openshell", + side_effect=_simulate_openshell_download, + ): + download_path( + sandbox_name="sb-1", + sandbox_path="/tmp/kartograph-download-sb-1.tar", + local_path=str(local_tar), + ) + + assert local_tar.read_bytes() == b"tar-bytes" + + +class 
TestDownloadDirectoryContents: + def test_tars_remote_dir_downloads_and_extracts(self, tmp_path) -> None: + workdir = tmp_path / "job" + workdir.mkdir() + with patch( + "extraction.infrastructure.openshell.sandbox.run_openshell", + ) as run_mock, patch( + "extraction.infrastructure.openshell.sandbox.download_path", + ) as download_mock, patch( + "extraction.infrastructure.openshell.sandbox.tarfile.open", + ) as tar_open: + download_directory_contents( + sandbox_name="sb-1", + remote_dir="/sandbox/mutations", + local_dir=workdir, + ) + + tar_cmd = run_mock.call_args_list[0].args[0] + assert "tar -cf" in tar_cmd[-1] + assert "/sandbox/mutations" in tar_cmd[-1] + download_mock.assert_called_once() + assert download_mock.call_args.kwargs["sandbox_path"].startswith("/tmp/kartograph-download-") + tar_open.assert_called_once() + tar_open.return_value.__enter__.return_value.extractall.assert_called_once_with(workdir) + + +class TestExtractionSandboxCleanup: + def test_stop_extraction_job_sandbox_deletes_existing_sandbox(self) -> None: + with patch( + "extraction.infrastructure.openshell.sandbox.sandbox_exists", + return_value=True, + ), patch( + "extraction.infrastructure.openshell.sandbox.delete_sandbox", + ) as delete: + assert stop_extraction_job_sandbox(job_id="job-a") is True + delete.assert_called_once() + + def test_delete_sandboxes_by_prefix(self) -> None: + with patch( + "extraction.infrastructure.openshell.sandbox.list_sandbox_names", + return_value=[ + "kartograph-extract-job-a", + "kartograph-gma-session-1", + "kartograph-extract-job-b", + ], + ), patch( + "extraction.infrastructure.openshell.sandbox.delete_sandbox", + ) as delete: + deleted = delete_sandboxes_by_prefix("kartograph-extract-") + + assert deleted == 2 + assert delete.call_count == 2 + + +class TestUploadDirectoryContents: + def test_uploads_tar_and_extracts_into_dest(self, tmp_path) -> None: + workdir = tmp_path / "work" + workdir.mkdir() + (workdir / "helpers").mkdir() + (workdir / "helpers" / 
"sync.py").write_text("print('ok')", encoding="utf-8") + + with patch( + "extraction.infrastructure.openshell.sandbox.upload_path", + ) as upload_path_mock, patch( + "extraction.infrastructure.openshell.sandbox.run_openshell", + ) as run_mock: + upload_directory_contents( + sandbox_name="sb-1", + local_dir=str(workdir), + dest="/sandbox", + ) + + upload_path_mock.assert_called_once() + uploaded_tar = upload_path_mock.call_args.kwargs["local_path"] + assert uploaded_tar.endswith(".tar") + assert upload_path_mock.call_args.kwargs["dest"].startswith("/tmp/kartograph-upload-sb-1") + + run_mock.assert_called_once() + exec_args = run_mock.call_args.args[0] + assert exec_args[:4] == ["sandbox", "exec", "--name", "sb-1"] + extract_cmd = exec_args[-1] + assert "mkdir -p /sandbox" in extract_cmd + assert "tar -xf" in extract_cmd + assert "-C /sandbox" in extract_cmd + + +class TestUploadGcloudAdc: + def test_uploads_adc_and_sets_permissions(self, tmp_path) -> None: + host_gcloud = tmp_path / "gcloud" + host_gcloud.mkdir() + adc = host_gcloud / "application_default_credentials.json" + adc.write_text('{"type":"service_account"}', encoding="utf-8") + + with patch( + "extraction.infrastructure.openshell.sandbox.run_openshell", + ) as run, patch( + "extraction.infrastructure.openshell.sandbox.upload_path", + ) as upload: + from extraction.infrastructure.openshell.sandbox import upload_gcloud_adc + + upload_gcloud_adc( + sandbox_name="sb-1", + host_gcloud_config_dir=str(host_gcloud), + container_config_path="/tmp/kartograph-gcloud", + ) + + upload.assert_called_once_with( + sandbox_name="sb-1", + local_path=str(adc), + dest="/tmp/kartograph-gcloud/application_default_credentials.json", + ) + assert run.call_count == 2 + mkdir_cmd = run.call_args_list[0].args[0][-1] + assert "mkdir -p /tmp/kartograph-gcloud" in mkdir_cmd + chmod_cmd = run.call_args_list[1].args[0][-1] + assert "chmod a+r" in chmod_cmd + + def test_raises_when_adc_missing(self, tmp_path) -> None: + with 
pytest.raises(OpenShellCliError, match="Google ADC not found"): + from extraction.infrastructure.openshell.sandbox import upload_gcloud_adc + + upload_gcloud_adc( + sandbox_name="sb-1", + host_gcloud_config_dir=str(tmp_path / "missing"), + container_config_path="/tmp/kartograph-gcloud", + ) + + +class TestStartForward: + def test_raises_when_forwards_state_dir_is_read_only( + self, + tmp_path, + monkeypatch: pytest.MonkeyPatch, + ) -> None: + config_home = tmp_path / "config" + forwards_dir = config_home / "openshell" / "forwards" + forwards_dir.mkdir(parents=True) + monkeypatch.setenv("KARTOGRAPH_EXTRACTION_RUNTIME_OPENSHELL_XDG_CONFIG_HOME", str(config_home)) + monkeypatch.setattr("os.access", lambda _path, _mode: False) + + with pytest.raises(OpenShellCliError, match="read-only"): + start_forward(sandbox_name="sb-1", port=18814) + + def test_starts_forward_service_to_agent_runtime_port( + self, + tmp_path, + monkeypatch: pytest.MonkeyPatch, + ) -> None: + config_home = tmp_path / "config" + forwards_dir = config_home / "openshell" / "forwards" + forwards_dir.mkdir(parents=True) + monkeypatch.setenv("KARTOGRAPH_EXTRACTION_RUNTIME_OPENSHELL_XDG_CONFIG_HOME", str(config_home)) + + with patch( + "extraction.infrastructure.openshell.sandbox.subprocess.Popen", + ) as popen: + start_forward(sandbox_name="sb-1", port=18814, target_port=8787) + + popen.assert_called_once() + command = popen.call_args.args[0] + assert command == [ + "openshell", + "forward", + "service", + "sb-1", + "--target-port", + "8787", + "--local", + "18814", + ] + assert popen.call_args.kwargs["start_new_session"] is True diff --git a/src/api/tests/unit/extraction/infrastructure/test_openshell_sticky_session_runtime_manager.py b/src/api/tests/unit/extraction/infrastructure/test_openshell_sticky_session_runtime_manager.py index 2819bf080..097ee2f6f 100644 --- a/src/api/tests/unit/extraction/infrastructure/test_openshell_sticky_session_runtime_manager.py +++ 
b/src/api/tests/unit/extraction/infrastructure/test_openshell_sticky_session_runtime_manager.py @@ -41,7 +41,7 @@ def test_start_runtime_issues_auth_token_and_runtime_url(self) -> None: ), patch( "extraction.infrastructure.openshell.openshell_sticky_session_runtime_manager.openshell_sandbox.create_sandbox" ), patch( - "extraction.infrastructure.openshell.openshell_sticky_session_runtime_manager.openshell_sandbox.upload_path" + "extraction.infrastructure.openshell.openshell_sticky_session_runtime_manager.openshell_sandbox.upload_directory_contents" ), patch( "extraction.infrastructure.openshell.openshell_sticky_session_runtime_manager.openshell_sandbox.apply_policy" ) as apply_policy, patch( @@ -64,6 +64,55 @@ def test_start_runtime_issues_auth_token_and_runtime_url(self) -> None: apply_policy.assert_called_once() assert apply_policy.call_args.kwargs["ui_mode"] == "initial-schema-design" + def test_start_runtime_ensures_vertex_provider(self) -> None: + manager = OpenShellStickySessionRuntimeManager( + sticky_image="kartograph-agent-runtime:dev", + session_ttl=timedelta(minutes=30), + vertex_enabled=True, + vertex_project_id="my-project", + vertex_region="us-east5", + gcloud_config_mount="/host/.config/gcloud", + ) + + with patch( + "extraction.infrastructure.openshell.openshell_sticky_session_runtime_manager.openshell_gateway.ensure_gateway_registered" + ), patch( + "extraction.infrastructure.openshell.openshell_sticky_session_runtime_manager.ensure_vertex_provider" + ) as ensure_provider, patch( + "extraction.infrastructure.openshell.openshell_sticky_session_runtime_manager.openshell_sandbox.delete_sandbox" + ), patch( + "extraction.infrastructure.openshell.openshell_sticky_session_runtime_manager.openshell_sandbox.create_sandbox" + ), patch( + "extraction.infrastructure.openshell.openshell_sticky_session_runtime_manager.openshell_sandbox.apply_policy" + ), patch( + 
"extraction.infrastructure.openshell.openshell_sticky_session_runtime_manager.openshell_sandbox.exec_background" + ) as exec_background, patch( + "extraction.infrastructure.openshell.openshell_sticky_session_runtime_manager.openshell_sandbox.start_forward" + ), patch( + "extraction.infrastructure.openshell.openshell_sticky_session_runtime_manager.openshell_sandbox.emit_lifecycle" + ): + manager.get_or_start_runtime( + session_id="session-1", + user_id="user-1", + knowledge_graph_id="kg-1", + mode="graph_management", + bootstrap=None, + ) + + ensure_provider.assert_called_once_with( + provider_name="kartograph-gma", + project_id="my-project", + region="us-east5", + gcloud_config_mount="/host/.config/gcloud", + auth_mode="vertex", + ) + env = exec_background.call_args.kwargs["env"] + assert env["ANTHROPIC_BASE_URL"] == "https://inference.local" + assert env["ANTHROPIC_API_KEY"] == "unused" + assert env["CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS"] == "1" + assert "CLAUDE_CODE_USE_VERTEX" not in env + assert "GOOGLE_APPLICATION_CREDENTIALS" not in env + def test_terminate_runtime_deletes_sandbox(self) -> None: manager = OpenShellStickySessionRuntimeManager( sticky_image="kartograph-agent-runtime:dev", diff --git a/src/api/tests/unit/extraction/infrastructure/test_openshell_vertex_provider.py b/src/api/tests/unit/extraction/infrastructure/test_openshell_vertex_provider.py new file mode 100644 index 000000000..a53dc41d1 --- /dev/null +++ b/src/api/tests/unit/extraction/infrastructure/test_openshell_vertex_provider.py @@ -0,0 +1,83 @@ +"""Unit tests for OpenShell Vertex provider setup.""" + +from __future__ import annotations + +from unittest.mock import patch + +import pytest + +from extraction.infrastructure.openshell.cli import OpenShellCliError +from extraction.infrastructure.openshell.vertex_provider import ensure_vertex_provider + + +def test_ensure_vertex_provider_skips_when_provider_exists() -> None: + with patch( + 
"extraction.infrastructure.openshell.vertex_provider.provider_exists", + return_value=True, + ) as exists, patch( + "extraction.infrastructure.openshell.vertex_provider.ensure_inference_routing", + ) as inference, patch( + "extraction.infrastructure.openshell.vertex_provider.run_openshell", + ) as run: + ensure_vertex_provider( + provider_name="kartograph-gma", + project_id="proj", + region="us-east5", + model="claude-opus-4-6", + ) + + exists.assert_called_once_with(provider_name="kartograph-gma") + inference.assert_called_once_with( + provider_name="kartograph-gma", + model="claude-opus-4-6", + ) + run.assert_not_called() + + +def test_ensure_vertex_provider_creates_google_vertex_ai_from_adc(tmp_path) -> None: + adc_dir = tmp_path / "gcloud" + adc_dir.mkdir() + adc_file = adc_dir / "application_default_credentials.json" + adc_file.write_text('{"type":"authorized_user"}', encoding="utf-8") + + with patch( + "extraction.infrastructure.openshell.vertex_provider.provider_exists", + return_value=False, + ), patch( + "extraction.infrastructure.openshell.vertex_provider.ensure_inference_routing", + ) as inference, patch( + "extraction.infrastructure.openshell.vertex_provider.run_openshell", + ) as run: + ensure_vertex_provider( + provider_name="kartograph-gma", + project_id="my-project", + region="us-east5", + gcloud_config_mount=str(adc_dir), + model="claude-opus-4-6", + ) + + run.assert_called_once() + args = run.call_args.args[0] + assert args[:4] == ["provider", "create", "--name", "kartograph-gma"] + assert "google-vertex-ai" in args + assert "--from-gcloud-adc" in args + assert "VERTEX_AI_PROJECT_ID=my-project" in args + assert "VERTEX_AI_REGION=us-east5" in args + inference.assert_called_once_with( + provider_name="kartograph-gma", + model="claude-opus-4-6", + ) + + +def test_ensure_vertex_provider_raises_when_adc_missing(tmp_path) -> None: + with patch( + "extraction.infrastructure.openshell.vertex_provider.provider_exists", + return_value=False, + ): + with 
pytest.raises(OpenShellCliError, match="Google ADC not found"): + ensure_vertex_provider( + provider_name="kartograph-gma", + project_id="proj", + region="us-east5", + gcloud_config_mount=str(tmp_path / "missing"), + ) diff --git a/src/api/tests/unit/extraction/infrastructure/test_vertex_runtime_env.py b/src/api/tests/unit/extraction/infrastructure/test_vertex_runtime_env.py index a5fe91d39..e93365973 100644 --- a/src/api/tests/unit/extraction/infrastructure/test_vertex_runtime_env.py +++ b/src/api/tests/unit/extraction/infrastructure/test_vertex_runtime_env.py @@ -1,42 +1,22 @@ -"""Unit tests for Vertex runtime environment helpers.""" +"""Tests for Vertex and OpenShell inference runtime env helpers.""" from __future__ import annotations -import pytest - from extraction.infrastructure.vertex_runtime_env import ( + build_openshell_inference_container_env, build_vertex_container_env, - vertex_enabled_from_env, ) -@pytest.mark.parametrize( - ("value", "expected"), - [ - ("1", True), - ("true", True), - ("yes", True), - ("0", False), - ("", False), - (None, False), - ], -) -def test_vertex_enabled_from_env( - monkeypatch: pytest.MonkeyPatch, value: str | None, expected: bool -) -> None: - if value is None: - monkeypatch.delenv("CLAUDE_CODE_USE_VERTEX", raising=False) - else: - monkeypatch.setenv("CLAUDE_CODE_USE_VERTEX", value) - assert vertex_enabled_from_env() is expected +def test_build_openshell_inference_container_env_routes_through_inference_local() -> None: + env = build_openshell_inference_container_env() + assert env["ANTHROPIC_BASE_URL"] == "https://inference.local" + assert env["ANTHROPIC_API_KEY"] == "unused" + assert "CLAUDE_CODE_USE_VERTEX" not in env -def test_build_vertex_container_env_includes_project_and_region() -> None: - env = build_vertex_container_env( - project_id="my-gcp-project", - region="us-central1", - ) +def test_build_vertex_container_env_sets_vertex_flags() -> None: + env = build_vertex_container_env(project_id="proj", region="us-east5") 
assert env["CLAUDE_CODE_USE_VERTEX"] == "1" - assert env["ANTHROPIC_VERTEX_PROJECT_ID"] == "my-gcp-project" - assert env["CLOUD_ML_REGION"] == "us-central1" - assert env["VERTEXAI_LOCATION"] == "us-central1" + assert env["ANTHROPIC_VERTEX_PROJECT_ID"] == "proj" + assert env["CLOUD_ML_REGION"] == "us-east5" diff --git a/src/api/tests/unit/management/infrastructure/test_extraction_baseline_updater.py b/src/api/tests/unit/management/infrastructure/test_extraction_baseline_updater.py new file mode 100644 index 000000000..283e8727f --- /dev/null +++ b/src/api/tests/unit/management/infrastructure/test_extraction_baseline_updater.py @@ -0,0 +1,78 @@ +"""Tests for KG-scoped extraction baseline advancement.""" + +from __future__ import annotations + +from datetime import UTC, datetime +from unittest.mock import AsyncMock + +import pytest + +from management.domain.aggregates import DataSource +from management.domain.value_objects import DataSourceId, Schedule, ScheduleType +from management.infrastructure.extraction_baseline_updater import ( + advance_extraction_baselines_for_knowledge_graph, +) +from shared_kernel.datasource_types import DataSourceAdapterType + + +def _make_ds(*, ds_id: str = "ds-001", kg_id: str = "kg-001", **kwargs) -> DataSource: + now = datetime.now(UTC) + defaults = { + "id": DataSourceId(value=ds_id), + "knowledge_graph_id": kg_id, + "tenant_id": "tenant-001", + "name": f"Source {ds_id}", + "adapter_type": DataSourceAdapterType.GITHUB, + "connection_config": {}, + "credentials_path": None, + "schedule": Schedule(schedule_type=ScheduleType.MANUAL), + "last_sync_at": None, + "created_at": now, + "updated_at": now, + } + defaults.update(kwargs) + return DataSource(**defaults) + + +@pytest.mark.asyncio +async def test_advance_extraction_baselines_updates_all_sources_on_kg() -> None: + ds_a = _make_ds( + ds_id="ds-a", + last_extraction_baseline_commit="old-a", + last_prepared_commit="prepared-a", + ) + ds_b = _make_ds( + ds_id="ds-b", + 
last_extraction_baseline_commit=None, + clone_head_commit="prepared-b", + ) + mock_repo = AsyncMock() + mock_repo.find_by_knowledge_graph.return_value = [ds_a, ds_b] + + updated = await advance_extraction_baselines_for_knowledge_graph( + session=AsyncMock(), + knowledge_graph_id="kg-001", + data_source_repository=mock_repo, + ) + + assert updated == 2 + assert ds_a.last_extraction_baseline_commit == "prepared-a" + assert ds_b.last_extraction_baseline_commit == "prepared-b" + assert mock_repo.save.await_count == 2 + + +@pytest.mark.asyncio +async def test_advance_extraction_baselines_skips_sources_without_ingested_head() -> None: + ds = _make_ds(last_extraction_baseline_commit="keep-me") + mock_repo = AsyncMock() + mock_repo.find_by_knowledge_graph.return_value = [ds] + + updated = await advance_extraction_baselines_for_knowledge_graph( + session=AsyncMock(), + knowledge_graph_id="kg-001", + data_source_repository=mock_repo, + ) + + assert updated == 0 + assert ds.last_extraction_baseline_commit == "keep-me" + mock_repo.save.assert_not_awaited() diff --git a/src/api/tests/unit/management/infrastructure/test_sync_lifecycle_handler.py b/src/api/tests/unit/management/infrastructure/test_sync_lifecycle_handler.py index dd5ab7f46..d7050c518 100644 --- a/src/api/tests/unit/management/infrastructure/test_sync_lifecycle_handler.py +++ b/src/api/tests/unit/management/infrastructure/test_sync_lifecycle_handler.py @@ -166,8 +166,52 @@ async def test_ingestion_prepared_sets_ingested( assert ds.last_prepared_commit == "abc123" assert ds.clone_head_commit == "abc123" assert ds.last_prepared_file_count == 99 + assert ds.last_extraction_baseline_commit == "abc123" mock_ds_repo.save.assert_awaited_once() + async def test_ingestion_prepared_does_not_overwrite_existing_baseline( + self, + handler: SyncLifecycleHandler, + mock_sync_run_repo: AsyncMock, + mock_ds_repo: AsyncMock, + ): + run = _make_sync_run(status="ingesting") + mock_sync_run_repo.get_by_id.return_value = run + + from 
management.domain.aggregates import DataSource + from management.domain.value_objects import DataSourceId, Schedule, ScheduleType + from shared_kernel.datasource_types import DataSourceAdapterType + + now = datetime.now(UTC) + ds = DataSource( + id=DataSourceId(value=run.data_source_id), + knowledge_graph_id="kg-001", + tenant_id="tenant-001", + name="Repo", + adapter_type=DataSourceAdapterType.GITHUB, + connection_config={"owner": "org", "repo": "repo"}, + credentials_path=None, + schedule=Schedule(schedule_type=ScheduleType.MANUAL), + last_sync_at=None, + last_extraction_baseline_commit="existing-baseline", + created_at=now, + updated_at=now, + ) + mock_ds_repo.get_by_id.return_value = ds + + await handler.handle( + "IngestionPrepared", + _payload( + sync_run_id=run.id, + job_package_id="pkg-001", + prepared_commit_sha="abc123", + prepared_file_count=99, + ), + ) + + assert ds.last_prepared_commit == "abc123" + assert ds.last_extraction_baseline_commit == "existing-baseline" + @pytest.mark.asyncio class TestJobPackageProducedTransition: diff --git a/src/api/tests/unit/management/test_data_source.py b/src/api/tests/unit/management/test_data_source.py index 184b2be45..4b1851bd4 100644 --- a/src/api/tests/unit/management/test_data_source.py +++ b/src/api/tests/unit/management/test_data_source.py @@ -473,6 +473,41 @@ def test_record_ingestion_prepared_updates_branch_file_count_on_incremental(self assert ds.last_prepared_file_count == 124 +class TestDataSourceExtractionBaseline: + """Tests for extraction baseline seed/advance helpers.""" + + def _create_ds(self, **kwargs): + defaults = { + "knowledge_graph_id": "kg-123", + "tenant_id": "tenant-456", + "name": "Source", + "adapter_type": DataSourceAdapterType.GITHUB, + "connection_config": {}, + } + defaults.update(kwargs) + ds = DataSource.create(**defaults) + ds.collect_events() + return ds + + def test_maybe_seed_extraction_baseline_sets_commit_when_unset(self): + ds = self._create_ds() + 
ds.maybe_seed_extraction_baseline_from_prepare(prepared_commit="abc123") + assert ds.last_extraction_baseline_commit == "abc123" + + def test_maybe_seed_extraction_baseline_is_noop_when_already_set(self): + ds = self._create_ds() + ds.last_extraction_baseline_commit = "existing" + ds.maybe_seed_extraction_baseline_from_prepare(prepared_commit="abc123") + assert ds.last_extraction_baseline_commit == "existing" + + def test_advance_extraction_baseline_to_ingested_head_uses_prepared_commit(self): + ds = self._create_ds() + ds.last_prepared_commit = "prepared999" + ds.last_extraction_baseline_commit = "old" + ds.advance_extraction_baseline_to_ingested_head() + assert ds.last_extraction_baseline_commit == "prepared999" + + class TestDataSourceMarkForDeletion: """Tests for DataSource.mark_for_deletion() method.""" diff --git a/src/dev-ui/app/components/graph-management/GraphExtractionJobSetsPanel.vue b/src/dev-ui/app/components/graph-management/GraphExtractionJobSetsPanel.vue index 395401a47..50121dd4f 100644 --- a/src/dev-ui/app/components/graph-management/GraphExtractionJobSetsPanel.vue +++ b/src/dev-ui/app/components/graph-management/GraphExtractionJobSetsPanel.vue @@ -1,10 +1,18 @@ <script setup lang="ts"> import { computed, ref, watch } from 'vue' import { toast } from 'vue-sonner' -import { Loader2, Save, Layers, FolderSearch, Network, Sparkles } from 'lucide-vue-next' +import { Loader2, Save, Layers, FolderSearch, Network, Sparkles, Pencil } from 'lucide-vue-next' import { Card, CardHeader, CardTitle, CardDescription, CardContent, CardFooter } from '@/components/ui/card' import { Button } from '@/components/ui/button' import { Badge } from '@/components/ui/badge' +import { + Dialog, + DialogContent, + DialogDescription, + DialogFooter, + DialogHeader, + DialogTitle, +} from '@/components/ui/dialog' const props = withDefaults( defineProps<{ @@ -53,6 +61,23 @@ const loading = ref(true) const saving = ref(false) const doc = ref<ExtractionJobsDocument | null>(null) 
const entityTypeOptions = ref<EntityTypeOption[]>([]) +const descriptionEditorOpen = ref(false) +const descriptionEditorIndex = ref<number | null>(null) + +const descriptionEditorJobSet = computed(() => { + if (!doc.value || descriptionEditorIndex.value === null) return null + return doc.value.job_sets[descriptionEditorIndex.value] ?? null +}) + +function openDescriptionEditor(index: number) { + descriptionEditorIndex.value = index + descriptionEditorOpen.value = true +} + +function closeDescriptionEditor() { + descriptionEditorOpen.value = false + descriptionEditorIndex.value = null +} function cloneDoc(d: ExtractionJobsDocument): ExtractionJobsDocument { return JSON.parse(JSON.stringify(d)) as ExtractionJobsDocument @@ -315,7 +340,19 @@ defineExpose({ refresh: load }) </template> <div class="space-y-1.5"> - <label class="text-xs font-medium">Per-instance extraction description</label> + <div class="flex items-center justify-between gap-2"> + <label class="text-xs font-medium">Per-instance extraction description</label> + <Button + type="button" + size="sm" + variant="outline" + class="h-7 shrink-0 px-2 text-xs" + @click="openDescriptionEditor(idx)" + > + <Pencil class="mr-1 size-3" /> + Edit + </Button> + </div> <textarea v-model="js.description" rows="3" @@ -339,5 +376,28 @@ defineExpose({ refresh: load }) </CardFooter> </Card> </template> + + <Dialog :open="descriptionEditorOpen" @update:open="(open) => { if (!open) closeDescriptionEditor() }"> + <DialogContent class="flex max-h-[90dvh] flex-col gap-0 overflow-hidden sm:max-w-3xl"> + <DialogHeader> + <DialogTitle>Edit per-instance description</DialogTitle> + <DialogDescription v-if="descriptionEditorJobSet"> + Job set: {{ descriptionEditorJobSet.name }} + </DialogDescription> + </DialogHeader> + <div class="min-h-0 flex-1 overflow-y-auto py-4"> + <textarea + v-if="descriptionEditorJobSet" + v-model="descriptionEditorJobSet.description" + rows="18" + class="min-h-[min(60dvh,28rem)] w-full resize-y rounded-lg 
border border-border bg-background px-3 py-2 text-sm leading-relaxed" + placeholder="Describe what to extract for each instance in this job set." + /> + </div> + <DialogFooter> + <Button type="button" @click="closeDescriptionEditor">Done</Button> + </DialogFooter> + </DialogContent> + </Dialog> </div> </template> diff --git a/src/dev-ui/app/components/graph-management/GraphExtractionJobsWorkspace.vue b/src/dev-ui/app/components/graph-management/GraphExtractionJobsWorkspace.vue index 6ec6d6f79..f8b893dd3 100644 --- a/src/dev-ui/app/components/graph-management/GraphExtractionJobsWorkspace.vue +++ b/src/dev-ui/app/components/graph-management/GraphExtractionJobsWorkspace.vue @@ -7,8 +7,8 @@ import { Play, Settings, ClipboardList, - AlertCircle, Eye, + Archive, XCircle, } from 'lucide-vue-next' import { Card, CardHeader, CardTitle, CardDescription, CardContent } from '@/components/ui/card' @@ -75,17 +75,7 @@ interface ExtractionRunState { status: string workerCount: number pauseRequested: boolean -} - -interface PlanSummary { - job_sets: Array<{ - name: string - strategy: string - enabled?: boolean - entity_type?: string - instances_per_job?: number - projected_jobs?: number | null - }> + sandboxSlotCount?: number } const selectedOntologyTab = ref<OntologyTab>('entities') @@ -95,7 +85,6 @@ const dbLoading = ref(true) const dbRefreshing = ref(false) const dbError = ref<string | null>(null) const extractionRunState = ref<ExtractionRunState | null>(null) -const planSummary = ref<PlanSummary | null>(null) const workers = ref(20) const startingExtraction = ref(false) const pausingExtraction = ref(false) @@ -103,6 +92,7 @@ const killingExtraction = ref(false) const regeneratingJobs = ref(false) const resettingRunning = ref(false) const resettingCompleted = ref(false) +const archivingCompleted = ref(false) const resettingFailed = ref(false) const resettingAll = ref(false) const optimisticLiveUntilMs = ref<number | null>(null) @@ -161,7 +151,7 @@ async function 
loadDatabaseStatus(options?: { background?: boolean }) { try { const status = await apiFetch<DbStatus>(`${basePath.value}/database-status`) dbStatus.value = status - mergeRecentJobEvents(status.recentJobs || []) + mergeRecentJobEvents(status) lastStatusRefreshMs.value = Date.now() dbError.value = null } catch (e: unknown) { @@ -182,20 +172,11 @@ async function loadExtractionRunState() { } } -async function loadPlanSummary() { - try { - planSummary.value = await apiFetch<PlanSummary>(`${basePath.value}/plan-summary`) - } catch { - // Keep prior plan summary during background refresh failures. - } -} - async function refreshAll(options?: { background?: boolean }) { const background = options?.background ?? dbStatus.value !== null await Promise.all([ loadDatabaseStatus({ background }), loadExtractionRunState(), - loadPlanSummary(), ]) } @@ -206,8 +187,8 @@ const completedJobsCount = computed(() => Number(dbStatus.value?.jobsByStatus?.c const archivedJobsCount = computed(() => Number(dbStatus.value?.jobsByStatus?.archived || 0)) const failedJobsCount = computed(() => Number(dbStatus.value?.jobsByStatus?.failed || 0)) const remainingJobsCount = computed(() => pendingJobsCount.value + inProgressJobsCount.value) -const materializedJobsTotal = computed( - () => pendingJobsCount.value + inProgressJobsCount.value + failedJobsCount.value + completedJobsCount.value + archivedJobsCount.value, +const activeQueueJobsTotal = computed( + () => pendingJobsCount.value + inProgressJobsCount.value + failedJobsCount.value + completedJobsCount.value, ) const extractionRunLive = computed(() => { if (optimisticLiveUntilMs.value && nowMs.value < optimisticLiveUntilMs.value) return true @@ -215,20 +196,13 @@ const extractionRunLive = computed(() => { }) const hasRunningJobs = computed(() => inProgressJobsCount.value > 0) const extractionProgressPercent = computed(() => { - const total = materializedJobsTotal.value + const total = activeQueueJobsTotal.value if (total <= 0) return 0 - return 
Math.round(((completedJobsCount.value + archivedJobsCount.value + failedJobsCount.value) / total) * 100) -}) -const plannedKnownTotalJobs = computed(() => { - const sets = planSummary.value?.job_sets || [] - return sets.reduce((sum, set) => sum + (Number(set.projected_jobs) || 0), 0) + return Math.round(((completedJobsCount.value + failedJobsCount.value) / total) * 100) }) -const plannedVsMaterializedMismatch = computed(() => { - const planned = plannedKnownTotalJobs.value - if (planned <= 0) return false - return planned !== materializedJobsTotal.value -}) -const recentJobs = computed(() => recentJobEvents.value) +const recentJobs = computed(() => + recentJobEvents.value.filter((event) => event.status !== 'archived'), +) const activeWorkerCount = computed(() => dbStatus.value?.activeWorkers?.length || 0) const idleWorkerCount = computed(() => Math.max(0, workerCount.value - activeWorkerCount.value)) const statusAgeSeconds = computed(() => { @@ -239,14 +213,27 @@ const showOptimisticLiveActivity = computed( () => Boolean(optimisticLiveUntilMs.value && nowMs.value < optimisticLiveUntilMs.value), ) -function mergeRecentJobEvents(incoming: DbStatus['recentJobs']) { +function mergeRecentJobEvents(status: DbStatus) { + const incoming = status.recentJobs || [] const now = Date.now() - const existingByJobId = new Map(recentJobEvents.value.map((event) => [event.jobId, event] as const)) - for (const job of incoming) { + const activeIncoming = incoming.filter((job) => job.status !== 'archived') + const activeWorkerJobIds = new Set((status.activeWorkers || []).map((worker) => worker.jobId)) + const inProgressCount = Number(status.jobsByStatus?.in_progress || 0) + const existingByJobId = new Map( + recentJobEvents.value + .filter((event) => event.status !== 'archived') + .map((event) => [event.jobId, event] as const), + ) + for (const job of activeIncoming) { existingByJobId.set(job.jobId, { ...job, eventKey: job.jobId, seenAtMs: now }) } const maxAgeMs = 15 * 60 * 1000 - 
const merged = Array.from(existingByJobId.values()).filter((event) => now - event.seenAtMs <= maxAgeMs) + let merged = Array.from(existingByJobId.values()).filter((event) => now - event.seenAtMs <= maxAgeMs) + if (inProgressCount === 0) { + merged = merged.filter( + (event) => event.status !== 'in_progress' || activeWorkerJobIds.has(event.jobId), + ) + } merged.sort((a, b) => { const aTs = Date.parse(a.completedAt || a.startedAt || '') || a.seenAtMs const bTs = Date.parse(b.completedAt || b.startedAt || '') || b.seenAtMs @@ -418,6 +405,24 @@ async function resetByKind(kind: 'stale' | 'completed' | 'failed' | 'all') { } } +async function archiveCompletedJobs() { + archivingCompleted.value = true + try { + const res = await apiFetch<{ message?: string; archived_count?: number }>( + `${basePath.value}/archive-completed`, + { method: 'POST' }, + ) + toast.success('Completed jobs archived', { + description: res.message || (res.archived_count !== undefined ? `${res.archived_count} job(s) archived` : undefined), + }) + await refreshAll() + } catch (e: unknown) { + toast.error('Archive failed', { description: resolveApiErrorDescription(e) }) + } finally { + archivingCompleted.value = false + } +} + function startAutoRefresh() { if (autoRefreshInterval) return autoRefreshInterval = setInterval(() => { void refreshAll({ background: true }) }, 1500) @@ -518,7 +523,8 @@ onUnmounted(() => { Run extraction </CardTitle> <CardDescription> - Launch parallel extraction workers. Each worker processes one pending job at a time using the job set description. + Launch parallel extraction workers. Each worker owns one OpenShell sandbox, claims jobs + from the queue until the run completes, and keeps per-job stats in Graph Writes History. 
</CardDescription> </CardHeader> <CardContent class="space-y-4"> @@ -551,36 +557,17 @@ onUnmounted(() => { </div> </div> - <div class="grid gap-3 sm:grid-cols-2 lg:grid-cols-4 text-sm"> + <div class="grid gap-3 sm:grid-cols-2 text-sm"> <div class="rounded-lg border bg-muted/30 p-3"> <p class="text-xs text-muted-foreground">Remaining jobs</p> <p class="text-lg font-semibold">{{ remainingJobsCount }}</p> </div> - <div class="rounded-lg border bg-muted/30 p-3"> - <p class="text-xs text-muted-foreground">Materialized jobs</p> - <p class="text-lg font-semibold">{{ materializedJobsTotal }}</p> - </div> - <div class="rounded-lg border bg-muted/30 p-3"> - <p class="text-xs text-muted-foreground">Planned (from job sets)</p> - <p class="text-lg font-semibold">{{ plannedKnownTotalJobs || '—' }}</p> - </div> <div class="rounded-lg border bg-muted/30 p-3"> <p class="text-xs text-muted-foreground">Progress</p> <p class="text-lg font-semibold">{{ extractionProgressPercent }}%</p> </div> </div> - <div v-if="plannedVsMaterializedMismatch" class="flex items-start gap-2 rounded-lg border border-amber-500/40 bg-amber-500/5 p-3 text-xs"> - <AlertCircle class="mt-0.5 size-4 shrink-0 text-amber-600" /> - <div> - Planned job count ({{ plannedKnownTotalJobs }}) differs from materialized total ({{ materializedJobsTotal }}). - Regenerate syncs pending jobs for enabled sets only; running jobs are left untouched. 
- <Button size="sm" variant="link" class="h-auto p-0 text-xs" :disabled="regeneratingJobs" @click="regenerateJobs"> - Regenerate jobs - </Button> - </div> - </div> - <div class="rounded-lg border bg-card p-3"> <div class="mb-2 flex flex-wrap items-center justify-between gap-2"> <p class="text-xs font-medium text-foreground/90">Live extraction activity</p> @@ -591,6 +578,9 @@ onUnmounted(() => { <Badge variant="outline" class="font-mono text-[11px]"> workers: {{ activeWorkerCount }}/{{ workerCount }} </Badge> + <Badge v-if="extractionRunState?.sandboxSlotCount" variant="outline" class="font-mono text-[11px]"> + sandboxes: {{ extractionRunState.sandboxSlotCount }} (1 per worker) + </Badge> <Badge v-if="idleWorkerCount > 0" variant="outline" class="font-mono text-[11px]"> {{ idleWorkerCount }} idle </Badge> @@ -745,6 +735,15 @@ onUnmounted(() => { <Button size="sm" variant="outline" :disabled="resettingCompleted" @click="resetByKind('completed')"> Reset Completed </Button> + <Button + size="sm" + variant="outline" + :disabled="archivingCompleted || completedJobsCount === 0" + @click="archiveCompletedJobs" + > + <Archive class="mr-1.5 size-3.5" /> + Archive Completed + </Button> <Button size="sm" variant="outline" :disabled="resettingFailed" @click="resetByKind('failed')"> Reset Failed </Button> diff --git a/src/dev-ui/app/components/graph-management/GraphMaintenanceWorkspace.vue b/src/dev-ui/app/components/graph-management/GraphMaintenanceWorkspace.vue new file mode 100644 index 000000000..d54779a6a --- /dev/null +++ b/src/dev-ui/app/components/graph-management/GraphMaintenanceWorkspace.vue @@ -0,0 +1,746 @@ +<script setup lang="ts"> +import { computed, onMounted, onUnmounted, ref } from 'vue' +import { toast } from 'vue-sonner' +import { + Calendar, + GitBranch, + Loader2, + Play, + RefreshCw, + Settings, + ArrowRight, +} from 'lucide-vue-next' +import { Card, CardHeader, CardTitle, CardDescription, CardContent } from '@/components/ui/card' +import { Button } from 
'@/components/ui/button' +import { Badge } from '@/components/ui/badge' +import { Input } from '@/components/ui/input' +import { isMaintenanceReady } from '@/utils/kgManageWorkspace' +import { buildGraphManagementStepUrl } from '@/utils/kgGraphManagement' +import { + commitStatusClass, + formatFilesOnDisk, + hasUnpulledCommits, + needsIngestionPrepare, + resolveIngestedHeadCommit, + resolveRepoUrl, + resolveTrackedBranch, + shortCommitHash, + unpulledCommitStatusLabel, +} from '@/utils/kgDataSourcesCommits' +import { + cronToDailyTime, + dailyTimeToCron, + formatMaintenanceRunOutcome, + maintenanceRunOutcomeVariant, + MAINTENANCE_TIMEZONE_OPTIONS, +} from '@/utils/kgMaintenanceSchedule' + +const props = defineProps<{ + kgId: string +}>() + +const { apiFetch } = useApiClient() + +interface DiffSummary { + total_changed_files: number + added_count: number + modified_count: number + removed_count: number + renamed_count: number +} + +interface DataSourceRow { + id: string + name: string + connection_config?: Record<string, string> + last_extraction_baseline_commit?: string | null + tracked_branch_head_commit?: string | null + newest_unpulled_commit?: string | null + clone_head_commit?: string | null + last_prepared_commit?: string | null + last_prepared_file_count?: number | null + ingested_head_commit?: string | null + job_package_available?: boolean | null + diff_summary?: DiffSummary | null +} + +interface ExtractionJobSet { + name: string + strategy: string + enabled?: boolean + files_per_job?: number + file_patterns?: string[] + description?: string + entity_type?: string + instances_per_job?: number +} + +interface MaintenanceSchedule { + enabled: boolean + cron_expression: string + timezone_name: string + next_run_at: string | null +} + +interface MaintenanceRun { + run_id: string + triggered_at: string + outcome: string + message: string | null + target_data_source_ids: string[] +} + +interface ExtractionRunState { + live: boolean + status: string + 
workerCount: number + pauseRequested: boolean +} + +interface DbStatus { + jobsByStatus: Record<string, number> +} + +const loading = ref(true) +const refreshing = ref(false) +const dataSources = ref<DataSourceRow[]>([]) +const schedule = ref<MaintenanceSchedule | null>(null) +const runHistory = ref<MaintenanceRun[]>([]) +const extractionRunState = ref<ExtractionRunState | null>(null) +const dbStatus = ref<DbStatus | null>(null) + +const scheduleEnabled = ref(false) +const scheduleTime = ref('02:00') +const scheduleTimezone = ref('UTC') +const scheduleSaving = ref(false) + +const workers = ref(8) +const filesPerJob = ref(2) +const checkingCommits = ref(false) +const updatingLocalCommits = ref(false) +const runningMaintenance = ref(false) +const startingExtraction = ref(false) + +let refreshInterval: ReturnType<typeof setInterval> | null = null + +const maintenanceReadySources = computed(() => + dataSources.value.filter((ds) => isMaintenanceReady(ds)), +) + +const sourcesNeedingPrepare = computed(() => + dataSources.value.filter((ds) => needsIngestionPrepare(ds)), +) + +const totalChangedFiles = computed(() => + dataSources.value.reduce((sum, ds) => sum + (ds.diff_summary?.total_changed_files || 0), 0), +) + +const normalizedFilesPerJob = computed(() => + Math.max(1, Math.floor(Number(filesPerJob.value || 1))), +) + +const estimatedJobsFromFiles = computed(() => { + const total = totalChangedFiles.value + if (total <= 0) return 0 + return Math.ceil(total / normalizedFilesPerJob.value) +}) + +const pendingJobsCount = computed(() => Number(dbStatus.value?.jobsByStatus?.pending || 0)) +const inProgressJobsCount = computed(() => Number(dbStatus.value?.jobsByStatus?.in_progress || 0)) +const extractionLive = computed(() => + Boolean(extractionRunState.value?.live || inProgressJobsCount.value > 0), +) + +const extractionJobsUrl = computed(() => + buildGraphManagementStepUrl(props.kgId, 'extraction-jobs'), +) + +function resolveApiError(e: unknown): string { + const err = 
e as { data?: { detail?: unknown }; message?: string } + const detail = err.data?.detail + if (typeof detail === 'string' && detail.trim()) return detail + return err.message || 'Request failed' +} + +function formatWhen(value: string | null | undefined): string { + if (!value) return '—' + const date = new Date(value) + if (Number.isNaN(date.getTime())) return value + return date.toLocaleString() +} + +async function loadDiffSummary(ds: DataSourceRow) { + try { + ds.diff_summary = await apiFetch<DiffSummary>( + `/management/data-sources/${ds.id}/diff-summary`, + ) + } catch { + ds.diff_summary = null + } +} + +async function loadDataSources() { + const sources = await apiFetch<DataSourceRow[]>( + `/management/knowledge-graphs/${encodeURIComponent(props.kgId)}/data-sources`, + ) + await Promise.all(sources.map((ds) => loadDiffSummary(ds))) + dataSources.value = sources +} + +async function loadSchedule() { + const payload = await apiFetch<MaintenanceSchedule>( + `/management/knowledge-graphs/${encodeURIComponent(props.kgId)}/maintenance-schedule`, + ) + schedule.value = payload + scheduleEnabled.value = payload.enabled + scheduleTimezone.value = payload.timezone_name || 'UTC' + scheduleTime.value = cronToDailyTime(payload.cron_expression) || '02:00' +} + +async function loadRunHistory() { + const payload = await apiFetch<{ runs: MaintenanceRun[] }>( + `/management/knowledge-graphs/${encodeURIComponent(props.kgId)}/maintenance-runs?limit=20`, + ) + runHistory.value = payload.runs || [] +} + +async function loadExtractionState() { + const base = `/management/knowledge-graphs/${encodeURIComponent(props.kgId)}/extraction-jobs` + try { + extractionRunState.value = await apiFetch<ExtractionRunState>(`${base}/run-state`) + } catch { + extractionRunState.value = null + } + try { + dbStatus.value = await apiFetch<DbStatus>(`${base}/database-status`) + } catch { + dbStatus.value = null + } +} + +async function refreshAll(options?: { background?: boolean }) { + const 
background = options?.background ?? false + if (background) refreshing.value = true + else loading.value = true + try { + await Promise.all([ + loadDataSources(), + loadSchedule(), + loadRunHistory(), + loadExtractionState(), + ]) + } catch (e: unknown) { + if (!background) { + toast.error('Failed to load maintenance workspace', { description: resolveApiError(e) }) + } + } finally { + loading.value = false + refreshing.value = false + } +} + +async function checkForNewCommits() { + if (dataSources.value.length === 0) return + checkingCommits.value = true + try { + await Promise.allSettled( + dataSources.value.map((ds) => + apiFetch(`/management/data-sources/${ds.id}/commit-refs/refresh`, { method: 'POST' }), + ), + ) + await loadDataSources() + const unpulled = dataSources.value.filter((ds) => hasUnpulledCommits(ds)) + if (unpulled.length === 0) { + toast.success('Up to date with remote branches') + } else { + toast.success( + `${unpulled.length} source${unpulled.length === 1 ? '' : 's'} have unpulled commits`, + { description: 'Compare baseline vs branch head in the table below.' }, + ) + } + } catch (e: unknown) { + toast.error('Failed to check for new commits', { description: resolveApiError(e) }) + } finally { + checkingCommits.value = false + } +} + +async function getLatestCommitLocally() { + const queue = sourcesNeedingPrepare.value + if (queue.length === 0) { + toast.message('Already up to date locally', { + description: 'No sources need ingestion prepare. 
Run check for new commits first if unsure.', + }) + return + } + updatingLocalCommits.value = true + try { + const results = await Promise.allSettled( + queue.map((ds) => + apiFetch(`/management/data-sources/${ds.id}/sync`, { + method: 'POST', + body: { mode: 'ingest_only' }, + }), + ), + ) + const failures = results.filter((result) => result.status === 'rejected') + await loadDataSources() + if (failures.length === queue.length) { + toast.error('Failed to update local commits', { + description: resolveApiError(failures[0]?.status === 'rejected' ? failures[0].reason : null), + }) + return + } + if (failures.length > 0) { + toast.warning( + `Started ${queue.length - failures.length} of ${queue.length} preparations`, + { description: 'Some sources could not be queued.' }, + ) + return + } + toast.success(`Preparing ${queue.length} data source${queue.length === 1 ? '' : 's'}`) + } catch (e: unknown) { + toast.error('Failed to get latest commit locally', { description: resolveApiError(e) }) + } finally { + updatingLocalCommits.value = false + } +} + +async function saveSchedule() { + const cron = dailyTimeToCron(scheduleTime.value) + if (!cron) { + toast.error('Invalid schedule time', { description: 'Use HH:MM in 24-hour format.' 
}) + return + } + scheduleSaving.value = true + try { + schedule.value = await apiFetch<MaintenanceSchedule>( + `/management/knowledge-graphs/${encodeURIComponent(props.kgId)}/maintenance-schedule`, + { + method: 'PUT', + body: { + enabled: scheduleEnabled.value, + cron_expression: cron, + timezone_name: scheduleTimezone.value, + }, + }, + ) + toast.success('Maintenance schedule saved') + } catch (e: unknown) { + toast.error('Failed to save schedule', { description: resolveApiError(e) }) + } finally { + scheduleSaving.value = false + } +} + +async function applyFilesPerJobToJobSets() { + const perJob = normalizedFilesPerJob.value + const doc = await apiFetch<{ version?: string; job_sets: ExtractionJobSet[] }>( + `/management/knowledge-graphs/${encodeURIComponent(props.kgId)}/extraction-jobs`, + ) + const hasByFiles = doc.job_sets.some((js) => js.strategy === 'by_files' && js.enabled !== false) + if (!hasByFiles) return + await apiFetch( + `/management/knowledge-graphs/${encodeURIComponent(props.kgId)}/extraction-jobs`, + { + method: 'PUT', + body: { + version: doc.version || '1.0', + job_sets: doc.job_sets.map((js) => + js.strategy === 'by_files' + ? 
{ ...js, files_per_job: perJob } + : js, + ), + }, + }, + ) +} + +async function runMaintenanceNow() { + runningMaintenance.value = true + try { + const run = await apiFetch<MaintenanceRun>( + `/management/knowledge-graphs/${encodeURIComponent(props.kgId)}/maintenance-runs/trigger`, + { method: 'POST' }, + ) + toast.success('Maintenance run recorded', { + description: run.message || formatMaintenanceRunOutcome(run.outcome), + }) + await refreshAll({ background: true }) + } catch (e: unknown) { + toast.error('Maintenance run failed', { description: resolveApiError(e) }) + } finally { + runningMaintenance.value = false + } +} + +async function startExtractionJobs() { + startingExtraction.value = true + try { + try { + await applyFilesPerJobToJobSets() + } catch (e: unknown) { + toast.warning('Could not update files-per-job on job sets', { + description: resolveApiError(e), + }) + } + const res = await apiFetch<{ message?: string }>( + `/management/knowledge-graphs/${encodeURIComponent(props.kgId)}/extraction-jobs/start`, + { + method: 'POST', + body: { workers: Math.max(1, Math.floor(workers.value)) }, + }, + ) + toast.success('Extraction started', { description: res.message }) + await loadExtractionState() + } catch (e: unknown) { + toast.error('Failed to start extraction', { description: resolveApiError(e) }) + } finally { + startingExtraction.value = false + } +} + +async function runMaintenancePipeline() { + await runMaintenanceNow() + if (maintenanceReadySources.value.length > 0) { + await startExtractionJobs() + } +} + +function startAutoRefresh() { + if (refreshInterval) return + refreshInterval = setInterval(() => { void refreshAll({ background: true }) }, 3000) +} + +function stopAutoRefresh() { + if (!refreshInterval) return + clearInterval(refreshInterval) + refreshInterval = null +} + +onMounted(async () => { + await refreshAll() + startAutoRefresh() +}) + +onUnmounted(() => { + stopAutoRefresh() +}) +</script> + +<template> + <div class="space-y-6"> + 
<div class="flex flex-wrap items-start justify-between gap-3"> + <div class="space-y-1"> + <p class="text-sm text-muted-foreground"> + Schedule and run incremental maintenance: sync changed sources, then execute extraction jobs. + </p> + </div> + <Button variant="outline" size="sm" :disabled="refreshing || loading" @click="refreshAll({ background: true })"> + <Loader2 v-if="refreshing" class="mr-2 size-4 animate-spin" /> + <RefreshCw v-else class="mr-2 size-4" /> + Refresh + </Button> + </div> + + <div v-if="loading" class="flex items-center justify-center py-16"> + <Loader2 class="size-8 animate-spin text-muted-foreground" /> + </div> + + <template v-else> + <Card> + <CardHeader class="pb-2"> + <div class="flex flex-col gap-2 sm:flex-row sm:items-start sm:justify-between"> + <div> + <CardTitle class="flex items-center gap-2 text-base"> + <GitBranch class="size-4 text-primary" /> + New Files to Process + </CardTitle> + <CardDescription class="mt-1"> + Compare the last job baseline to the remote branch tip and review changed files since extraction. + </CardDescription> + </div> + <div class="flex flex-wrap gap-2"> + <Button + variant="outline" + size="sm" + :disabled="checkingCommits || updatingLocalCommits || dataSources.length === 0" + @click="checkForNewCommits" + > + <Loader2 v-if="checkingCommits" class="mr-2 size-4 animate-spin" /> + <RefreshCw v-else class="mr-2 size-4" /> + Check for new commits + </Button> + <Button + variant="outline" + size="sm" + :disabled="checkingCommits || updatingLocalCommits || dataSources.length === 0" + @click="getLatestCommitLocally" + > + <Loader2 v-if="updatingLocalCommits" class="mr-2 size-4 animate-spin" /> + Get latest commit locally + </Button> + </div> + </div> + </CardHeader> + <CardContent> + <div + v-if="dataSources.length === 0" + class="rounded-md border border-dashed px-4 py-8 text-center text-sm text-muted-foreground" + > + Connect a data source before scheduling maintenance. 
+ </div> + <div v-else class="overflow-x-auto rounded-md border"> + <table class="w-full min-w-[1100px] text-sm"> + <thead> + <tr class="border-b bg-muted/50 text-left"> + <th class="px-3 py-2 font-medium">Source</th> + <th class="px-3 py-2 font-medium">Branch</th> + <th class="px-3 py-2 font-medium">Branch HEAD</th> + <th class="px-3 py-2 text-right font-medium">Files on disk</th> + <th class="px-3 py-2 font-medium">Commit during last extraction</th> + <th class="px-3 py-2 text-right font-medium">Changed files</th> + <th class="px-3 py-2 font-medium">Status</th> + </tr> + </thead> + <tbody> + <tr + v-for="ds in dataSources" + :key="ds.id" + class="border-b border-border/60 align-top last:border-0" + :class="isMaintenanceReady(ds) || hasUnpulledCommits(ds) ? 'bg-amber-50/40 dark:bg-amber-950/10' : ''" + > + <td class="px-3 py-2"> + <p class="font-medium">{{ ds.name }}</p> + <p class="mt-0.5 max-w-[18rem] truncate font-mono text-xs text-muted-foreground"> + {{ resolveRepoUrl(ds.connection_config) }} + </p> + </td> + <td class="px-3 py-2 font-mono text-xs">{{ resolveTrackedBranch(ds.connection_config) }}</td> + <td class="px-3 py-2 font-mono text-xs"> + <span + :class="commitStatusClass(ds.tracked_branch_head_commit, resolveIngestedHeadCommit(ds))" + :title="ds.tracked_branch_head_commit || ''" + > + {{ shortCommitHash(ds.tracked_branch_head_commit) }} + </span> + <p class="mt-0.5 text-[10px] text-muted-foreground"> + {{ + unpulledCommitStatusLabel( + ds.newest_unpulled_commit, + ds.tracked_branch_head_commit, + ) + }} + </p> + </td> + <td class="px-3 py-2 text-right tabular-nums text-muted-foreground"> + {{ formatFilesOnDisk(ds) }} + </td> + <td class="px-3 py-2 font-mono text-xs"> + <span + :class="commitStatusClass(ds.last_extraction_baseline_commit, ds.tracked_branch_head_commit)" + :title="ds.last_extraction_baseline_commit || ''" + > + {{ shortCommitHash(ds.last_extraction_baseline_commit) }} + </span> + </td> + <td class="px-3 py-2 text-right tabular-nums"> + 
{{ ds.diff_summary?.total_changed_files ?? '—' }} + </td> + <td class="px-3 py-2"> + <Badge + :variant="isMaintenanceReady(ds) ? 'default' : hasUnpulledCommits(ds) ? 'outline' : 'secondary'" + class="text-xs" + > + {{ + isMaintenanceReady(ds) + ? 'New files vs baseline' + : hasUnpulledCommits(ds) + ? 'Unpulled commits' + : 'Up to date' + }} + </Badge> + </td> + </tr> + </tbody> + </table> + </div> + <p class="mt-3 text-xs text-muted-foreground"> + {{ maintenanceReadySources.length }} source(s) have commits ahead of the last job baseline · + {{ totalChangedFiles }} changed file(s) detected · + {{ sourcesNeedingPrepare.length }} need local prepare + </p> + </CardContent> + </Card> + + <div class="grid gap-6 lg:grid-cols-2"> + <Card> + <CardHeader> + <CardTitle class="flex items-center gap-2 text-base"> + <Play class="size-4 text-primary" /> + Run maintenance jobs + </CardTitle> + <CardDescription> + Set files per job and worker concurrency, then run maintenance across all data sources. + </CardDescription> + </CardHeader> + <CardContent class="space-y-4"> + <div class="grid gap-3 sm:grid-cols-2"> + <div class="space-y-1.5"> + <label for="maintain-files-per-job" class="text-sm font-medium">Files per job</label> + <Input + id="maintain-files-per-job" + v-model.number="filesPerJob" + type="number" + min="1" + /> + </div> + <div class="space-y-1.5"> + <label for="maintain-workers" class="text-sm font-medium">Parallel workers</label> + <Input + id="maintain-workers" + v-model.number="workers" + type="number" + min="1" + max="32" + /> + </div> + </div> + + <div class="rounded-lg border bg-muted/20 p-3"> + <p class="text-xs font-medium text-foreground/90">Maintain run preview</p> + <div class="mt-2 grid gap-2 sm:grid-cols-3"> + <div class="rounded-md border bg-background px-3 py-2"> + <p class="text-[11px] uppercase tracking-wide text-muted-foreground">Changed files</p> + <p class="text-lg font-semibold tabular-nums">{{ totalChangedFiles }}</p> + </div> + <div 
class="rounded-md border bg-background px-3 py-2"> + <p class="text-[11px] uppercase tracking-wide text-muted-foreground">Files per job</p> + <p class="text-lg font-semibold tabular-nums">{{ normalizedFilesPerJob }}</p> + </div> + <div class="rounded-md border bg-background px-3 py-2"> + <p class="text-[11px] uppercase tracking-wide text-muted-foreground">Estimated jobs</p> + <p class="text-lg font-semibold tabular-nums">{{ estimatedJobsFromFiles }}</p> + </div> + </div> + <p class="mt-2 text-xs text-muted-foreground"> + Extraction queue: {{ pendingJobsCount }} ready · {{ inProgressJobsCount }} running + <span v-if="extractionLive"> · live</span> + </p> + </div> + + <div class="flex flex-wrap gap-2"> + <Button :disabled="runningMaintenance" @click="runMaintenanceNow"> + <Loader2 v-if="runningMaintenance" class="mr-2 size-4 animate-spin" /> + Sync changed sources + </Button> + <Button variant="secondary" :disabled="startingExtraction" @click="startExtractionJobs"> + <Loader2 v-if="startingExtraction" class="mr-2 size-4 animate-spin" /> + Start extraction jobs + </Button> + <Button + variant="outline" + :disabled="runningMaintenance || startingExtraction || maintenanceReadySources.length === 0" + @click="runMaintenancePipeline" + > + Run full pipeline + </Button> + </div> + + <Button as-child variant="link" class="h-auto px-0 text-xs"> + <NuxtLink :to="extractionJobsUrl" class="inline-flex items-center gap-1"> + Configure job sets and monitor workers + <ArrowRight class="size-3.5" /> + </NuxtLink> + </Button> + </CardContent> + </Card> + + <Card> + <CardHeader> + <CardTitle class="flex items-center gap-2 text-base"> + <Calendar class="size-4 text-primary" /> + Scheduled maintenance + </CardTitle> + <CardDescription> + Daily cron schedule for automatic maintenance orchestration (sync changed sources). 
+ </CardDescription> + </CardHeader> + <CardContent class="space-y-4"> + <label class="flex items-center gap-2 text-sm"> + <input v-model="scheduleEnabled" type="checkbox" class="size-4 rounded border" /> + Enable scheduled maintenance + </label> + + <div class="grid gap-3 sm:grid-cols-2"> + <div class="space-y-1.5"> + <label for="schedule-time" class="text-sm font-medium">Daily time (HH:MM)</label> + <Input id="schedule-time" v-model="scheduleTime" placeholder="02:00" /> + </div> + <div class="space-y-1.5"> + <label for="schedule-tz" class="text-sm font-medium">Timezone</label> + <select + id="schedule-tz" + v-model="scheduleTimezone" + class="flex h-9 w-full rounded-md border bg-background px-3 text-sm" + > + <option + v-for="tz in MAINTENANCE_TIMEZONE_OPTIONS" + :key="tz.value" + :value="tz.value" + > + {{ tz.label }} + </option> + </select> + </div> + </div> + + <div class="rounded-lg border bg-muted/20 p-3 text-xs text-muted-foreground"> + <p v-if="schedule?.next_run_at"> + Next scheduled run: {{ formatWhen(schedule.next_run_at) }} + </p> + <p v-else-if="scheduleEnabled">Next run will be computed after saving.</p> + <p v-else>Scheduling is disabled.</p> + </div> + + <Button variant="outline" class="w-full" :disabled="scheduleSaving" @click="saveSchedule"> + <Loader2 v-if="scheduleSaving" class="mr-2 size-4 animate-spin" /> + <Settings v-else class="mr-2 size-4" /> + Save maintenance schedule + </Button> + </CardContent> + </Card> + </div> + + <Card> + <CardHeader> + <CardTitle class="text-base">Maintenance run history</CardTitle> + <CardDescription>Recent manual and scheduled maintenance orchestration attempts.</CardDescription> + </CardHeader> + <CardContent> + <div v-if="runHistory.length === 0" class="text-sm text-muted-foreground"> + No maintenance runs recorded yet. 
+ </div> + <div v-else class="space-y-2"> + <div + v-for="run in runHistory" + :key="run.run_id" + class="rounded-lg border p-3 text-sm" + > + <div class="flex flex-wrap items-center gap-2"> + <Badge :variant="maintenanceRunOutcomeVariant(run.outcome)" class="font-mono text-[11px]"> + {{ formatMaintenanceRunOutcome(run.outcome) }} + </Badge> + <span class="font-mono text-xs text-muted-foreground">{{ formatWhen(run.triggered_at) }}</span> + <span v-if="run.target_data_source_ids.length" class="text-xs text-muted-foreground"> + · {{ run.target_data_source_ids.length }} source(s) + </span> + </div> + <p v-if="run.message" class="mt-1 text-xs text-muted-foreground">{{ run.message }}</p> + </div> + </div> + </CardContent> + </Card> + </template> + </div> +</template> diff --git a/src/dev-ui/app/pages/knowledge-graphs/[kgId]/data-sources/index.vue b/src/dev-ui/app/pages/knowledge-graphs/[kgId]/data-sources/index.vue index 55b4028bf..a90c7444c 100644 --- a/src/dev-ui/app/pages/knowledge-graphs/[kgId]/data-sources/index.vue +++ b/src/dev-ui/app/pages/knowledge-graphs/[kgId]/data-sources/index.vue @@ -21,6 +21,7 @@ import { buildKgManageUrl, parseKgDataSourcesFocusQuery, } from '@/utils/kgDataSourcesNavigation' +import { buildManageStepUrl } from '@/utils/kgManageWorkspace' import { isMaintenanceReady } from '@/utils/kgManageWorkspace' import { hasAnyActiveSync, @@ -535,12 +536,12 @@ async function viewLogs(ds: DataSourceItem, run: SyncRun) { onMounted(async () => { if (!hasTenant.value) return - await loadKnowledgeGraph() - await ensureEntryRoute() if (maintainFocus.value) { - await nextTick() - document.getElementById('maintain-section')?.scrollIntoView({ behavior: 'smooth' }) + await navigateTo(buildManageStepUrl(kgId.value, 'maintain')) + return } + await loadKnowledgeGraph() + await ensureEntryRoute() }) onUnmounted(() => stopPolling()) diff --git a/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue b/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue index 
f9c97951d..8189f5fad 100644 --- a/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue +++ b/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue @@ -48,6 +48,7 @@ import GraphDesignEntitiesPanel from '@/components/graph-management/GraphDesignE import GraphDesignRelationshipsPanel from '@/components/graph-management/GraphDesignRelationshipsPanel.vue' import GraphExtractionJobsWorkspace from '@/components/graph-management/GraphExtractionJobsWorkspace.vue' import GraphExtractionArchivedHistory from '@/components/graph-management/GraphExtractionArchivedHistory.vue' +import GraphMaintenanceWorkspace from '@/components/graph-management/GraphMaintenanceWorkspace.vue' import GraphManagementMutationAuthoringPanel from '@/components/graph-management/GraphManagementMutationAuthoringPanel.vue' import GraphSchemaExplorer from '@/components/graph-management/GraphSchemaExplorer.vue' import { @@ -95,6 +96,7 @@ import { buildTransitionRestrictionReason, handleActivatableKeydown, isForbiddenHttpError, + isNotFoundHttpError, resolveForbiddenReason, resolveSectionState, shouldApplyMutationResult, @@ -290,6 +292,12 @@ const stepBadgeLabel = computed(() => { if (activeStep.value === 'graph-management') { return graphManagementModeLabel.value } + if (activeStep.value === 'maintain') { + return 'Maintain' + } + if (activeStep.value === 'mutation-logs') { + return 'Graph Writes History' + } return modeLabel.value }) @@ -834,6 +842,16 @@ function restoreModeConversation(mode: GraphManagementMode) { draftMessage.value = cached.draftMessage } +function syncRuntimeReadyFromSession() { + if (runtimeWarming.value) return + const sticky = extractionSession.value?.runtime_context?.sticky_runtime + runtimeReady.value = Boolean( + sticky + && typeof sticky === 'object' + && (sticky as { phase?: string }).phase === 'ready', + ) +} + async function loadExtractionSession() { if (!kgId.value || activeStep.value !== 'graph-management') return sessionLoading.value = true @@ -844,15 +862,7 @@ async 
function loadExtractionSession() { + `?graph_management_ui_mode=${encodeURIComponent(graphManagementMode.value)}`, ) syncActivityLinesFromSession() - const stickyPhase = extractionSession.value?.runtime_context?.sticky_runtime - if ( - stickyPhase - && typeof stickyPhase === 'object' - && (stickyPhase as { phase?: string }).phase === 'ready' - && !runtimeWarming.value - ) { - runtimeReady.value = true - } + syncRuntimeReadyFromSession() sessionForbidden.value = false sessionForbiddenReason.value = null } catch (err) { @@ -864,7 +874,7 @@ async function loadExtractionSession() { err, 'You do not have permission to manage this knowledge graph.', ) - } else if (extractErrorMessage(err).includes('404') || extractErrorMessage(err).toLowerCase().includes('not found')) { + } else if (isNotFoundHttpError(err)) { sessionForbidden.value = false sessionForbiddenReason.value = null sessionLoadError.value = null @@ -1073,6 +1083,7 @@ async function warmupAssistantRuntime() { } finally { if (generation === runtimeWarmupGeneration) { runtimeWarming.value = false + syncRuntimeReadyFromSession() snapshotCurrentModeConversation() } } @@ -1341,9 +1352,15 @@ watch( <template v-if="activeStep === 'graph-management'"> Conversation-first graph management with shared session and mode-specific workspace panels. </template> - <template v-else> + <template v-else-if="activeStep === 'maintain'"> + Schedule and run incremental maintenance extraction jobs when tracked sources have new commits. + </template> + <template v-else-if="activeStep === 'mutation-logs'"> Knowledge-graph scoped mutation run visibility and run metrics. </template> + <template v-else> + Knowledge-graph scoped workspace overview. 
+ </template> </p> </div> <Button variant="outline" size="sm" @click="returnToWorkspaceOverview()"> @@ -1608,6 +1625,10 @@ watch( <GraphExtractionArchivedHistory :kg-id="kgId" /> </section> + <section v-else-if="activeStep === 'maintain'" class="space-y-4"> + <GraphMaintenanceWorkspace :kg-id="kgId" /> + </section> + <section v-else-if="activeStep === 'graph-management'" class="space-y-4"> <div v-if="graphManagementSectionState.phase === 'error'" diff --git a/src/dev-ui/app/tests/kg-maintenance-schedule.test.ts b/src/dev-ui/app/tests/kg-maintenance-schedule.test.ts new file mode 100644 index 000000000..8c252683e --- /dev/null +++ b/src/dev-ui/app/tests/kg-maintenance-schedule.test.ts @@ -0,0 +1,14 @@ +import { describe, expect, it } from 'vitest' +import { cronToDailyTime, dailyTimeToCron } from '../utils/kgMaintenanceSchedule' + +describe('kgMaintenanceSchedule', () => { + it('converts daily time to cron and back', () => { + expect(dailyTimeToCron('21:30')).toBe('30 21 * * *') + expect(cronToDailyTime('30 21 * * *')).toBe('21:30') + }) + + it('rejects invalid daily time', () => { + expect(dailyTimeToCron('25:00')).toBeNull() + expect(dailyTimeToCron('bad')).toBeNull() + }) +}) diff --git a/src/dev-ui/app/tests/kg-manage-workspace-hub.test.ts b/src/dev-ui/app/tests/kg-manage-workspace-hub.test.ts index 7bf307675..5c8cfaf61 100644 --- a/src/dev-ui/app/tests/kg-manage-workspace-hub.test.ts +++ b/src/dev-ui/app/tests/kg-manage-workspace-hub.test.ts @@ -38,8 +38,8 @@ describe('kgManageWorkspaceHub', () => { expect(tiles.map((tile) => tile.key)).toEqual([ 'data-sources', 'graph-management', - 'mutation-logs', 'maintain', + 'mutation-logs', ]) }) diff --git a/src/dev-ui/app/tests/kgManageState.test.ts b/src/dev-ui/app/tests/kgManageState.test.ts index 64fb1fd74..70dd0bc30 100644 --- a/src/dev-ui/app/tests/kgManageState.test.ts +++ b/src/dev-ui/app/tests/kgManageState.test.ts @@ -6,6 +6,7 @@ import { handleActivatableKeydown, handleChatInputKeydown, isForbiddenHttpError, 
+ isNotFoundHttpError, resolveForbiddenReason, resolveSectionState, shouldApplyMutationResult, @@ -135,6 +136,12 @@ describe('KG-MANAGE-020 - forbidden and disabled action restrictions', () => { expect(isForbiddenHttpError({ statusCode: 404 })).toBe(false) }) + it('detects not-found HTTP errors without treating them as failures', () => { + expect(isNotFoundHttpError({ statusCode: 404 })).toBe(true) + expect(isNotFoundHttpError(new Error('Not Found'))).toBe(true) + expect(isNotFoundHttpError({ statusCode: 403 })).toBe(false) + }) + it('builds explicit forbidden section messaging', () => { const state = resolveSectionState({ section: 'graph-management', diff --git a/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts b/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts index fdc43edb1..fb6c73cb7 100644 --- a/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts +++ b/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts @@ -10,6 +10,7 @@ import { buildSuggestedNextStep, buildWorkspaceStepCards, isMaintenanceReady, + parseManageStepQuery, resolveStepDestination, stepStatusTintClass, } from '../utils/kgManageWorkspace' @@ -47,6 +48,14 @@ const graphExtractionArchivedHistoryVue = readFileSync( resolve(__dirname, '../components/graph-management/GraphExtractionArchivedHistory.vue'), 'utf-8', ) +const graphExtractionJobsWorkspaceVue = readFileSync( + resolve(__dirname, '../components/graph-management/GraphExtractionJobsWorkspace.vue'), + 'utf-8', +) +const graphMaintenanceWorkspaceVue = readFileSync( + resolve(__dirname, '../components/graph-management/GraphMaintenanceWorkspace.vue'), + 'utf-8', +) const graphManagementMutationAuthoringVue = readFileSync( resolve(__dirname, '../components/graph-management/GraphManagementMutationAuthoringPanel.vue'), 'utf-8', @@ -110,6 +119,30 @@ describe('Knowledge Graph Manage Workspace - graph writes history', () => { }) }) +describe('KG-MANAGE-012a - completed job archival maintenance', () => { + 
it('exposes archive completed action alongside reset maintenance controls', () => { + expect(graphExtractionJobsWorkspaceVue).toContain('/archive-completed') + expect(graphExtractionJobsWorkspaceVue).toContain('Archive Completed') + expect(graphExtractionJobsWorkspaceVue).toContain('archiveCompletedJobs') + }) +}) + +describe('KG-MANAGE-012b - maintain workspace commit and job controls', () => { + it('labels new files section and exposes commit refresh actions', () => { + expect(graphMaintenanceWorkspaceVue).toContain('New Files to Process') + expect(graphMaintenanceWorkspaceVue).toContain('Check for new commits') + expect(graphMaintenanceWorkspaceVue).toContain('Get latest commit locally') + expect(graphMaintenanceWorkspaceVue).toContain('last_extraction_baseline_commit') + expect(graphMaintenanceWorkspaceVue).toContain('diff-summary') + }) + + it('defaults maintain files per job to two and shows preview', () => { + expect(graphMaintenanceWorkspaceVue).toContain('filesPerJob = ref(2)') + expect(graphMaintenanceWorkspaceVue).toContain('Files per job') + expect(graphMaintenanceWorkspaceVue).toContain('estimatedJobsFromFiles') + }) +}) + describe('KG-MANAGE-012 - archived graph writes grouping', () => { it('groups archived jobs by run and job set', () => { expect(graphExtractionArchivedHistoryVue).toContain('payload.runs') @@ -215,8 +248,8 @@ describe('KG-MANAGE-002 - workspace hub tile set', () => { expect(cards.map((card) => card.title)).toEqual([ 'Data Sources', 'Graph Management', - 'Graph Writes History', 'Maintain', + 'Graph Writes History', ]) }) }) @@ -327,11 +360,16 @@ describe('KG-MANAGE-005 - graph-scoped data sources step', () => { }) describe('KG-MANAGE-015 - graph-scoped maintain step and round trip', () => { - it('keeps maintain route utility for workspace cards but not graph-management redirects', () => { - expect(manageWorkspaceVue).not.toContain('navigateTo(buildMaintainStepUrl(kgId))') + it('routes maintain workspace cards to manage step', () => { 
expect(buildMaintainStepUrl('kg-abc')).toBe( - '/knowledge-graphs/kg-abc/data-sources?focus=maintain', + '/knowledge-graphs/kg-abc/manage?step=maintain', ) + expect(parseManageStepQuery('maintain')).toBe('maintain') + }) + + it('renders maintain workspace on manage page', () => { + expect(manageWorkspaceVue).toContain("activeStep === 'maintain'") + expect(manageWorkspaceVue).toContain('GraphMaintenanceWorkspace') }) it('returns to manage overview from in-page steps', () => { diff --git a/src/dev-ui/app/utils/kgDataSourcesCommits.ts b/src/dev-ui/app/utils/kgDataSourcesCommits.ts index a1856bd60..7d92a3090 100644 --- a/src/dev-ui/app/utils/kgDataSourcesCommits.ts +++ b/src/dev-ui/app/utils/kgDataSourcesCommits.ts @@ -99,6 +99,17 @@ export function formatPreparedFileCount(count: number | null | undefined): strin return count.toLocaleString() } +/** Files materialized in the local clone (shown when a clone commit exists). */ +export function formatFilesOnDisk(ds: { + clone_head_commit?: string | null + last_prepared_commit?: string | null + ingested_head_commit?: string | null + last_prepared_file_count?: number | null +}): string { + if (!resolveIngestedHeadCommit(ds)) return '—' + return (ds.last_prepared_file_count ?? 0).toLocaleString() +} + export function resolveRepoUrl(connectionConfig: Record<string, string> | undefined): string { if (!connectionConfig) return '—' if (connectionConfig.repo_url) return connectionConfig.repo_url diff --git a/src/dev-ui/app/utils/kgMaintenanceSchedule.ts b/src/dev-ui/app/utils/kgMaintenanceSchedule.ts new file mode 100644 index 000000000..85613c087 --- /dev/null +++ b/src/dev-ui/app/utils/kgMaintenanceSchedule.ts @@ -0,0 +1,60 @@ +/** Helpers for KG maintenance schedule UI (daily time ↔ cron). 
*/ + +export const MAINTENANCE_TIMEZONE_OPTIONS = [ + { value: 'UTC', label: 'UTC' }, + { value: 'America/New_York', label: 'US Eastern' }, + { value: 'America/Chicago', label: 'US Central' }, + { value: 'America/Denver', label: 'US Mountain' }, + { value: 'America/Los_Angeles', label: 'US Pacific' }, +] as const + +const DAILY_CRON_RE = /^(\d{1,2})\s+(\d{1,2})\s+\*\s+\*\s+\*$/ + +export function dailyTimeToCron(time: string): string | null { + const match = /^(\d{1,2}):(\d{2})$/.exec(time.trim()) + if (!match) return null + const hour = Number(match[1]) + const minute = Number(match[2]) + if (hour < 0 || hour > 23 || minute < 0 || minute > 59) return null + return `${minute} ${hour} * * *` +} + +export function cronToDailyTime(cronExpression: string): string | null { + const match = DAILY_CRON_RE.exec(cronExpression.trim()) + if (!match) return null + const minute = Number(match[1]) + const hour = Number(match[2]) + if (hour < 0 || hour > 23 || minute < 0 || minute > 59) return null + return `${String(hour).padStart(2, '0')}:${String(minute).padStart(2, '0')}` +} + +export function formatMaintenanceRunOutcome(outcome: string): string { + switch (outcome) { + case 'started': + return 'Started' + case 'no-changes': + return 'No changes' + case 'preflight-failed': + return 'Preflight failed' + case 'launch-failed': + return 'Launch failed' + default: + return outcome + } +} + +export function maintenanceRunOutcomeVariant( + outcome: string, +): 'default' | 'secondary' | 'destructive' | 'outline' | 'success' { + switch (outcome) { + case 'started': + return 'success' + case 'no-changes': + return 'secondary' + case 'preflight-failed': + case 'launch-failed': + return 'destructive' + default: + return 'outline' + } +} diff --git a/src/dev-ui/app/utils/kgManageState.ts b/src/dev-ui/app/utils/kgManageState.ts index 98354abc2..e18e650f3 100644 --- a/src/dev-ui/app/utils/kgManageState.ts +++ b/src/dev-ui/app/utils/kgManageState.ts @@ -63,6 +63,19 @@ export function 
isForbiddenHttpError(err: unknown): boolean { return false } +export function isNotFoundHttpError(err: unknown): boolean { + if (err && typeof err === 'object') { + const fetchErr = err as { statusCode?: number; status?: number } + const status = fetchErr.statusCode ?? fetchErr.status + if (status === 404) return true + } + if (err instanceof Error) { + const message = err.message.toLowerCase() + return message.includes('not found') || message.includes('404') + } + return false +} + export function resolveForbiddenReason( err: unknown, fallback: string, diff --git a/src/dev-ui/app/utils/kgManageWorkspace.ts b/src/dev-ui/app/utils/kgManageWorkspace.ts index b0dd569e8..5ca61299d 100644 --- a/src/dev-ui/app/utils/kgManageWorkspace.ts +++ b/src/dev-ui/app/utils/kgManageWorkspace.ts @@ -23,8 +23,8 @@ export const WORKSPACE_STEP_TITLES: Record<WorkspaceStepId, string> = { export const WORKSPACE_STEP_ORDER: WorkspaceStepId[] = [ 'data-sources', 'graph-management', - 'mutation-logs', 'maintain', + 'mutation-logs', ] export interface WorkspaceReadinessSnapshot { @@ -76,7 +76,7 @@ export function buildDataSourcesStepUrl(kgId: string, dataSourceCount = 0): stri } export function buildMaintainStepUrl(kgId: string): string { - return buildKgDataSourcesUrl(kgId, { focus: 'maintain' }) + return buildManageStepUrl(kgId, 'maintain') } export function buildManageStepUrl(kgId: string, step?: WorkspaceStepId): string { @@ -87,7 +87,11 @@ export function buildManageStepUrl(kgId: string, step?: WorkspaceStepId): string } export function parseManageStepQuery(step: unknown): WorkspaceStepId | null { - if (step === 'graph-management' || step === 'mutation-logs') { + if ( + step === 'graph-management' + || step === 'mutation-logs' + || step === 'maintain' + ) { return step } return null @@ -244,8 +248,8 @@ export function buildWorkspaceStepCards(input: WorkspaceOverviewInputs): Workspa return [ buildDataSourcesCard(input), buildGraphManagementCard(input), - buildMutationLogsCard(input), 
buildMaintainCard(input), + buildMutationLogsCard(input), ] } diff --git a/src/dev-ui/app/utils/kgManageWorkspaceHub.ts b/src/dev-ui/app/utils/kgManageWorkspaceHub.ts index d305acf94..d7d3c1bed 100644 --- a/src/dev-ui/app/utils/kgManageWorkspaceHub.ts +++ b/src/dev-ui/app/utils/kgManageWorkspaceHub.ts @@ -97,7 +97,7 @@ export function buildWorkspaceHubTiles(input: WorkspaceHubOverview): WorkspaceHu if (done) return 'success' if (!enabled) return 'muted' if (cardStatus === 'needs_attention') return 'warning' - if (highlightKey === (['data-sources', 'graph-management', 'mutation-logs', 'maintain'] as const)[step - 1]) { + if (highlightKey === (['data-sources', 'graph-management', 'maintain', 'mutation-logs'] as const)[step - 1]) { return 'primary' } return 'muted' @@ -145,21 +145,6 @@ export function buildWorkspaceHubTiles(input: WorkspaceHubOverview): WorkspaceHu }, { step: 3, - key: 'mutation-logs', - title: 'Graph Writes History', - subtitle: input.mutationLogRunCount > 0 - ? `${input.mutationLogRunCount} archived write entr${input.mutationLogRunCount === 1 ? 'y' : 'ies'} recorded` - : 'Review GMA sessions and extraction job writes', - to: resolveStepDestination(input.kgId, 'mutation-logs'), - enabled: input.dataSourceCount > 0, - lockedReason: input.dataSourceCount > 0 ? null : 'Connect a data source before reviewing runs.', - highlight: highlightKey === 'mutation-logs', - tone: toneFor(3, input.mutationLogRunCount > 0, input.dataSourceCount > 0, mlCard.status), - linkLabel: linkLabelFor(mlCard.actionLabel, input.mutationLogRunCount > 0), - done: input.mutationLogRunCount > 0, - }, - { - step: 4, key: 'maintain', title: 'Maintain', subtitle: input.maintenanceReadyCount > 0 @@ -169,10 +154,25 @@ export function buildWorkspaceHubTiles(input: WorkspaceHubOverview): WorkspaceHu enabled: designDone, lockedReason: designDone ? 
null : 'Complete graph management validation before maintenance.', highlight: highlightKey === 'maintain', - tone: toneFor(4, maintainCard.status === 'ready' && input.maintenanceReadyCount === 0, designDone, maintainCard.status), + tone: toneFor(3, maintainCard.status === 'ready' && input.maintenanceReadyCount === 0, designDone, maintainCard.status), linkLabel: linkLabelFor(maintainCard.actionLabel, maintainCard.status === 'ready' && input.maintenanceReadyCount === 0), done: maintainCard.status === 'ready' && input.maintenanceReadyCount === 0 && input.dataSourceCount > 0, }, + { + step: 4, + key: 'mutation-logs', + title: 'Graph Writes History', + subtitle: input.mutationLogRunCount > 0 + ? `${input.mutationLogRunCount} archived write entr${input.mutationLogRunCount === 1 ? 'y' : 'ies'} recorded` + : 'Review GMA sessions and extraction job writes', + to: resolveStepDestination(input.kgId, 'mutation-logs'), + enabled: input.dataSourceCount > 0, + lockedReason: input.dataSourceCount > 0 ? null : 'Connect a data source before reviewing runs.', + highlight: highlightKey === 'mutation-logs', + tone: toneFor(4, input.mutationLogRunCount > 0, input.dataSourceCount > 0, mlCard.status), + linkLabel: linkLabelFor(mlCard.actionLabel, input.mutationLogRunCount > 0), + done: input.mutationLogRunCount > 0, + }, ] } @@ -259,7 +259,7 @@ export function workspaceHubDescription(input: WorkspaceHubOverview): string { if (!designPhaseComplete(input)) { return 'Use Graph Management for the assistant and schema bootstrap. Green tiles use Revisit; the highlighted tile is your suggested next step.' } - return 'Continue with graph writes history or maintenance, or Revisit any completed step below.' + return 'Continue with maintenance or graph writes history, or Revisit any completed step below.' 
} export function buildManageOverviewUrl(kgId: string): string { From 9ff91a373b8723164f73c211d626d443c2881cfa Mon Sep 17 00:00:00 2001 From: aredenba-rh <aredenba@redhat.com> Date: Mon, 15 Jun 2026 23:14:15 -0400 Subject: [PATCH 142/153] fix(extraction): gateway import, job event filters, and archived status sync Repair OpenShell extraction start failures, cap workers at 50 without sandbox UI noise, and keep recent job events accurate with status filters including archived. Co-authored-by: Cursor <cursoragent@cursor.com> --- .../openshell_extraction_job_runner.py | 1 + .../repositories/extraction_job_repository.py | 7 +- .../extraction_jobs_service.py | 2 +- .../management/extraction_jobs_service.py | 5 +- .../extraction_jobs_routes.py | 2 +- .../test_extraction_job_completion_status.py | 34 ++++++++ .../test_openshell_extraction_job_runner.py | 6 ++ .../GraphExtractionJobsWorkspace.vue | 84 +++++++++++++------ .../GraphMaintenanceWorkspace.vue | 11 ++- .../knowledge-graph-manage-workspace.test.ts | 9 ++ 10 files changed, 124 insertions(+), 37 deletions(-) create mode 100644 src/api/tests/unit/extraction/infrastructure/test_extraction_job_completion_status.py diff --git a/src/api/extraction/infrastructure/openshell_extraction_job_runner.py b/src/api/extraction/infrastructure/openshell_extraction_job_runner.py index ac525359d..36cc5a257 100644 --- a/src/api/extraction/infrastructure/openshell_extraction_job_runner.py +++ b/src/api/extraction/infrastructure/openshell_extraction_job_runner.py @@ -40,6 +40,7 @@ from extraction.infrastructure.openshell.extraction_sandbox_pool import ( resolve_extraction_sandbox_assignment, ) +from extraction.infrastructure.openshell import gateway as openshell_gateway from extraction.infrastructure.openshell import sandbox as openshell_sandbox from extraction.infrastructure.openshell.audit import LoggingOpenShellRuntimeProbe from extraction.infrastructure.openshell.inference_env import ( diff --git 
a/src/api/extraction/infrastructure/repositories/extraction_job_repository.py b/src/api/extraction/infrastructure/repositories/extraction_job_repository.py index 876b3fcd0..a239f0c50 100644 --- a/src/api/extraction/infrastructure/repositories/extraction_job_repository.py +++ b/src/api/extraction/infrastructure/repositories/extraction_job_repository.py @@ -320,14 +320,11 @@ async def list_recent_jobs( self, *, knowledge_graph_id: str, - limit: int = 20, + limit: int = 50, ) -> list[ExtractionJobRecord]: stmt = ( select(ExtractionJobModel) - .where( - ExtractionJobModel.knowledge_graph_id == knowledge_graph_id, - ExtractionJobModel.status != ExtractionJobStatus.ARCHIVED.value, - ) + .where(ExtractionJobModel.knowledge_graph_id == knowledge_graph_id) .order_by( ExtractionJobModel.updated_at.desc(), ExtractionJobModel.order_index.asc(), diff --git a/src/api/infrastructure/extraction_workload/extraction_jobs_service.py b/src/api/infrastructure/extraction_workload/extraction_jobs_service.py index 56262edfd..6f038c334 100644 --- a/src/api/infrastructure/extraction_workload/extraction_jobs_service.py +++ b/src/api/infrastructure/extraction_workload/extraction_jobs_service.py @@ -263,7 +263,7 @@ async def get_database_status( ) recent_jobs = await self._extraction_job_repository.list_recent_jobs( knowledge_graph_id=knowledge_graph_id, - limit=20, + limit=50, ) active_workers = await self._extraction_job_repository.list_active_workers( knowledge_graph_id=knowledge_graph_id diff --git a/src/api/infrastructure/management/extraction_jobs_service.py b/src/api/infrastructure/management/extraction_jobs_service.py index 371c989c3..7757c2c13 100644 --- a/src/api/infrastructure/management/extraction_jobs_service.py +++ b/src/api/infrastructure/management/extraction_jobs_service.py @@ -342,7 +342,7 @@ async def get_database_status( ) recent_jobs = await self._extraction_job_repository.list_recent_jobs( knowledge_graph_id=kg_id, - limit=20, + limit=50, ) active_workers = await 
self._extraction_job_repository.list_active_workers( knowledge_graph_id=kg_id @@ -450,7 +450,6 @@ async def get_extraction_run_state( "workerCount": 0, "pauseRequested": False, } - runtime_settings = get_extraction_workload_runtime_settings() payload = { "live": live or run.status in {ExtractionRunStatus.RUNNING, ExtractionRunStatus.PAUSING}, "status": run.status.value, @@ -460,8 +459,6 @@ async def get_extraction_run_state( "completedAt": run.completed_at.isoformat() if run.completed_at else None, "orchestratorPid": run.orchestrator_pid, } - if runtime_settings.job_runner == "openshell" and run.worker_count > 0: - payload["sandboxSlotCount"] = run.worker_count return payload async def get_extraction_plan_summary( diff --git a/src/api/management/presentation/knowledge_graphs/extraction_jobs_routes.py b/src/api/management/presentation/knowledge_graphs/extraction_jobs_routes.py index d1dc5d44c..15d89fdb7 100644 --- a/src/api/management/presentation/knowledge_graphs/extraction_jobs_routes.py +++ b/src/api/management/presentation/knowledge_graphs/extraction_jobs_routes.py @@ -39,7 +39,7 @@ class ExtractionJobsDocumentResponse(BaseModel): class StartExtractionRequest(BaseModel): - workers: int = Field(default=20, ge=1, le=32) + workers: int = Field(default=20, ge=1, le=50) class ActionResponse(BaseModel): diff --git a/src/api/tests/unit/extraction/infrastructure/test_extraction_job_completion_status.py b/src/api/tests/unit/extraction/infrastructure/test_extraction_job_completion_status.py new file mode 100644 index 000000000..40b10d16d --- /dev/null +++ b/src/api/tests/unit/extraction/infrastructure/test_extraction_job_completion_status.py @@ -0,0 +1,34 @@ +"""Regression tests for extraction job recent-history and completion status.""" + +from __future__ import annotations + +from pathlib import Path + +from extraction.domain.extraction_job import ExtractionJobStatus + +_REPO_PATH = ( + Path(__file__).resolve().parents[4] + / "extraction" + / "infrastructure" + / 
"repositories" + / "extraction_job_repository.py" +) + + +def test_list_recent_jobs_includes_archived_rows() -> None: + source = _REPO_PATH.read_text(encoding="utf-8") + assert "status != ExtractionJobStatus.ARCHIVED" not in source + + +def test_mark_job_completed_auto_archives_when_writes_applied() -> None: + source = _REPO_PATH.read_text(encoding="utf-8") + assert "if write_ops > 0" in source + assert "ExtractionJobStatus.ARCHIVED.value" in source + + write_ops = 2 + status = ( + ExtractionJobStatus.ARCHIVED.value + if write_ops > 0 + else ExtractionJobStatus.COMPLETED.value + ) + assert status == ExtractionJobStatus.ARCHIVED.value diff --git a/src/api/tests/unit/extraction/infrastructure/test_openshell_extraction_job_runner.py b/src/api/tests/unit/extraction/infrastructure/test_openshell_extraction_job_runner.py index 708502703..7c12709c4 100644 --- a/src/api/tests/unit/extraction/infrastructure/test_openshell_extraction_job_runner.py +++ b/src/api/tests/unit/extraction/infrastructure/test_openshell_extraction_job_runner.py @@ -25,6 +25,12 @@ def test_openshell_extraction_sandbox_image_uses_agentic_ci_claude_sandbox() -> assert settings.openshell_extraction_sandbox_image() != settings.agentic_ci_image +def test_openshell_extraction_job_runner_imports_gateway() -> None: + import extraction.infrastructure.openshell_extraction_job_runner as module + + assert hasattr(module, "openshell_gateway") + + def test_run_agent_uses_harness_claude_binary() -> None: runner = OpenShellExtractionJobRunner() diff --git a/src/dev-ui/app/components/graph-management/GraphExtractionJobsWorkspace.vue b/src/dev-ui/app/components/graph-management/GraphExtractionJobsWorkspace.vue index f8b893dd3..5b8678c49 100644 --- a/src/dev-ui/app/components/graph-management/GraphExtractionJobsWorkspace.vue +++ b/src/dev-ui/app/components/graph-management/GraphExtractionJobsWorkspace.vue @@ -75,9 +75,19 @@ interface ExtractionRunState { status: string workerCount: number pauseRequested: boolean - 
sandboxSlotCount?: number } +const MAX_WORKERS = 50 + +type RecentJobStatusFilter = 'all' | 'pending' | 'in_progress' | 'archived' + +const RECENT_JOB_STATUS_FILTERS: Array<{ value: RecentJobStatusFilter; label: string }> = [ + { value: 'all', label: 'All' }, + { value: 'pending', label: 'Pending' }, + { value: 'in_progress', label: 'In progress' }, + { value: 'archived', label: 'Archived' }, +] + const selectedOntologyTab = ref<OntologyTab>('entities') const jobSetsReloadNonce = ref(0) const dbStatus = ref<DbStatus | null>(null) @@ -99,6 +109,7 @@ const optimisticLiveUntilMs = ref<number | null>(null) const nowMs = ref(Date.now()) const lastStatusRefreshMs = ref<number | null>(null) const recentJobEvents = ref<RecentJobEvent[]>([]) +const recentJobStatusFilter = ref<RecentJobStatusFilter>('all') const watchJobId = ref<string | null>(null) const watchDialogOpen = ref(false) const cancellingJobId = ref<string | null>(null) @@ -180,7 +191,10 @@ async function refreshAll(options?: { background?: boolean }) { ]) } -const workerCount = computed(() => Math.max(1, Math.floor(Number(workers.value) || 1))) +const workerCount = computed(() => { + const raw = Math.floor(Number(workers.value) || 1) + return Math.min(MAX_WORKERS, Math.max(1, raw)) +}) const pendingJobsCount = computed(() => Number(dbStatus.value?.jobsByStatus?.pending || 0)) const inProgressJobsCount = computed(() => Number(dbStatus.value?.jobsByStatus?.in_progress || 0)) const completedJobsCount = computed(() => Number(dbStatus.value?.jobsByStatus?.completed || 0)) @@ -200,9 +214,21 @@ const extractionProgressPercent = computed(() => { if (total <= 0) return 0 return Math.round(((completedJobsCount.value + failedJobsCount.value) / total) * 100) }) -const recentJobs = computed(() => - recentJobEvents.value.filter((event) => event.status !== 'archived'), -) +const recentJobs = computed(() => { + if (recentJobStatusFilter.value === 'all') return recentJobEvents.value + return recentJobEvents.value.filter((event) 
=> event.status === recentJobStatusFilter.value) +}) +const recentJobsEmptyMessage = computed(() => { + if (startingExtraction.value || showOptimisticLiveActivity.value) { + return 'Starting extraction workers. Job events will appear as jobs are claimed and completed.' + } + if (recentJobEvents.value.length === 0) return 'No job events yet.' + const filterLabel = RECENT_JOB_STATUS_FILTERS.find( + (option) => option.value === recentJobStatusFilter.value, + )?.label + if (recentJobStatusFilter.value === 'all') return 'No job events yet.' + return `No ${filterLabel?.toLowerCase() ?? recentJobStatusFilter.value} job events in the recent window.` +}) const activeWorkerCount = computed(() => dbStatus.value?.activeWorkers?.length || 0) const idleWorkerCount = computed(() => Math.max(0, workerCount.value - activeWorkerCount.value)) const statusAgeSeconds = computed(() => { @@ -216,15 +242,12 @@ const showOptimisticLiveActivity = computed( function mergeRecentJobEvents(status: DbStatus) { const incoming = status.recentJobs || [] const now = Date.now() - const activeIncoming = incoming.filter((job) => job.status !== 'archived') const activeWorkerJobIds = new Set((status.activeWorkers || []).map((worker) => worker.jobId)) const inProgressCount = Number(status.jobsByStatus?.in_progress || 0) const existingByJobId = new Map( - recentJobEvents.value - .filter((event) => event.status !== 'archived') - .map((event) => [event.jobId, event] as const), + recentJobEvents.value.map((event) => [event.jobId, event] as const), ) - for (const job of activeIncoming) { + for (const job of incoming) { existingByJobId.set(job.jobId, { ...job, eventKey: job.jobId, seenAtMs: now }) } const maxAgeMs = 15 * 60 * 1000 @@ -255,6 +278,7 @@ function recentJobBadgeVariant(status: string): 'default' | 'outline' | 'seconda if (status === 'in_progress') return 'default' if (status === 'failed') return 'destructive' if (status === 'completed') return 'success' + if (status === 'archived') return 
'secondary' return 'outline' } @@ -282,6 +306,11 @@ function formatCompactNumber(value: number): string { async function startExtraction() { startingExtraction.value = true optimisticLiveUntilMs.value = Date.now() + 30000 + const requested = Math.floor(Number(workers.value) || 1) + if (requested > MAX_WORKERS) { + workers.value = MAX_WORKERS + toast.info(`Worker concurrency capped at ${MAX_WORKERS}`) + } try { const res = await apiFetch<{ message?: string }>(`${basePath.value}/start`, { method: 'POST', @@ -523,8 +552,8 @@ onUnmounted(() => { Run extraction </CardTitle> <CardDescription> - Launch parallel extraction workers. Each worker owns one OpenShell sandbox, claims jobs - from the queue until the run completes, and keeps per-job stats in Graph Writes History. + Launch parallel extraction workers that claim jobs from the queue until the run completes. + Per-job stats appear in Graph Writes History. </CardDescription> </CardHeader> <CardContent class="space-y-4"> @@ -535,7 +564,6 @@ onUnmounted(() => { v-model.number="workers" type="number" min="1" - max="32" class="h-10 w-24 rounded-lg border bg-background px-3 text-sm" /> </div> @@ -578,9 +606,6 @@ onUnmounted(() => { <Badge variant="outline" class="font-mono text-[11px]"> workers: {{ activeWorkerCount }}/{{ workerCount }} </Badge> - <Badge v-if="extractionRunState?.sandboxSlotCount" variant="outline" class="font-mono text-[11px]"> - sandboxes: {{ extractionRunState.sandboxSlotCount }} (1 per worker) - </Badge> <Badge v-if="idleWorkerCount > 0" variant="outline" class="font-mono text-[11px]"> {{ idleWorkerCount }} idle </Badge> @@ -596,24 +621,35 @@ onUnmounted(() => { /> </div> <div class="space-y-2"> - <div class="flex items-center justify-between gap-2"> - <p class="text-xs font-medium text-foreground/90">Recent job events</p> + <div class="flex flex-wrap items-center justify-between gap-2"> + <div class="flex flex-wrap items-center gap-2"> + <p class="text-xs font-medium text-foreground/90">Recent job 
events</p> + <div class="flex flex-wrap gap-1"> + <Button + v-for="option in RECENT_JOB_STATUS_FILTERS" + :key="option.value" + variant="ghost" + size="sm" + class="h-7 px-2 text-[11px]" + :class="recentJobStatusFilter === option.value ? 'bg-muted text-foreground' : 'text-muted-foreground'" + @click="recentJobStatusFilter = option.value" + > + {{ option.label }} + </Button> + </div> + </div> <Button variant="ghost" size="sm" class="h-7 px-2 text-[11px]" - :disabled="recentJobs.length === 0" + :disabled="recentJobEvents.length === 0" @click="clearRecentJobEvents" > Clear events </Button> </div> <div v-if="recentJobs.length === 0" class="text-xs text-muted-foreground"> - {{ - startingExtraction || showOptimisticLiveActivity - ? 'Starting extraction workers. Job events will appear as jobs are claimed and completed.' - : 'No job events yet.' - }} + {{ recentJobsEmptyMessage }} </div> <div v-else class="max-h-80 space-y-1 overflow-y-auto pr-1"> <div diff --git a/src/dev-ui/app/components/graph-management/GraphMaintenanceWorkspace.vue b/src/dev-ui/app/components/graph-management/GraphMaintenanceWorkspace.vue index d54779a6a..56158589d 100644 --- a/src/dev-ui/app/components/graph-management/GraphMaintenanceWorkspace.vue +++ b/src/dev-ui/app/components/graph-management/GraphMaintenanceWorkspace.vue @@ -114,6 +114,8 @@ const scheduleTime = ref('02:00') const scheduleTimezone = ref('UTC') const scheduleSaving = ref(false) +const MAX_MAINTENANCE_WORKERS = 50 + const workers = ref(8) const filesPerJob = ref(2) const checkingCommits = ref(false) @@ -376,6 +378,12 @@ async function runMaintenanceNow() { async function startExtractionJobs() { startingExtraction.value = true + const requested = Math.floor(Number(workers.value) || 1) + if (requested > MAX_MAINTENANCE_WORKERS) { + workers.value = MAX_MAINTENANCE_WORKERS + toast.info(`Worker concurrency capped at ${MAX_MAINTENANCE_WORKERS}`) + } + const workerTotal = Math.min(MAX_MAINTENANCE_WORKERS, Math.max(1, requested)) try { try 
{ await applyFilesPerJobToJobSets() @@ -388,7 +396,7 @@ async function startExtractionJobs() { `/management/knowledge-graphs/${encodeURIComponent(props.kgId)}/extraction-jobs/start`, { method: 'POST', - body: { workers: Math.max(1, Math.floor(workers.value)) }, + body: { workers: workerTotal }, }, ) toast.success('Extraction started', { description: res.message }) @@ -602,7 +610,6 @@ onUnmounted(() => { v-model.number="workers" type="number" min="1" - max="32" /> </div> </div> diff --git a/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts b/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts index fb6c73cb7..1fbe6c2c2 100644 --- a/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts +++ b/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts @@ -704,4 +704,13 @@ describe('KG-MANAGE-021 - unified in-place graph operations', () => { expect(graphManagementMutationAuthoringVue).toContain('getMergedEditorContent') expect(manageWorkspaceVue).not.toContain('navigateTo(`/graph/mutations?kg_id=${kgId}&view=editor`)') }) + + it('filters recent job events by pending, in progress, and archived', () => { + expect(graphExtractionJobsWorkspaceVue).toContain('recentJobStatusFilter') + expect(graphExtractionJobsWorkspaceVue).toContain("'pending'") + expect(graphExtractionJobsWorkspaceVue).toContain("'in_progress'") + expect(graphExtractionJobsWorkspaceVue).toContain("'archived'") + expect(graphExtractionJobsWorkspaceVue).toContain("status === 'archived'") + expect(graphExtractionJobsWorkspaceVue).not.toContain("filter((event) => event.status !== 'archived')") + }) }) From 8d957165c23afacd41f7642ec5e9783eb3384d79 Mon Sep 17 00:00:00 2001 From: aredenba-rh <aredenba@redhat.com> Date: Tue, 16 Jun 2026 11:17:12 -0400 Subject: [PATCH 143/153] fix(extraction): release DB sessions during sandbox runs and scale workers Split job prepare from long OpenShell execution to avoid pool exhaustion with high worker counts, scale up live runs on Start, and add 
Failed filter to recent job events. Co-authored-by: Cursor <cursoragent@cursor.com> --- .../domain/prepared_extraction_job_run.py | 14 ++++ .../agentic_ci_extraction_job_runner.py | 26 +++++++- .../infrastructure/extraction_job_executor.py | 22 ++++++- .../extraction_run_orchestrator.py | 28 ++++++-- .../openshell_extraction_job_runner.py | 26 +++++++- .../stub_extraction_job_runner.py | 20 ++++++ .../extraction/ports/extraction_job_runner.py | 19 ++++++ .../test_extraction_job_executor.py | 64 +++++++++++++++++++ .../test_extraction_run_orchestrator.py | 37 +++++++++++ .../GraphExtractionJobsWorkspace.vue | 3 +- .../knowledge-graph-manage-workspace.test.ts | 3 +- 11 files changed, 248 insertions(+), 14 deletions(-) create mode 100644 src/api/extraction/domain/prepared_extraction_job_run.py create mode 100644 src/api/tests/unit/extraction/infrastructure/test_extraction_job_executor.py diff --git a/src/api/extraction/domain/prepared_extraction_job_run.py b/src/api/extraction/domain/prepared_extraction_job_run.py new file mode 100644 index 000000000..d756725ec --- /dev/null +++ b/src/api/extraction/domain/prepared_extraction_job_run.py @@ -0,0 +1,14 @@ +"""Prepared extraction job workspace before long-running agent execution.""" + +from __future__ import annotations + +from dataclasses import dataclass +from pathlib import Path + + +@dataclass(frozen=True) +class PreparedExtractionJobRun: + """Host workdir and agent prompt materialized without holding a DB session.""" + + workdir: Path + prompt: str diff --git a/src/api/extraction/infrastructure/agentic_ci_extraction_job_runner.py b/src/api/extraction/infrastructure/agentic_ci_extraction_job_runner.py index 96e255104..ebcc8db22 100644 --- a/src/api/extraction/infrastructure/agentic_ci_extraction_job_runner.py +++ b/src/api/extraction/infrastructure/agentic_ci_extraction_job_runner.py @@ -16,6 +16,7 @@ from agentic_ci import otel from extraction.domain.extraction_job import ExtractionJobRecord +from 
extraction.domain.prepared_extraction_job_run import PreparedExtractionJobRun from extraction.infrastructure.extraction_job_activity import ( activity_log_path, append_activity_line, @@ -83,7 +84,12 @@ def __init__( self._workdir_materializer = workdir_materializer self._harness = create_harness(self._settings.agentic_ci_harness) - async def run(self, job: ExtractionJobRecord, *, tenant_id: str) -> dict[str, Any]: + async def prepare_for_run( + self, + job: ExtractionJobRecord, + *, + tenant_id: str, + ) -> PreparedExtractionJobRun: if self._workdir_materializer is None: raise RuntimeError("AgenticCiExtractionJobRunner requires a workdir materializer") credentials = get_workload_credential_issuer().issue( @@ -98,7 +104,23 @@ async def run(self, job: ExtractionJobRecord, *, tenant_id: str) -> dict[str, An ) _patch_job_context_api_base(workdir, self._settings.agentic_ci_api_base_url) prompt = build_extraction_job_prompt(job=job) - return await self._run_in_container(job=job, workdir=workdir, prompt=prompt) + return PreparedExtractionJobRun(workdir=workdir, prompt=prompt) + + async def run_prepared( + self, + job: ExtractionJobRecord, + *, + prepared: PreparedExtractionJobRun, + ) -> dict[str, Any]: + return await self._run_in_container( + job=job, + workdir=prepared.workdir, + prompt=prepared.prompt, + ) + + async def run(self, job: ExtractionJobRecord, *, tenant_id: str) -> dict[str, Any]: + prepared = await self.prepare_for_run(job, tenant_id=tenant_id) + return await self.run_prepared(job, prepared=prepared) async def _run_in_container( self, diff --git a/src/api/extraction/infrastructure/extraction_job_executor.py b/src/api/extraction/infrastructure/extraction_job_executor.py index 58bc46842..257ca27aa 100644 --- a/src/api/extraction/infrastructure/extraction_job_executor.py +++ b/src/api/extraction/infrastructure/extraction_job_executor.py @@ -2,6 +2,7 @@ from __future__ import annotations +import asyncio from typing import Any from 
extraction.domain.extraction_job import ExtractionJobRecord @@ -9,12 +10,15 @@ from extraction.infrastructure.stub_extraction_job_runner import StubExtractionJobRunner from extraction.infrastructure.workload_runtime_settings import get_extraction_workload_runtime_settings from extraction.ports.extraction_job_runner import IExtractionJobRunner +from infrastructure.settings import get_database_settings from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker class ExtractionJobExecutor: """Runs one extraction job using the configured runner backend.""" + _prepare_semaphore: asyncio.Semaphore | None = None + def __init__( self, *, @@ -24,12 +28,24 @@ def __init__( self._session_factory = session_factory self._runner = runner + @classmethod + def _prepare_gate(cls) -> asyncio.Semaphore: + if cls._prepare_semaphore is None: + reserve = 2 + limit = max(1, get_database_settings().pool_max_connections - reserve) + cls._prepare_semaphore = asyncio.Semaphore(limit) + return cls._prepare_semaphore + async def execute(self, job: ExtractionJobRecord, *, tenant_id: str) -> dict[str, Any]: if self._runner is not None: return await self._runner.run(job, tenant_id=tenant_id) settings = get_extraction_workload_runtime_settings() if settings.job_runner == "stub" or self._session_factory is None: return await StubExtractionJobRunner().run(job, tenant_id=tenant_id) - async with self._session_factory() as session: - runner = create_extraction_job_runner(session=session, settings=settings) - return await runner.run(job, tenant_id=tenant_id) + + async with self._prepare_gate(): + async with self._session_factory() as session: + runner = create_extraction_job_runner(session=session, settings=settings) + prepared = await runner.prepare_for_run(job, tenant_id=tenant_id) + + return await runner.run_prepared(job, prepared=prepared) diff --git a/src/api/extraction/infrastructure/extraction_run_orchestrator.py b/src/api/extraction/infrastructure/extraction_run_orchestrator.py index 
922ac38f3..092e20352 100644 --- a/src/api/extraction/infrastructure/extraction_run_orchestrator.py +++ b/src/api/extraction/infrastructure/extraction_run_orchestrator.py @@ -51,14 +51,26 @@ async def start( worker_count: int, ) -> None: async with self._lock: + requested = max(1, worker_count) existing = self._active.get(knowledge_graph_id) if existing and not existing.stop_event.is_set(): + if existing.worker_count < requested: + self._spawn_workers(existing, target_count=requested) + async with self._session_factory() as session: + repo = ExtractionJobRepository(session) + await repo.upsert_run( + knowledge_graph_id=knowledge_graph_id, + status=ExtractionRunStatus.RUNNING, + worker_count=requested, + pause_requested=False, + ) + await session.commit() return state = _OrchestratorState( knowledge_graph_id=knowledge_graph_id, tenant_id=tenant_id, - worker_count=max(1, worker_count), + worker_count=requested, ) self._active[knowledge_graph_id] = state @@ -75,10 +87,16 @@ async def start( ) await session.commit() - for index in range(state.worker_count): - state.tasks.append( - asyncio.create_task(self._worker_loop(state, worker_index=index + 1)) - ) + self._spawn_workers(state, target_count=state.worker_count) + + def _spawn_workers(self, state: _OrchestratorState, *, target_count: int) -> None: + """Start worker tasks until the pool reaches target_count.""" + while len(state.tasks) < target_count: + worker_index = len(state.tasks) + 1 + state.tasks.append( + asyncio.create_task(self._worker_loop(state, worker_index=worker_index)) + ) + state.worker_count = target_count async def request_pause(self, *, knowledge_graph_id: str) -> None: async with self._session_factory() as session: diff --git a/src/api/extraction/infrastructure/openshell_extraction_job_runner.py b/src/api/extraction/infrastructure/openshell_extraction_job_runner.py index 36cc5a257..9ba241191 100644 --- a/src/api/extraction/infrastructure/openshell_extraction_job_runner.py +++ 
b/src/api/extraction/infrastructure/openshell_extraction_job_runner.py @@ -16,6 +16,7 @@ from agentic_ci.harness import create_harness from extraction.domain.extraction_job import ExtractionJobRecord +from extraction.domain.prepared_extraction_job_run import PreparedExtractionJobRun from extraction.infrastructure.extraction_job_activity import ( activity_log_path, append_activity_line, @@ -90,7 +91,12 @@ def __init__( self._harness = create_harness(self._settings.agentic_ci_harness) self._probe = LoggingOpenShellRuntimeProbe() - async def run(self, job: ExtractionJobRecord, *, tenant_id: str) -> dict[str, Any]: + async def prepare_for_run( + self, + job: ExtractionJobRecord, + *, + tenant_id: str, + ) -> PreparedExtractionJobRun: if self._workdir_materializer is None: raise RuntimeError("OpenShellExtractionJobRunner requires a workdir materializer") credentials = get_workload_credential_issuer().issue( @@ -105,7 +111,23 @@ async def run(self, job: ExtractionJobRecord, *, tenant_id: str) -> dict[str, An ) _patch_job_context_api_base(workdir, self._settings.sandbox_reachable_api_base_url()) prompt = build_extraction_job_prompt(job=job) - return await self._run_in_sandbox(job=job, workdir=workdir, prompt=prompt) + return PreparedExtractionJobRun(workdir=workdir, prompt=prompt) + + async def run_prepared( + self, + job: ExtractionJobRecord, + *, + prepared: PreparedExtractionJobRun, + ) -> dict[str, Any]: + return await self._run_in_sandbox( + job=job, + workdir=prepared.workdir, + prompt=prepared.prompt, + ) + + async def run(self, job: ExtractionJobRecord, *, tenant_id: str) -> dict[str, Any]: + prepared = await self.prepare_for_run(job, tenant_id=tenant_id) + return await self.run_prepared(job, prepared=prepared) async def _run_in_sandbox( self, diff --git a/src/api/extraction/infrastructure/stub_extraction_job_runner.py b/src/api/extraction/infrastructure/stub_extraction_job_runner.py index 8aff1afbb..a9ed8b943 100644 --- 
a/src/api/extraction/infrastructure/stub_extraction_job_runner.py +++ b/src/api/extraction/infrastructure/stub_extraction_job_runner.py @@ -3,15 +3,35 @@ from __future__ import annotations import asyncio +from pathlib import Path from typing import Any from extraction.domain.extraction_job import ExtractionJobRecord +from extraction.domain.prepared_extraction_job_run import PreparedExtractionJobRun from extraction.ports.extraction_job_runner import IExtractionJobRunner class StubExtractionJobRunner(IExtractionJobRunner): """Simulates successful job completion without launching containers.""" + async def prepare_for_run( + self, + job: ExtractionJobRecord, + *, + tenant_id: str, + ) -> PreparedExtractionJobRun: + _ = tenant_id + return PreparedExtractionJobRun(workdir=Path("/tmp/stub"), prompt=job.description) + + async def run_prepared( + self, + job: ExtractionJobRecord, + *, + prepared: PreparedExtractionJobRun, + ) -> dict[str, Any]: + _ = prepared + return await self.run(job, tenant_id="stub") + async def run(self, job: ExtractionJobRecord, *, tenant_id: str) -> dict[str, Any]: _ = tenant_id await asyncio.sleep(0.05) diff --git a/src/api/extraction/ports/extraction_job_runner.py b/src/api/extraction/ports/extraction_job_runner.py index f7cf6d8ce..24a76e60b 100644 --- a/src/api/extraction/ports/extraction_job_runner.py +++ b/src/api/extraction/ports/extraction_job_runner.py @@ -5,11 +5,30 @@ from typing import Any, Protocol from extraction.domain.extraction_job import ExtractionJobRecord +from extraction.domain.prepared_extraction_job_run import PreparedExtractionJobRun class IExtractionJobRunner(Protocol): """Runs one extraction job and returns completion metrics.""" + async def prepare_for_run( + self, + job: ExtractionJobRecord, + *, + tenant_id: str, + ) -> PreparedExtractionJobRun: + """Materialize workspace artifacts (short-lived DB usage).""" + ... 
+ + async def run_prepared( + self, + job: ExtractionJobRecord, + *, + prepared: PreparedExtractionJobRun, + ) -> dict[str, Any]: + """Execute a prepared job without an open ORM session.""" + ... + async def run(self, job: ExtractionJobRecord, *, tenant_id: str) -> dict[str, Any]: """Execute the job and return token/cost/write metrics.""" ... diff --git a/src/api/tests/unit/extraction/infrastructure/test_extraction_job_executor.py b/src/api/tests/unit/extraction/infrastructure/test_extraction_job_executor.py new file mode 100644 index 000000000..473688bd1 --- /dev/null +++ b/src/api/tests/unit/extraction/infrastructure/test_extraction_job_executor.py @@ -0,0 +1,64 @@ +"""Tests for extraction job executor session lifecycle.""" + +from __future__ import annotations + +from contextlib import asynccontextmanager +from pathlib import Path +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest + +from extraction.domain.extraction_job import ExtractionJobRecord, ExtractionJobStatus +from extraction.domain.prepared_extraction_job_run import PreparedExtractionJobRun +from extraction.infrastructure.extraction_job_executor import ExtractionJobExecutor + + +def _job() -> ExtractionJobRecord: + return ExtractionJobRecord( + id="row-1", + knowledge_graph_id="kg-1", + job_id="job-001", + job_set_name="set-a", + strategy="by_instances", + status=ExtractionJobStatus.IN_PROGRESS, + order_index=0, + description="extract", + ) + + +@pytest.mark.asyncio +async def test_execute_releases_db_session_before_run_prepared() -> None: + session = AsyncMock() + session_cm = AsyncMock() + session_cm.__aenter__ = AsyncMock(return_value=session) + session_cm.__aexit__ = AsyncMock(return_value=None) + + @asynccontextmanager + async def session_factory(): + yield session + + runner = MagicMock() + runner.prepare_for_run = AsyncMock( + return_value=PreparedExtractionJobRun(workdir=Path("/tmp/job"), prompt="go") + ) + runner.run_prepared = AsyncMock(return_value={"write_ops": 1}) + 
runner.run = AsyncMock() + + with ( + patch( + "extraction.infrastructure.extraction_job_executor.get_extraction_workload_runtime_settings", + ) as settings_mock, + patch( + "extraction.infrastructure.extraction_job_executor.create_extraction_job_runner", + return_value=runner, + ), + ): + settings = settings_mock.return_value + settings.job_runner = "openshell" + executor = ExtractionJobExecutor(session_factory=session_factory) + result = await executor.execute(_job(), tenant_id="tenant-1") + + assert result == {"write_ops": 1} + runner.prepare_for_run.assert_awaited_once() + runner.run_prepared.assert_awaited_once() + runner.run.assert_not_called() diff --git a/src/api/tests/unit/extraction/infrastructure/test_extraction_run_orchestrator.py b/src/api/tests/unit/extraction/infrastructure/test_extraction_run_orchestrator.py index ef3aad249..17b4f90a8 100644 --- a/src/api/tests/unit/extraction/infrastructure/test_extraction_run_orchestrator.py +++ b/src/api/tests/unit/extraction/infrastructure/test_extraction_run_orchestrator.py @@ -54,3 +54,40 @@ async def session_context(): knowledge_graph_id="kg-001", ) session.commit.assert_awaited_once() + + +@pytest.mark.asyncio +async def test_start_scales_up_worker_tasks_when_run_is_already_live() -> None: + session = AsyncMock() + session.commit = AsyncMock() + + @asynccontextmanager + async def session_context(): + yield session + + session_factory = lambda: session_context() + + orchestrator = ExtractionRunOrchestrator(session_factory=session_factory) + state = _OrchestratorState( + knowledge_graph_id="kg-001", + tenant_id="tenant-001", + worker_count=10, + ) + state.tasks = [AsyncMock() for _ in range(10)] + orchestrator._active["kg-001"] = state + + with ( + patch( + "extraction.infrastructure.extraction_run_orchestrator.ExtractionJobRepository", + ) as repo_cls, + patch.object(orchestrator, "_worker_loop", new_callable=AsyncMock), + ): + repo_cls.return_value = AsyncMock() + await orchestrator.start( + 
tenant_id="tenant-001", + knowledge_graph_id="kg-001", + worker_count=20, + ) + + assert state.worker_count == 20 + assert len(state.tasks) == 20 diff --git a/src/dev-ui/app/components/graph-management/GraphExtractionJobsWorkspace.vue b/src/dev-ui/app/components/graph-management/GraphExtractionJobsWorkspace.vue index 5b8678c49..b4a0d58ca 100644 --- a/src/dev-ui/app/components/graph-management/GraphExtractionJobsWorkspace.vue +++ b/src/dev-ui/app/components/graph-management/GraphExtractionJobsWorkspace.vue @@ -79,13 +79,14 @@ interface ExtractionRunState { const MAX_WORKERS = 50 -type RecentJobStatusFilter = 'all' | 'pending' | 'in_progress' | 'archived' +type RecentJobStatusFilter = 'all' | 'pending' | 'in_progress' | 'archived' | 'failed' const RECENT_JOB_STATUS_FILTERS: Array<{ value: RecentJobStatusFilter; label: string }> = [ { value: 'all', label: 'All' }, { value: 'pending', label: 'Pending' }, { value: 'in_progress', label: 'In progress' }, { value: 'archived', label: 'Archived' }, + { value: 'failed', label: 'Failed' }, ] const selectedOntologyTab = ref<OntologyTab>('entities') diff --git a/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts b/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts index 1fbe6c2c2..16eecb905 100644 --- a/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts +++ b/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts @@ -705,11 +705,12 @@ describe('KG-MANAGE-021 - unified in-place graph operations', () => { expect(manageWorkspaceVue).not.toContain('navigateTo(`/graph/mutations?kg_id=${kgId}&view=editor`)') }) - it('filters recent job events by pending, in progress, and archived', () => { + it('filters recent job events by pending, in progress, archived, and failed', () => { expect(graphExtractionJobsWorkspaceVue).toContain('recentJobStatusFilter') expect(graphExtractionJobsWorkspaceVue).toContain("'pending'") expect(graphExtractionJobsWorkspaceVue).toContain("'in_progress'") 
expect(graphExtractionJobsWorkspaceVue).toContain("'archived'") + expect(graphExtractionJobsWorkspaceVue).toContain("'failed'") expect(graphExtractionJobsWorkspaceVue).toContain("status === 'archived'") expect(graphExtractionJobsWorkspaceVue).not.toContain("filter((event) => event.status !== 'archived')") }) From 0c619c57224dc52315c1233172c42a366e9114b9 Mon Sep 17 00:00:00 2001 From: aredenba-rh <aredenba@redhat.com> Date: Tue, 16 Jun 2026 14:49:09 -0400 Subject: [PATCH 144/153] fix(extraction): reconcile stuck runs and seed KG-wide prepare baselines Finish idle extraction runs and advance last_extraction_baseline_commit when the job queue drains but the run row stayed active. On prepare, seed unset baselines for all prepared sources on the knowledge graph, not only the source that just finished ingest. Co-authored-by: Cursor <cursoragent@cursor.com> --- .../extraction_run_orchestrator.py | 38 ++--- .../extraction_run_reconciliation.py | 68 ++++++++ .../management/extraction_jobs_service.py | 18 +- .../extraction_baseline_updater.py | 27 +++ .../infrastructure/sync_lifecycle_handler.py | 8 + .../presentation/data_sources/routes.py | 16 +- .../test_extraction_run_orchestrator.py | 29 +--- .../test_extraction_run_reconciliation.py | 154 ++++++++++++++++++ .../test_extraction_baseline_updater.py | 37 +++++ .../test_sync_lifecycle_handler.py | 59 +++++++ .../presentation/test_data_sources_routes.py | 21 +++ 11 files changed, 432 insertions(+), 43 deletions(-) create mode 100644 src/api/extraction/infrastructure/extraction_run_reconciliation.py create mode 100644 src/api/tests/unit/extraction/infrastructure/test_extraction_run_reconciliation.py diff --git a/src/api/extraction/infrastructure/extraction_run_orchestrator.py b/src/api/extraction/infrastructure/extraction_run_orchestrator.py index 092e20352..e9ebf989d 100644 --- a/src/api/extraction/infrastructure/extraction_run_orchestrator.py +++ b/src/api/extraction/infrastructure/extraction_run_orchestrator.py @@ -12,10 
+12,10 @@ from extraction.infrastructure.extraction_job_executor import ExtractionJobExecutor from extraction.domain.extraction_job import ExtractionRunStatus -from extraction.infrastructure.repositories.extraction_job_repository import ExtractionJobRepository -from management.infrastructure.extraction_baseline_updater import ( - advance_extraction_baselines_for_knowledge_graph, +from extraction.infrastructure.extraction_run_reconciliation import ( + reconcile_quiescent_extraction_run, ) +from extraction.infrastructure.repositories.extraction_job_repository import ExtractionJobRepository logger = logging.getLogger(__name__) @@ -216,26 +216,24 @@ async def _worker_loop(self, state: _OrchestratorState, *, worker_index: int) -> async def _maybe_finish_run(self, state: _OrchestratorState) -> None: async with self._session_factory() as session: - repo = ExtractionJobRepository(session) - counts = await repo.count_by_status(knowledge_graph_id=state.knowledge_graph_id) - pending = counts.get("pending", 0) - in_progress = counts.get("in_progress", 0) - if pending == 0 and in_progress == 0: - await repo.upsert_run( - knowledge_graph_id=state.knowledge_graph_id, - status=ExtractionRunStatus.IDLE, - worker_count=state.worker_count, - pause_requested=False, - completed_at=datetime.now(UTC), - ) - await advance_extraction_baselines_for_knowledge_graph( - session=session, - knowledge_graph_id=state.knowledge_graph_id, - ) - await session.commit() + _, run_was_active = await reconcile_quiescent_extraction_run( + session=session, + knowledge_graph_id=state.knowledge_graph_id, + ) + if run_was_active: state.stop_event.set() self._active.pop(state.knowledge_graph_id, None) + def stop_active_run(self, *, knowledge_graph_id: str) -> None: + """Stop in-memory workers for a knowledge graph run.""" + state = self._active.get(knowledge_graph_id) + if state is None: + return + state.stop_event.set() + for task in state.tasks: + task.cancel() + self._active.pop(knowledge_graph_id, None) + 
def is_live(self, *, knowledge_graph_id: str) -> bool: state = self._active.get(knowledge_graph_id) return state is not None and not state.stop_event.is_set() diff --git a/src/api/extraction/infrastructure/extraction_run_reconciliation.py b/src/api/extraction/infrastructure/extraction_run_reconciliation.py new file mode 100644 index 000000000..63afd2697 --- /dev/null +++ b/src/api/extraction/infrastructure/extraction_run_reconciliation.py @@ -0,0 +1,68 @@ +"""Reconcile stuck extraction runs and advance extraction baselines.""" + +from __future__ import annotations + +from datetime import UTC, datetime +from typing import TYPE_CHECKING + +from extraction.domain.extraction_job import ExtractionRunStatus +from extraction.infrastructure.repositories.extraction_job_repository import ( + ExtractionJobRepository, +) +from management.infrastructure.extraction_baseline_updater import ( + advance_extraction_baselines_for_knowledge_graph, +) + +if TYPE_CHECKING: + from sqlalchemy.ext.asyncio import AsyncSession + + from extraction.infrastructure.extraction_run_orchestrator import ( + ExtractionRunOrchestrator, + ) + + +async def reconcile_quiescent_extraction_run( + *, + session: AsyncSession, + knowledge_graph_id: str, + orchestrator: ExtractionRunOrchestrator | None = None, +) -> tuple[bool, bool]: + """Finish active runs and advance baselines when the job queue has drained. + + Returns: + A tuple of (reconciled, run_was_active). ``reconciled`` is True when the + database was updated. ``run_was_active`` is True when an active run row + was transitioned to idle (callers should stop in-memory orchestrator + workers when this is True). 
+ """ + repo = ExtractionJobRepository(session) + counts = await repo.count_by_status(knowledge_graph_id=knowledge_graph_id) + if counts.get("pending", 0) > 0 or counts.get("in_progress", 0) > 0: + return False, False + + run = await repo.get_run(knowledge_graph_id=knowledge_graph_id) + run_was_active = run is not None and run.status != ExtractionRunStatus.IDLE + + if run_was_active: + await repo.upsert_run( + knowledge_graph_id=knowledge_graph_id, + status=ExtractionRunStatus.IDLE, + worker_count=run.worker_count, + pause_requested=False, + completed_at=datetime.now(UTC), + ) + + baselines_updated = await advance_extraction_baselines_for_knowledge_graph( + session=session, + knowledge_graph_id=knowledge_graph_id, + ) + + if not run_was_active and baselines_updated <= 0: + return False, False + + await session.commit() + + if orchestrator is not None and run_was_active: + orchestrator.stop_active_run(knowledge_graph_id=knowledge_graph_id) + + return True, run_was_active diff --git a/src/api/infrastructure/management/extraction_jobs_service.py b/src/api/infrastructure/management/extraction_jobs_service.py index 7757c2c13..f8174e251 100644 --- a/src/api/infrastructure/management/extraction_jobs_service.py +++ b/src/api/infrastructure/management/extraction_jobs_service.py @@ -19,6 +19,9 @@ stop_extraction_job_runtimes, ) from extraction.infrastructure.extraction_run_orchestrator import get_extraction_run_orchestrator +from extraction.infrastructure.extraction_run_reconciliation import ( + reconcile_quiescent_extraction_run, +) from extraction.domain.extraction_job import ExtractionJobStatus, ExtractionRunStatus from extraction.infrastructure.extraction_job_activity import ( job_workdir, @@ -336,6 +339,13 @@ async def get_database_status( if kg is None: return None + orchestrator = get_extraction_run_orchestrator(session_factory=self._session_factory) + await reconcile_quiescent_extraction_run( + session=self._session, + knowledge_graph_id=kg_id, + 
orchestrator=orchestrator, + ) + counts = await self._extraction_job_repository.count_by_status(knowledge_graph_id=kg_id) jobs_by_set = await self._extraction_job_repository.count_by_job_set( knowledge_graph_id=kg_id @@ -440,8 +450,14 @@ async def get_extraction_run_state( if kg is None: return None - run = await self._extraction_job_repository.get_run(knowledge_graph_id=kg_id) orchestrator = get_extraction_run_orchestrator(session_factory=self._session_factory) + await reconcile_quiescent_extraction_run( + session=self._session, + knowledge_graph_id=kg_id, + orchestrator=orchestrator, + ) + + run = await self._extraction_job_repository.get_run(knowledge_graph_id=kg_id) live = orchestrator.is_live(knowledge_graph_id=kg_id) if run is None: return { diff --git a/src/api/management/infrastructure/extraction_baseline_updater.py b/src/api/management/infrastructure/extraction_baseline_updater.py index 18d7ab83d..e5215d020 100644 --- a/src/api/management/infrastructure/extraction_baseline_updater.py +++ b/src/api/management/infrastructure/extraction_baseline_updater.py @@ -34,3 +34,30 @@ async def advance_extraction_baselines_for_knowledge_graph( await data_source_repository.save(data_source) updated += 1 return updated + + +async def seed_unset_extraction_baselines_for_knowledge_graph( + *, + session: AsyncSession, + knowledge_graph_id: str, + data_source_repository: IDataSourceRepository | None = None, +) -> int: + """Seed NULL extraction baselines from each source's ingested head on a KG.""" + if data_source_repository is None: + from management.infrastructure.repositories.data_source_repository import ( + DataSourceRepository, + ) + + data_source_repository = DataSourceRepository(session) + + data_sources = await data_source_repository.find_by_knowledge_graph(knowledge_graph_id) + updated = 0 + for data_source in data_sources: + if data_source.last_extraction_baseline_commit is not None: + continue + data_source.advance_extraction_baseline_to_ingested_head() + if 
data_source.last_extraction_baseline_commit is None: + continue + await data_source_repository.save(data_source) + updated += 1 + return updated diff --git a/src/api/management/infrastructure/sync_lifecycle_handler.py b/src/api/management/infrastructure/sync_lifecycle_handler.py index 05950919d..ade32a20f 100644 --- a/src/api/management/infrastructure/sync_lifecycle_handler.py +++ b/src/api/management/infrastructure/sync_lifecycle_handler.py @@ -24,6 +24,9 @@ from management.domain.entities import MutationLogRunMetadata from management.domain.value_objects import DataSourceId +from management.infrastructure.extraction_baseline_updater import ( + seed_unset_extraction_baselines_for_knowledge_graph, +) if TYPE_CHECKING: from sqlalchemy.ext.asyncio import AsyncSession @@ -263,3 +266,8 @@ async def _update_data_source_ingestion_prepared( ) ds.maybe_seed_extraction_baseline_from_prepare(prepared_commit=commit) await self._ds_repo.save(ds) + await seed_unset_extraction_baselines_for_knowledge_graph( + session=self._session, + knowledge_graph_id=ds.knowledge_graph_id, + data_source_repository=self._ds_repo, + ) diff --git a/src/api/management/presentation/data_sources/routes.py b/src/api/management/presentation/data_sources/routes.py index c8057ae0c..ca05d74f9 100644 --- a/src/api/management/presentation/data_sources/routes.py +++ b/src/api/management/presentation/data_sources/routes.py @@ -5,12 +5,17 @@ from pathlib import Path from typing import Annotated -from fastapi import APIRouter, Depends, HTTPException, Query, status +from fastapi import APIRouter, Depends, HTTPException, Query, Request, status from sqlalchemy.ext.asyncio import AsyncSession from iam.application.value_objects import CurrentUser from iam.dependencies.user import get_current_user from infrastructure.database.dependencies import get_write_session +from infrastructure.management.extraction_jobs_dependencies import get_write_sessionmaker +from extraction.infrastructure.extraction_run_orchestrator 
import get_extraction_run_orchestrator +from extraction.infrastructure.extraction_run_reconciliation import ( + reconcile_quiescent_extraction_run, +) from management.application.services.data_source_service import DataSourceService from management.dependencies.data_source import ( get_data_source_service, @@ -268,6 +273,7 @@ async def list_all_data_sources( status_code=status.HTTP_200_OK, ) async def list_data_sources( + request: Request, kg_id: str, current_user: Annotated[CurrentUser, Depends(get_current_user)], service: Annotated[DataSourceService, Depends(get_data_source_service)], @@ -291,6 +297,14 @@ async def list_data_sources( HTTPException: 500 for unexpected errors """ try: + orchestrator = get_extraction_run_orchestrator( + session_factory=get_write_sessionmaker(request), + ) + await reconcile_quiescent_extraction_run( + session=session, + knowledge_graph_id=kg_id, + orchestrator=orchestrator, + ) data_sources = await service.list_for_knowledge_graph( user_id=current_user.user_id.value, kg_id=kg_id, diff --git a/src/api/tests/unit/extraction/infrastructure/test_extraction_run_orchestrator.py b/src/api/tests/unit/extraction/infrastructure/test_extraction_run_orchestrator.py index 17b4f90a8..8e8f65f5b 100644 --- a/src/api/tests/unit/extraction/infrastructure/test_extraction_run_orchestrator.py +++ b/src/api/tests/unit/extraction/infrastructure/test_extraction_run_orchestrator.py @@ -7,7 +7,6 @@ import pytest -from extraction.domain.extraction_job import ExtractionRunStatus from extraction.infrastructure.extraction_run_orchestrator import ( ExtractionRunOrchestrator, _OrchestratorState, @@ -32,28 +31,16 @@ async def session_context(): worker_count=2, ) - repo = AsyncMock() - repo.count_by_status.return_value = {"pending": 0, "in_progress": 0} - - with ( - patch( - "extraction.infrastructure.extraction_run_orchestrator.ExtractionJobRepository", - return_value=repo, - ), - patch( - 
"extraction.infrastructure.extraction_run_orchestrator.advance_extraction_baselines_for_knowledge_graph", - new_callable=AsyncMock, - ) as advance_baselines, - ): + with patch( + "extraction.infrastructure.extraction_run_orchestrator.reconcile_quiescent_extraction_run", + new_callable=AsyncMock, + return_value=(True, True), + ) as reconcile: await orchestrator._maybe_finish_run(state) - repo.upsert_run.assert_awaited_once() - assert repo.upsert_run.await_args.kwargs["status"] == ExtractionRunStatus.IDLE - advance_baselines.assert_awaited_once_with( - session=session, - knowledge_graph_id="kg-001", - ) - session.commit.assert_awaited_once() + reconcile.assert_awaited_once() + assert state.stop_event.is_set() + assert "kg-001" not in orchestrator._active @pytest.mark.asyncio diff --git a/src/api/tests/unit/extraction/infrastructure/test_extraction_run_reconciliation.py b/src/api/tests/unit/extraction/infrastructure/test_extraction_run_reconciliation.py new file mode 100644 index 000000000..b22b83fc9 --- /dev/null +++ b/src/api/tests/unit/extraction/infrastructure/test_extraction_run_reconciliation.py @@ -0,0 +1,154 @@ +"""Tests for quiescent extraction run reconciliation.""" + +from __future__ import annotations + +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest + +from extraction.domain.extraction_job import ExtractionRunRecord, ExtractionRunStatus +from extraction.infrastructure.extraction_run_reconciliation import ( + reconcile_quiescent_extraction_run, +) + + +@pytest.mark.asyncio +async def test_reconcile_skips_when_jobs_remain() -> None: + session = AsyncMock() + repo = AsyncMock() + repo.count_by_status.return_value = {"pending": 1, "in_progress": 0} + + with patch( + "extraction.infrastructure.extraction_run_reconciliation.ExtractionJobRepository", + return_value=repo, + ): + reconciled, run_was_active = await reconcile_quiescent_extraction_run( + session=session, + knowledge_graph_id="kg-001", + ) + + assert reconciled is False + 
assert run_was_active is False + repo.upsert_run.assert_not_awaited() + session.commit.assert_not_awaited() + + +@pytest.mark.asyncio +async def test_reconcile_finishes_active_run_and_advances_baselines() -> None: + session = AsyncMock() + repo = AsyncMock() + repo.count_by_status.return_value = {"pending": 0, "in_progress": 0} + repo.get_run.return_value = ExtractionRunRecord( + id="run-001", + knowledge_graph_id="kg-001", + status=ExtractionRunStatus.RUNNING, + worker_count=10, + pause_requested=False, + started_at=None, + completed_at=None, + orchestrator_pid=None, + ) + orchestrator = MagicMock() + + with ( + patch( + "extraction.infrastructure.extraction_run_reconciliation.ExtractionJobRepository", + return_value=repo, + ), + patch( + "extraction.infrastructure.extraction_run_reconciliation.advance_extraction_baselines_for_knowledge_graph", + new_callable=AsyncMock, + return_value=2, + ) as advance_baselines, + ): + reconciled, run_was_active = await reconcile_quiescent_extraction_run( + session=session, + knowledge_graph_id="kg-001", + orchestrator=orchestrator, + ) + + assert reconciled is True + assert run_was_active is True + repo.upsert_run.assert_awaited_once() + assert repo.upsert_run.await_args.kwargs["status"] == ExtractionRunStatus.IDLE + advance_baselines.assert_awaited_once_with( + session=session, + knowledge_graph_id="kg-001", + ) + session.commit.assert_awaited_once() + orchestrator.stop_active_run.assert_called_once_with(knowledge_graph_id="kg-001") + + +@pytest.mark.asyncio +async def test_reconcile_advances_baselines_when_run_already_idle() -> None: + session = AsyncMock() + repo = AsyncMock() + repo.count_by_status.return_value = {"pending": 0, "in_progress": 0} + repo.get_run.return_value = ExtractionRunRecord( + id="run-001", + knowledge_graph_id="kg-001", + status=ExtractionRunStatus.IDLE, + worker_count=0, + pause_requested=False, + started_at=None, + completed_at=None, + orchestrator_pid=None, + ) + + with ( + patch( + 
"extraction.infrastructure.extraction_run_reconciliation.ExtractionJobRepository", + return_value=repo, + ), + patch( + "extraction.infrastructure.extraction_run_reconciliation.advance_extraction_baselines_for_knowledge_graph", + new_callable=AsyncMock, + return_value=1, + ), + ): + reconciled, run_was_active = await reconcile_quiescent_extraction_run( + session=session, + knowledge_graph_id="kg-001", + ) + + assert reconciled is True + assert run_was_active is False + repo.upsert_run.assert_not_awaited() + session.commit.assert_awaited_once() + + +@pytest.mark.asyncio +async def test_reconcile_noop_when_queue_and_baselines_are_current() -> None: + session = AsyncMock() + repo = AsyncMock() + repo.count_by_status.return_value = {"pending": 0, "in_progress": 0} + repo.get_run.return_value = ExtractionRunRecord( + id="run-001", + knowledge_graph_id="kg-001", + status=ExtractionRunStatus.IDLE, + worker_count=0, + pause_requested=False, + started_at=None, + completed_at=None, + orchestrator_pid=None, + ) + + with ( + patch( + "extraction.infrastructure.extraction_run_reconciliation.ExtractionJobRepository", + return_value=repo, + ), + patch( + "extraction.infrastructure.extraction_run_reconciliation.advance_extraction_baselines_for_knowledge_graph", + new_callable=AsyncMock, + return_value=0, + ), + ): + reconciled, run_was_active = await reconcile_quiescent_extraction_run( + session=session, + knowledge_graph_id="kg-001", + ) + + assert reconciled is False + assert run_was_active is False + session.commit.assert_not_awaited() diff --git a/src/api/tests/unit/management/infrastructure/test_extraction_baseline_updater.py b/src/api/tests/unit/management/infrastructure/test_extraction_baseline_updater.py index 283e8727f..9bd7ae2fb 100644 --- a/src/api/tests/unit/management/infrastructure/test_extraction_baseline_updater.py +++ b/src/api/tests/unit/management/infrastructure/test_extraction_baseline_updater.py @@ -11,6 +11,7 @@ from management.domain.value_objects import 
DataSourceId, Schedule, ScheduleType from management.infrastructure.extraction_baseline_updater import ( advance_extraction_baselines_for_knowledge_graph, + seed_unset_extraction_baselines_for_knowledge_graph, ) from shared_kernel.datasource_types import DataSourceAdapterType @@ -76,3 +77,39 @@ async def test_advance_extraction_baselines_skips_sources_without_ingested_head( assert updated == 0 assert ds.last_extraction_baseline_commit == "keep-me" mock_repo.save.assert_not_awaited() + + +@pytest.mark.asyncio +async def test_seed_unset_extraction_baselines_sets_only_null_baselines() -> None: + ds_prepared = _make_ds( + ds_id="ds-a", + last_extraction_baseline_commit=None, + last_prepared_commit="prepared-a", + ) + ds_unprepared = _make_ds( + ds_id="ds-b", + last_extraction_baseline_commit=None, + ) + ds_already_set = _make_ds( + ds_id="ds-c", + last_extraction_baseline_commit="existing", + clone_head_commit="prepared-c", + ) + mock_repo = AsyncMock() + mock_repo.find_by_knowledge_graph.return_value = [ + ds_prepared, + ds_unprepared, + ds_already_set, + ] + + updated = await seed_unset_extraction_baselines_for_knowledge_graph( + session=AsyncMock(), + knowledge_graph_id="kg-001", + data_source_repository=mock_repo, + ) + + assert updated == 1 + assert ds_prepared.last_extraction_baseline_commit == "prepared-a" + assert ds_unprepared.last_extraction_baseline_commit is None + assert ds_already_set.last_extraction_baseline_commit == "existing" + mock_repo.save.assert_awaited_once_with(ds_prepared) diff --git a/src/api/tests/unit/management/infrastructure/test_sync_lifecycle_handler.py b/src/api/tests/unit/management/infrastructure/test_sync_lifecycle_handler.py index d7050c518..3a99c3d20 100644 --- a/src/api/tests/unit/management/infrastructure/test_sync_lifecycle_handler.py +++ b/src/api/tests/unit/management/infrastructure/test_sync_lifecycle_handler.py @@ -212,6 +212,65 @@ async def test_ingestion_prepared_does_not_overwrite_existing_baseline( assert 
ds.last_prepared_commit == "abc123" assert ds.last_extraction_baseline_commit == "existing-baseline" + async def test_ingestion_prepared_seeds_unset_baselines_for_sibling_sources( + self, + handler: SyncLifecycleHandler, + mock_sync_run_repo: AsyncMock, + mock_ds_repo: AsyncMock, + ): + run = _make_sync_run(status="ingesting", ds_id="ds-a") + mock_sync_run_repo.get_by_id.return_value = run + + from management.domain.aggregates import DataSource + from management.domain.value_objects import DataSourceId, Schedule, ScheduleType + from shared_kernel.datasource_types import DataSourceAdapterType + + now = datetime.now(UTC) + prepared_ds = DataSource( + id=DataSourceId(value="ds-a"), + knowledge_graph_id="kg-001", + tenant_id="tenant-001", + name="Prepared", + adapter_type=DataSourceAdapterType.GITHUB, + connection_config={"owner": "org", "repo": "prepared"}, + credentials_path=None, + schedule=Schedule(schedule_type=ScheduleType.MANUAL), + last_sync_at=None, + created_at=now, + updated_at=now, + ) + sibling_ds = DataSource( + id=DataSourceId(value="ds-b"), + knowledge_graph_id="kg-001", + tenant_id="tenant-001", + name="Sibling", + adapter_type=DataSourceAdapterType.GITHUB, + connection_config={"owner": "org", "repo": "sibling"}, + credentials_path=None, + schedule=Schedule(schedule_type=ScheduleType.MANUAL), + last_sync_at=None, + last_prepared_commit="sibling-prepared", + clone_head_commit="sibling-prepared", + created_at=now, + updated_at=now, + ) + mock_ds_repo.get_by_id.return_value = prepared_ds + mock_ds_repo.find_by_knowledge_graph.return_value = [prepared_ds, sibling_ds] + + await handler.handle( + "IngestionPrepared", + _payload( + sync_run_id=run.id, + job_package_id="pkg-001", + prepared_commit_sha="prepared-a", + prepared_file_count=12, + ), + ) + + assert prepared_ds.last_extraction_baseline_commit == "prepared-a" + assert sibling_ds.last_extraction_baseline_commit == "sibling-prepared" + assert mock_ds_repo.save.await_count == 2 + @pytest.mark.asyncio 
class TestJobPackageProducedTransition: diff --git a/src/api/tests/unit/management/presentation/test_data_sources_routes.py b/src/api/tests/unit/management/presentation/test_data_sources_routes.py index 0a17d7d71..e94ec7a24 100644 --- a/src/api/tests/unit/management/presentation/test_data_sources_routes.py +++ b/src/api/tests/unit/management/presentation/test_data_sources_routes.py @@ -114,6 +114,27 @@ def mock_write_session() -> AsyncMock: return session +@pytest.fixture(autouse=True) +def _noop_reconcile_quiescent_extraction_run(monkeypatch: pytest.MonkeyPatch) -> None: + """List data sources reconciles extraction runs; unit tests skip that path.""" + + async def _noop(**_kwargs: object) -> tuple[bool, bool]: + return False, False + + monkeypatch.setattr( + "management.presentation.data_sources.routes.reconcile_quiescent_extraction_run", + _noop, + ) + monkeypatch.setattr( + "management.presentation.data_sources.routes.get_extraction_run_orchestrator", + lambda **_kwargs: MagicMock(), + ) + monkeypatch.setattr( + "management.presentation.data_sources.routes.get_write_sessionmaker", + lambda _request: MagicMock(), + ) + + @pytest.fixture def test_client( mock_ds_service: AsyncMock, From 32465752ee41b3e841c2b095870c4e271dce1ae8 Mon Sep 17 00:00:00 2001 From: aredenba-rh <aredenba@redhat.com> Date: Thu, 18 Jun 2026 14:21:34 -0400 Subject: [PATCH 145/153] feat(management): add maintenance pipeline and fix workspace hub status Wire scheduled delta ingest and by-files maintenance jobs through the background scheduler, with API and dev-ui support for commit checks and maintenance runs. Treat data sources as prepared once initial ingestion completes so new commits only surface on the Maintain step. 
Co-authored-by: Cursor <cursoragent@cursor.com> --- Makefile | 11 +- specs/extraction/maintenance-jobs.spec.md | 99 ++++ specs/index.spec.md | 1 + .../agentic_ci_extraction_job_runner.py | 4 +- .../infrastructure/maintenance_job_prompt.py | 28 + .../openshell_extraction_job_runner.py | 4 +- .../repositories/extraction_job_repository.py | 48 ++ .../workload_runtime_settings.py | 7 + .../management/maintenance_changed_files.py | 77 +++ .../maintenance_job_materializer.py | 102 ++++ .../maintenance_pipeline_dependencies.py | 99 ++++ .../maintenance_pipeline_service.py | 537 ++++++++++++++++++ src/api/main.py | 12 + .../services/knowledge_graph_service.py | 120 +--- .../dependencies/knowledge_graph.py | 10 +- src/api/management/domain/value_objects.py | 40 ++ .../extraction_baseline_updater.py | 22 +- .../git_diff_summary_service.py | 14 +- .../knowledge_graph_repository.py | 6 + .../management/ports/maintenance_pipeline.py | 21 + src/api/management/ports/repositories.py | 4 + .../presentation/data_sources/routes.py | 7 +- .../presentation/knowledge_graphs/models.py | 46 ++ .../presentation/knowledge_graphs/routes.py | 7 + src/api/tests/fakes/management.py | 3 + ...xtraction_run_reconciliation_repository.py | 40 ++ .../test_workload_runtime_settings.py | 27 +- .../test_maintenance_job_materializer.py | 48 ++ .../test_knowledge_graph_service.py | 60 +- .../test_maintenance_pipeline_service.py | 320 +++++++++++ .../test_extraction_baseline_updater.py | 15 + .../test_knowledge_graphs_routes.py | 12 +- .../GraphMaintenanceWorkspace.vue | 104 +--- .../pages/knowledge-graphs/[kgId]/manage.vue | 4 +- .../app/tests/kg-data-sources-phase1.test.ts | 8 + .../app/tests/kg-manage-workspace-hub.test.ts | 20 + src/dev-ui/app/utils/kgDataSourcesCommits.ts | 5 + src/dev-ui/app/utils/kgMaintenanceSchedule.ts | 9 + 38 files changed, 1772 insertions(+), 229 deletions(-) create mode 100644 specs/extraction/maintenance-jobs.spec.md create mode 100644 
src/api/extraction/infrastructure/maintenance_job_prompt.py create mode 100644 src/api/infrastructure/management/maintenance_changed_files.py create mode 100644 src/api/infrastructure/management/maintenance_job_materializer.py create mode 100644 src/api/infrastructure/management/maintenance_pipeline_dependencies.py create mode 100644 src/api/infrastructure/management/maintenance_pipeline_service.py create mode 100644 src/api/management/ports/maintenance_pipeline.py create mode 100644 src/api/tests/unit/extraction/infrastructure/test_extraction_run_reconciliation_repository.py create mode 100644 src/api/tests/unit/infrastructure/management/test_maintenance_job_materializer.py create mode 100644 src/api/tests/unit/management/application/test_maintenance_pipeline_service.py diff --git a/Makefile b/Makefile index 7e8871b1c..2e046b181 100755 --- a/Makefile +++ b/Makefile @@ -24,9 +24,14 @@ certs: dev: certs @echo "🧰 [Development] Starting application containers..." @./scripts/cleanup-openshell-sandboxes.sh - docker compose -f compose.yaml -f compose.dev.yaml --profile build-only build agent-runtime - docker compose -f compose.yaml build - HOST_UID=$$(id -u) HOST_GID=$$(id -g) docker compose -f compose.yaml -f compose.dev.yaml --profile ui up -d + @HOST_UID=$$(id -u) HOST_GID=$$(id -g) \ + docker compose -f compose.yaml -f compose.dev.yaml --profile build-only build agent-runtime + @HOST_UID=$$(id -u) HOST_GID=$$(id -g) \ + docker compose -f compose.yaml build + @HOST_UID=$$(id -u) HOST_GID=$$(id -g) \ + docker compose -f compose.yaml -f compose.dev.yaml --profile ui up -d --force-recreate api + @HOST_UID=$$(id -u) HOST_GID=$$(id -g) \ + docker compose -f compose.yaml -f compose.dev.yaml --profile ui up -d @echo "Done." 
@echo "----------------------------" @echo "API Root: http://localhost:8000" diff --git a/specs/extraction/maintenance-jobs.spec.md b/specs/extraction/maintenance-jobs.spec.md new file mode 100644 index 000000000..198c8b1ae --- /dev/null +++ b/specs/extraction/maintenance-jobs.spec.md @@ -0,0 +1,99 @@ +# Maintenance Jobs + +## Purpose +Knowledge-graph maintenance keeps the graph aligned with upstream Git sources after the +last extraction baseline. Maintenance uses **by-files** extraction jobs only: changed files +since `last_extraction_baseline_commit` are discovered across all connected data sources, +batched by `files_per_job`, and executed through the OpenShell extraction job runtime with +diff-aware agent prompts. + +Maintenance ingest uses `ingest_only` syncs (incremental JobPackages). Maintenance does +**not** run the legacy per-sync AI extraction pipeline. + +## Requirements + +### Requirement: Changed-File Discovery +The system SHALL discover maintenance work by comparing each Git-backed data source's +`last_extraction_baseline_commit` to its `tracked_branch_head_commit` using the Git compare +API, aggregating changed file paths across all sources on the knowledge graph. + +#### Scenario: No commit delta +- GIVEN every connected source has `tracked_branch_head_commit` equal to `last_extraction_baseline_commit` +- WHEN maintenance is triggered +- THEN the run outcome is `no-changes` +- AND no ingest syncs or extraction jobs are created + +#### Scenario: Cross-source file totals +- GIVEN sources A, B, and C have 1, 10, and 4 changed files respectively since baseline +- WHEN maintenance jobs are materialized with `files_per_job = 2` +- THEN 8 pending maintenance jobs are created spanning all 15 changed files + +### Requirement: Maintenance Ingest +The system SHALL prepare incremental ingestion context for every source with a commit delta +using `pipeline_mode = ingest_only` and baseline `last_extraction_baseline_commit`. 
+ +#### Scenario: Ingest without legacy extraction +- GIVEN maintenance is triggered for sources with commit deltas +- WHEN ingest syncs complete +- THEN each sync run reaches `ingested` without `JobPackageProduced` extraction +- AND latest prepared JobPackages contain only incremental file changes + +### Requirement: Maintenance Job Materialization +The system SHALL materialize pending extraction jobs under job set name `maintenance` using +strategy `by_files`, ignoring configured `by_instances` job sets. + +#### Scenario: Job batching +- GIVEN changed files are resolved to prepared JobPackage paths +- WHEN jobs are materialized with `files_per_job = N` +- THEN each job receives at most `N` target files +- AND jobs are named under the `maintenance` job set + +### Requirement: Maintenance Agent Prompt +The system SHALL provide maintenance extraction jobs with prompts that list assigned changed +files, include available unified diff hunks, and instruct the agent to update the full +knowledge graph (all entity types and relationships) to reflect the changes. + +#### Scenario: Diff context in prompt +- GIVEN a maintenance job assigned two modified files with GitHub compare patches +- WHEN the OpenShell runner builds the job prompt +- THEN the prompt names both files and includes their diff content (truncated when necessary) + +### Requirement: Maintenance Extraction Execution +The system SHALL execute maintenance jobs through the same OpenShell extraction job worker +pool used by graph-management extraction runs. + +#### Scenario: Worker start after materialization +- GIVEN maintenance ingest has completed and jobs are materialized +- WHEN the pipeline advances +- THEN extraction workers are started for the knowledge graph +- AND pending maintenance jobs are claimed by workers + +### Requirement: Scheduled Maintenance +The system SHALL execute knowledge-graph maintenance schedules stored on the knowledge graph. 
+ +#### Scenario: Daily schedule fires +- GIVEN a knowledge graph has `maintenance_schedule.enabled = true` and `next_run_at` in the past +- WHEN the maintenance scheduler polls +- THEN a maintenance pipeline is triggered for that knowledge graph +- AND `next_run_at` is advanced to the next cron occurrence + +### Requirement: Pipeline Orchestration +The system SHALL orchestrate maintenance as: detect deltas → ingest_only syncs → wait for +ingest completion → materialize maintenance jobs → start extraction workers. + +#### Scenario: Manual full pipeline +- GIVEN the operator triggers maintenance with `start_extraction = true` +- WHEN ingest syncs finish successfully +- THEN maintenance jobs are materialized and extraction workers start without a separate manual step + +#### Scenario: Ingest failure +- GIVEN any maintenance ingest sync fails +- WHEN the pipeline advances +- THEN the maintenance run outcome is `ingest-failed` +- AND extraction workers are not started + +## Traceability +- `src/dev-ui/app/components/graph-management/GraphMaintenanceWorkspace.vue` +- `src/api/infrastructure/management/maintenance_pipeline_service.py` +- `src/api/infrastructure/management/maintenance_job_materializer.py` +- `src/api/extraction/infrastructure/maintenance_job_prompt.py` diff --git a/specs/index.spec.md b/specs/index.spec.md index 44dcce3c9..2aa88a79e 100644 --- a/specs/index.spec.md +++ b/specs/index.spec.md @@ -68,6 +68,7 @@ AI-assisted schema and extraction workflows that emit MutationLogs for Graph app | [Agent Sessions](extraction/agent-sessions.spec.md) | Session lifecycle, reset behavior, and session metrics | | [Chat Turns](extraction/chat-turns.spec.md) | Graph-management chat streaming, wait states, and turn persistence | | [Sticky Session Runtime](extraction/sticky-session-runtime.spec.md) | Isolated sticky containers, JobPackage context, Claude Agent SDK runtime | +| [Maintenance Jobs](extraction/maintenance-jobs.spec.md) | Scheduled delta ingest, by-files
maintenance jobs, OpenShell execution | ### [Shared Kernel](shared-kernel/) — Cross-Cutting Contracts Capabilities shared across bounded contexts. diff --git a/src/api/extraction/infrastructure/agentic_ci_extraction_job_runner.py b/src/api/extraction/infrastructure/agentic_ci_extraction_job_runner.py index ebcc8db22..5755e34fe 100644 --- a/src/api/extraction/infrastructure/agentic_ci_extraction_job_runner.py +++ b/src/api/extraction/infrastructure/agentic_ci_extraction_job_runner.py @@ -27,9 +27,9 @@ from extraction.infrastructure.extraction_job_metrics import merge_extraction_job_metrics from extraction.infrastructure.extraction_job_prompt import ( EXTRACTION_JOB_INVOKE_PROMPT, - build_extraction_job_prompt, write_extraction_prompt_file, ) +from extraction.infrastructure.maintenance_job_prompt import build_job_run_prompt from extraction.infrastructure.extraction_job_workdir_materializer import ( ExtractionJobWorkdirMaterializer, ) @@ -103,7 +103,7 @@ async def prepare_for_run( credentials=credentials, ) _patch_job_context_api_base(workdir, self._settings.agentic_ci_api_base_url) - prompt = build_extraction_job_prompt(job=job) + prompt = build_job_run_prompt(job=job) return PreparedExtractionJobRun(workdir=workdir, prompt=prompt) async def run_prepared( diff --git a/src/api/extraction/infrastructure/maintenance_job_prompt.py b/src/api/extraction/infrastructure/maintenance_job_prompt.py new file mode 100644 index 000000000..4e32c3eb1 --- /dev/null +++ b/src/api/extraction/infrastructure/maintenance_job_prompt.py @@ -0,0 +1,28 @@ +"""Prompt builders for maintenance extraction jobs.""" + +from __future__ import annotations + +from extraction.domain.extraction_job import ExtractionJobRecord +from extraction.infrastructure.extraction_job_prompt import build_extraction_job_prompt +from infrastructure.management.maintenance_job_materializer import MAINTENANCE_JOB_SET_NAME + + +def build_job_run_prompt(*, job: ExtractionJobRecord) -> str: + """Return the agent prompt for 
one extraction or maintenance job.""" + if job.job_set_name == MAINTENANCE_JOB_SET_NAME: + return build_maintenance_job_prompt(job=job) + return build_extraction_job_prompt(job=job) + + +def build_maintenance_job_prompt(*, job: ExtractionJobRecord) -> str: + """Return a maintenance-specific prompt with diff-aware file instructions.""" + base = build_extraction_job_prompt(job=job) + return ( + f"{base}\n\n" + "## Maintenance objective\n" + "These repository files changed since the last extraction baseline. Use the diff " + "sections above (when present) and the materialized files under repository-files/ " + "to update existing graph instances and relationships. Do not limit updates to " + "only the files' local entities — reconcile downstream references across the " + "entire knowledge graph schema." + ) diff --git a/src/api/extraction/infrastructure/openshell_extraction_job_runner.py b/src/api/extraction/infrastructure/openshell_extraction_job_runner.py index 9ba241191..c8a104e3f 100644 --- a/src/api/extraction/infrastructure/openshell_extraction_job_runner.py +++ b/src/api/extraction/infrastructure/openshell_extraction_job_runner.py @@ -30,9 +30,9 @@ ) from extraction.infrastructure.extraction_job_prompt import ( build_extraction_job_invoke_prompt, - build_extraction_job_prompt, write_extraction_prompt_file, ) +from extraction.infrastructure.maintenance_job_prompt import build_job_run_prompt from extraction.infrastructure.extraction_job_verdict import require_successful_apply from extraction.infrastructure.extraction_job_workdir_layout import mutation_result_path from extraction.infrastructure.extraction_job_workdir_materializer import ( @@ -110,7 +110,7 @@ async def prepare_for_run( credentials=credentials, ) _patch_job_context_api_base(workdir, self._settings.sandbox_reachable_api_base_url()) - prompt = build_extraction_job_prompt(job=job) + prompt = build_job_run_prompt(job=job) return PreparedExtractionJobRun(workdir=workdir, prompt=prompt) async def 
run_prepared( diff --git a/src/api/extraction/infrastructure/repositories/extraction_job_repository.py b/src/api/extraction/infrastructure/repositories/extraction_job_repository.py index a239f0c50..b00fa7723 100644 --- a/src/api/extraction/infrastructure/repositories/extraction_job_repository.py +++ b/src/api/extraction/infrastructure/repositories/extraction_job_repository.py @@ -178,6 +178,54 @@ async def sync_pending_jobs( ) return generated, tuple(warnings) + async def sync_maintenance_pending_jobs( + self, + *, + knowledge_graph_id: str, + jobs: list[ExtractionJobRecord], + job_set_name: str, + ) -> int: + """Replace pending jobs for one maintenance job set.""" + from infrastructure.management.maintenance_job_materializer import ( + MAINTENANCE_JOB_SET_NAME, + ) + + if job_set_name != MAINTENANCE_JOB_SET_NAME: + raise ValueError(f"Unsupported maintenance job set: {job_set_name}") + in_progress = await self.count_in_progress_for_job_set( + knowledge_graph_id=knowledge_graph_id, + job_set_name=job_set_name, + ) + if in_progress > 0: + raise RuntimeError( + f"Cannot refresh maintenance jobs while {in_progress} job(s) are running" + ) + await self._delete_pending_for_job_set( + knowledge_graph_id=knowledge_graph_id, + job_set_name=job_set_name, + ) + for job in jobs: + if job.job_set_name != job_set_name: + continue + self._session.add( + ExtractionJobModel( + id=job.id, + knowledge_graph_id=job.knowledge_graph_id, + job_id=job.job_id, + job_set_name=job.job_set_name, + strategy=job.strategy, + status=job.status.value, + order_index=job.order_index, + description=job.description, + target_instances=[], + target_files=[ + target_file.to_dict() for target_file in job.target_files + ], + ) + ) + await self._session.flush() + return len(jobs) + async def _delete_pending_for_job_set( self, *, diff --git a/src/api/extraction/infrastructure/workload_runtime_settings.py b/src/api/extraction/infrastructure/workload_runtime_settings.py index 37b0721b9..4c5af1fb7 100644 --- 
a/src/api/extraction/infrastructure/workload_runtime_settings.py +++ b/src/api/extraction/infrastructure/workload_runtime_settings.py @@ -209,6 +209,13 @@ def _apply_vertex_env_aliases(self) -> "ExtractionWorkloadRuntimeSettings": break return self + @field_validator("container_run_uid", "container_run_gid", mode="before") + @classmethod + def _empty_container_run_id_to_none(cls, value: object) -> object: + if value == "": + return None + return value + @field_validator("sticky_command", "worker_command", "container_tmpfs_mounts", mode="before") @classmethod def _parse_command(cls, value: object) -> tuple[str, ...]: diff --git a/src/api/infrastructure/management/maintenance_changed_files.py b/src/api/infrastructure/management/maintenance_changed_files.py new file mode 100644 index 000000000..6a1611275 --- /dev/null +++ b/src/api/infrastructure/management/maintenance_changed_files.py @@ -0,0 +1,77 @@ +"""Resolve changed maintenance files to prepared JobPackage targets.""" + +from __future__ import annotations + +from pathlib import Path + +from infrastructure.management.extraction_job_materializer import ( + build_repository_file_catalog, + match_file_patterns, +) +from infrastructure.management.maintenance_job_materializer import ChangedMaintenanceFile +from management.domain.aggregates import DataSource +from management.infrastructure.git_diff_summary_service import GitDiffSummaryService + + +async def collect_changed_maintenance_files( + *, + diff_summary_service: GitDiffSummaryService, + data_sources: list[DataSource], + job_package_work_dir: Path, + job_packages: tuple, + max_files_per_source: int = 10_000, +) -> list[ChangedMaintenanceFile]: + """Collect changed files across sources and map them to prepared package paths.""" + catalog = build_repository_file_catalog( + job_package_work_dir=job_package_work_dir, + job_packages=job_packages, + ) + packages_by_source = {source.data_source_id: source for source in job_packages} + + changed: 
list[ChangedMaintenanceFile] = [] + for data_source in sorted(data_sources, key=lambda ds: ds.name): + summary = await diff_summary_service.build_summary( + data_source=data_source, + max_files=max_files_per_source, + ) + if summary.total_changed_files <= 0: + continue + package = packages_by_source.get(data_source.id.value) + if package is None: + continue + patterns = tuple( + f"**/{entry['path']}" for entry in summary.changed_files if entry.get("path") + ) + matched = match_file_patterns(catalog, patterns) if patterns else [] + matched_by_path = { + target.path: target + for target in matched + if target.repository_folder == package.repository_folder + } + for entry in summary.changed_files: + path = str(entry.get("path", "")).strip() + if not path: + continue + target = matched_by_path.get(path) + if target is None: + continue + changed.append( + ChangedMaintenanceFile( + data_source_id=data_source.id.value, + repository_folder=target.repository_folder, + path=target.path, + status=str(entry.get("status", "modified")), + package_id=target.package_id, + patch=( + str(entry["patch"]) + if entry.get("patch") is not None + else None + ), + ) + ) + return changed + + +__all__ = [ + "collect_changed_maintenance_files", +] diff --git a/src/api/infrastructure/management/maintenance_job_materializer.py b/src/api/infrastructure/management/maintenance_job_materializer.py new file mode 100644 index 000000000..50eee3b97 --- /dev/null +++ b/src/api/infrastructure/management/maintenance_job_materializer.py @@ -0,0 +1,102 @@ +"""Materialize by-files maintenance extraction jobs from changed source files.""" + +from __future__ import annotations + +import hashlib +from dataclasses import dataclass +from typing import Sequence + +from ulid import ULID + +from extraction.domain.extraction_job import ( + ExtractionJobRecord, + ExtractionJobStatus, + ExtractionTargetFile, +) + +MAINTENANCE_JOB_SET_NAME = "maintenance" +_DEFAULT_MAINTENANCE_DESCRIPTION = ( + "Update the knowledge 
graph to reflect upstream source changes since the last " + "extraction baseline. Consider all entity types, relationship instances, and " + "ontology constraints so the graph stays accurate." +) + + +@dataclass(frozen=True) +class ChangedMaintenanceFile: + """One changed repository file eligible for a maintenance extraction job.""" + + data_source_id: str + repository_folder: str + path: str + status: str + package_id: str + patch: str | None = None + + +def _batch_items(items: Sequence[ChangedMaintenanceFile], batch_size: int) -> list[list[ChangedMaintenanceFile]]: + size = max(1, batch_size) + materialized = list(items) + return [list(materialized[i : i + size]) for i in range(0, len(materialized), size)] + + +def _generate_job_id(batch_idx: int, content_hash: str) -> str: + hash_suffix = hashlib.sha256(content_hash.encode()).hexdigest()[:8] + return f"{MAINTENANCE_JOB_SET_NAME}_batch_{batch_idx:04d}_{hash_suffix}" + + +def _build_maintenance_description(changed_files: Sequence[ChangedMaintenanceFile]) -> str: + lines = [_DEFAULT_MAINTENANCE_DESCRIPTION, "", "## Changed files in this job"] + for changed in changed_files: + lines.append(f"- [{changed.status}] {changed.repository_folder}/{changed.path}") + if changed.patch: + lines.extend(["", f"### Diff: {changed.path}", "```diff", changed.patch, "```"]) + lines.extend( + [ + "", + "## Scope", + "Inspect the assigned files under repository-files/, read the live graph via " + "workload-graph-read helpers, and emit JSONL mutations that keep every affected " + "entity and relationship accurate.", + ] + ) + return "\n".join(lines) + + +def materialize_maintenance_jobs( + *, + knowledge_graph_id: str, + changed_files: Sequence[ChangedMaintenanceFile], + files_per_job: int, +) -> list[ExtractionJobRecord]: + """Build pending maintenance jobs batched across all changed files on a KG.""" + if not changed_files: + return [] + + jobs: list[ExtractionJobRecord] = [] + for batch_idx, batch in 
enumerate(_batch_items(changed_files, files_per_job), start=1): + content_hash = "|".join( + f"{item.repository_folder}:{item.path}:{item.status}" for item in batch + ) + target_files = tuple( + ExtractionTargetFile( + path=item.path, + repository_folder=item.repository_folder, + package_id=item.package_id, + ) + for item in batch + ) + jobs.append( + ExtractionJobRecord( + id=str(ULID()), + knowledge_graph_id=knowledge_graph_id, + job_id=_generate_job_id(batch_idx, content_hash), + job_set_name=MAINTENANCE_JOB_SET_NAME, + strategy="by_files", + status=ExtractionJobStatus.PENDING, + order_index=batch_idx - 1, + description=_build_maintenance_description(batch), + target_files=target_files, + ) + ) + return jobs diff --git a/src/api/infrastructure/management/maintenance_pipeline_dependencies.py b/src/api/infrastructure/management/maintenance_pipeline_dependencies.py new file mode 100644 index 000000000..d30bab955 --- /dev/null +++ b/src/api/infrastructure/management/maintenance_pipeline_dependencies.py @@ -0,0 +1,99 @@ +"""FastAPI dependencies for maintenance pipeline orchestration.""" + +from __future__ import annotations + +from typing import Annotated, Any, Callable + +from fastapi import Depends, Request +from sqlalchemy.ext.asyncio import AsyncSession + +from extraction.infrastructure.repositories.extraction_job_repository import ( + ExtractionJobRepository, +) +from iam.application.value_objects import CurrentUser +from iam.dependencies.user import get_current_user +from infrastructure.authorization_dependencies import get_spicedb_client +from infrastructure.database.dependencies import get_write_session +from infrastructure.management.maintenance_pipeline_service import ( + MaintenancePipelineService, +) +from infrastructure.outbox.repository import OutboxRepository +from infrastructure.settings import get_management_settings, get_spicedb_settings +from management.infrastructure.git_diff_summary_service import GitDiffSummaryService +from 
management.infrastructure.repositories import ( + DataSourceRepository, + DataSourceSyncRunRepository, + FernetSecretStore, + KnowledgeGraphRepository, +) +from shared_kernel.authorization.protocols import AuthorizationProvider +from shared_kernel.authorization.spicedb.client import SpiceDBClient + + +def _diff_summary_service_factory( + secret_store: FernetSecretStore, +) -> Callable[[str], GitDiffSummaryService]: + def factory(tenant_id: str) -> GitDiffSummaryService: + return GitDiffSummaryService( + credential_reader=secret_store, + tenant_id=tenant_id, + ) + + return factory + + +def build_maintenance_pipeline_for_background( + *, + session_factory: Any, + session: AsyncSession, +) -> MaintenancePipelineService: + """Construct a maintenance pipeline for scheduler background tasks.""" + settings = get_management_settings() + outbox = OutboxRepository(session=session) + secret_store = FernetSecretStore( + session=session, + encryption_keys=settings.encryption_key.get_secret_value().split(","), + ) + spicedb_settings = get_spicedb_settings() + authz = SpiceDBClient( + endpoint=spicedb_settings.endpoint, + preshared_key=spicedb_settings.preshared_key.get_secret_value(), + use_tls=spicedb_settings.use_tls, + cert_path=spicedb_settings.cert_path, + ) + return MaintenancePipelineService( + session=session, + session_factory=session_factory, + knowledge_graph_repository=KnowledgeGraphRepository(session=session, outbox=outbox), + data_source_repository=DataSourceRepository(session=session, outbox=outbox), + sync_run_repository=DataSourceSyncRunRepository(session=session), + extraction_job_repository=ExtractionJobRepository(session=session), + authorization=authz, + tenant_id="", + diff_summary_service_factory=_diff_summary_service_factory(secret_store), + ) + + +def get_maintenance_pipeline_service( + request: Request, + session: Annotated[AsyncSession, Depends(get_write_session)], + authz: Annotated[AuthorizationProvider, Depends(get_spicedb_client)], + current_user: 
Annotated[CurrentUser, Depends(get_current_user)], +) -> MaintenancePipelineService: + settings = get_management_settings() + outbox = OutboxRepository(session=session) + secret_store = FernetSecretStore( + session=session, + encryption_keys=settings.encryption_key.get_secret_value().split(","), + ) + return MaintenancePipelineService( + session=session, + session_factory=request.app.state.write_sessionmaker, + knowledge_graph_repository=KnowledgeGraphRepository(session=session, outbox=outbox), + data_source_repository=DataSourceRepository(session=session, outbox=outbox), + sync_run_repository=DataSourceSyncRunRepository(session=session), + extraction_job_repository=ExtractionJobRepository(session=session), + authorization=authz, + tenant_id=current_user.tenant_id.value, + diff_summary_service_factory=_diff_summary_service_factory(secret_store), + ) diff --git a/src/api/infrastructure/management/maintenance_pipeline_service.py b/src/api/infrastructure/management/maintenance_pipeline_service.py new file mode 100644 index 000000000..fae09df3c --- /dev/null +++ b/src/api/infrastructure/management/maintenance_pipeline_service.py @@ -0,0 +1,537 @@ +"""Orchestrate knowledge-graph maintenance ingest and extraction jobs.""" + +from __future__ import annotations + +from datetime import UTC, datetime +from pathlib import Path +from typing import TYPE_CHECKING, Callable + +from croniter import croniter +from ulid import ULID + +from extraction.infrastructure.extraction_run_orchestrator import get_extraction_run_orchestrator +from extraction.infrastructure.prepared_job_package_reader import SqlPreparedJobPackageReader +from extraction.infrastructure.repositories.extraction_job_repository import ( + ExtractionJobRepository, +) +from extraction.infrastructure.workload_runtime_settings import ( + get_extraction_workload_runtime_settings, +) +from infrastructure.management.maintenance_changed_files import collect_changed_maintenance_files +from 
infrastructure.management.maintenance_job_materializer import ( + MAINTENANCE_JOB_SET_NAME, + materialize_maintenance_jobs, +) +from management.domain.aggregates import DataSource, KnowledgeGraph +from management.domain.entities.data_source_sync_run import DataSourceSyncRun +from management.domain.value_objects import ( + KnowledgeGraphId, + KnowledgeGraphMaintenanceRunOutcome, + KnowledgeGraphMaintenanceRunRecord, + KnowledgeGraphMaintenanceSchedule, +) +from management.infrastructure.git_diff_summary_service import GitDiffSummaryService +from management.ports.exceptions import UnauthorizedError +from shared_kernel.authorization.protocols import AuthorizationProvider +from shared_kernel.authorization.types import ( + Permission, + ResourceType, + format_resource, + format_subject, +) + +if TYPE_CHECKING: + from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker + + from management.ports.repositories import ( + IDataSourceRepository, + IDataSourceSyncRunRepository, + IKnowledgeGraphRepository, + ) + + +class MaintenancePipelineService: + """Coordinate maintenance ingest, job materialization, and extraction workers.""" + + def __init__( + self, + *, + session: AsyncSession, + session_factory: async_sessionmaker[AsyncSession], + knowledge_graph_repository: IKnowledgeGraphRepository, + data_source_repository: IDataSourceRepository, + sync_run_repository: IDataSourceSyncRunRepository, + extraction_job_repository: ExtractionJobRepository, + authorization: AuthorizationProvider, + tenant_id: str, + diff_summary_service_factory: Callable[[str], GitDiffSummaryService], + ) -> None: + self._session = session + self._session_factory = session_factory + self._kg_repo = knowledge_graph_repository + self._ds_repo = data_source_repository + self._sync_run_repo = sync_run_repository + self._job_repo = extraction_job_repository + self._authz = authorization + self._tenant_id = tenant_id + self._diff_summary_service_factory = diff_summary_service_factory + + async def 
trigger_scheduled( + self, + *, + kg_id: str, + files_per_job: int, + worker_count: int, + ) -> KnowledgeGraphMaintenanceRunRecord: + """Start maintenance for a scheduled run without user authorization.""" + kg = await self._kg_repo.get_by_id(KnowledgeGraphId(value=kg_id)) + if kg is None: + raise ValueError(f"Knowledge graph {kg_id} not found") + return await self._trigger_for_kg( + kg=kg, + requested_by="maintenance-scheduler", + files_per_job=files_per_job, + worker_count=worker_count, + start_extraction=True, + ) + + async def trigger( + self, + *, + user_id: str, + kg_id: str, + files_per_job: int = 2, + worker_count: int = 8, + start_extraction: bool = True, + ) -> KnowledgeGraphMaintenanceRunRecord: + """Start maintenance ingest for changed sources on a knowledge graph.""" + kg = await self._require_manage_kg(user_id=user_id, kg_id=kg_id) + return await self._trigger_for_kg( + kg=kg, + requested_by=user_id, + files_per_job=files_per_job, + worker_count=worker_count, + start_extraction=start_extraction, + ) + + async def _trigger_for_kg( + self, + *, + kg: KnowledgeGraph, + requested_by: str, + files_per_job: int, + worker_count: int, + start_extraction: bool, + ) -> KnowledgeGraphMaintenanceRunRecord: + kg_id = kg.id.value + data_sources = await self._ds_repo.find_by_knowledge_graph(kg_id) + run_id = str(ULID()) + now = datetime.now(UTC) + normalized_files_per_job = max(1, int(files_per_job)) + normalized_workers = max(1, int(worker_count)) + + if not data_sources: + run = self._record_run( + kg=kg, + run=KnowledgeGraphMaintenanceRunRecord( + run_id=run_id, + triggered_at=now, + outcome=KnowledgeGraphMaintenanceRunOutcome.PREFLIGHT_FAILED, + message="No data sources connected to this knowledge graph", + files_per_job=normalized_files_per_job, + worker_count=normalized_workers, + ), + ) + await self._session.commit() + return run + + changed_sources = self._changed_sources(data_sources) + target_ids = tuple(ds.id.value for ds in data_sources) + if not 
changed_sources: + run = self._record_run( + kg=kg, + run=KnowledgeGraphMaintenanceRunRecord( + run_id=run_id, + triggered_at=now, + outcome=KnowledgeGraphMaintenanceRunOutcome.NO_CHANGES, + message="No source commit delta detected across connected data sources", + target_data_source_ids=target_ids, + files_per_job=normalized_files_per_job, + worker_count=normalized_workers, + ), + ) + await self._session.commit() + return run + + try: + sync_run_ids = await self._launch_ingest_only_syncs( + changed_sources=changed_sources, + requested_by=requested_by, + now=now, + ) + run = self._record_run( + kg=kg, + run=KnowledgeGraphMaintenanceRunRecord( + run_id=run_id, + triggered_at=now, + outcome=KnowledgeGraphMaintenanceRunOutcome.INGEST_STARTED, + message=( + "Maintenance ingest started for " + f"{len(changed_sources)} changed source(s)" + ), + target_data_source_ids=tuple(ds.id.value for ds in changed_sources), + sync_run_ids=sync_run_ids, + files_per_job=normalized_files_per_job, + worker_count=normalized_workers, + ), + ) + await self._session.commit() + if start_extraction: + advanced = await self.advance_for_knowledge_graph( + kg_id=kg_id, + tenant_id=kg.tenant_id, + ) + if advanced is not None: + return advanced + return run + except Exception as exc: + run = self._record_run( + kg=kg, + run=KnowledgeGraphMaintenanceRunRecord( + run_id=run_id, + triggered_at=now, + outcome=KnowledgeGraphMaintenanceRunOutcome.LAUNCH_FAILED, + message=f"Failed to launch maintenance ingest: {exc}", + target_data_source_ids=tuple(ds.id.value for ds in changed_sources), + files_per_job=normalized_files_per_job, + worker_count=normalized_workers, + ), + ) + await self._session.commit() + return run + + async def advance_pending_pipelines(self) -> int: + """Advance in-flight maintenance pipelines for all knowledge graphs.""" + advanced = 0 + async with self._session_factory() as session: + from infrastructure.outbox.repository import OutboxRepository + from 
management.infrastructure.repositories.knowledge_graph_repository import ( + KnowledgeGraphRepository, + ) + + outbox = OutboxRepository(session=session) + kg_repo = KnowledgeGraphRepository(session=session, outbox=outbox) + kgs = await kg_repo.find_all() + for kg in kgs: + if not kg.maintenance_run_history: + continue + latest = kg.maintenance_run_history[-1] + if latest.outcome != KnowledgeGraphMaintenanceRunOutcome.INGEST_STARTED: + continue + service = self._with_session(session) + result = await service.advance_for_knowledge_graph( + kg_id=kg.id.value, + tenant_id=kg.tenant_id, + ) + if result is not None: + advanced += 1 + await session.commit() + return advanced + + async def check_scheduled_triggers(self, *, now: datetime | None = None) -> int: + """Trigger maintenance for knowledge graphs whose schedule is due.""" + current = now or datetime.now(UTC) + triggered = 0 + async with self._session_factory() as session: + from infrastructure.outbox.repository import OutboxRepository + from management.infrastructure.repositories.knowledge_graph_repository import ( + KnowledgeGraphRepository, + ) + + outbox = OutboxRepository(session=session) + kg_repo = KnowledgeGraphRepository(session=session, outbox=outbox) + for kg in await kg_repo.find_all(): + schedule = kg.maintenance_schedule + if schedule is None or not schedule.enabled: + continue + if schedule.next_run_at is None or schedule.next_run_at > current: + continue + service = self._with_session(session) + await service.trigger_scheduled( + kg_id=kg.id.value, + files_per_job=schedule.files_per_job, + worker_count=schedule.worker_count, + ) + kg.set_maintenance_schedule( + KnowledgeGraphMaintenanceSchedule( + enabled=schedule.enabled, + cron_expression=schedule.cron_expression, + timezone_name=schedule.timezone_name, + next_run_at=service._compute_next_run_at( + cron_expression=schedule.cron_expression, + timezone_name=schedule.timezone_name, + now=current, + ), + files_per_job=schedule.files_per_job, + 
worker_count=schedule.worker_count, + ) + ) + await kg_repo.save(kg) + triggered += 1 + await session.commit() + return triggered + + async def advance_for_knowledge_graph( + self, + *, + kg_id: str, + tenant_id: str, + ) -> KnowledgeGraphMaintenanceRunRecord | None: + """Materialize maintenance jobs and start workers when ingest has finished.""" + kg = await self._kg_repo.get_by_id(KnowledgeGraphId(value=kg_id)) + if kg is None or not kg.maintenance_run_history: + return None + latest = kg.maintenance_run_history[-1] + if latest.outcome != KnowledgeGraphMaintenanceRunOutcome.INGEST_STARTED: + return None + + statuses = await self._sync_run_statuses(latest.sync_run_ids) + if not statuses: + return None + if any(status in {"pending", "ingesting"} for status in statuses): + return None + if any(status == "failed" for status in statuses): + run = self._replace_latest_run( + kg=kg, + latest=latest, + outcome=KnowledgeGraphMaintenanceRunOutcome.INGEST_FAILED, + message="One or more maintenance ingest syncs failed", + ) + await self._kg_repo.save(kg) + await self._session.commit() + return run + if not all(status == "ingested" for status in statuses): + return None + + data_sources = await self._ds_repo.find_by_knowledge_graph(kg_id) + changed_sources = [ + ds + for ds in data_sources + if ds.id.value in set(latest.target_data_source_ids) + ] + runtime_settings = get_extraction_workload_runtime_settings() + prepared_reader = SqlPreparedJobPackageReader( + session=self._session, + job_package_work_dir=Path(runtime_settings.job_package_work_dir), + ) + job_packages = await prepared_reader.list_latest_for_knowledge_graph( + knowledge_graph_id=kg_id, + ) + diff_service = self._diff_summary_service_factory(tenant_id) + changed_files = await collect_changed_maintenance_files( + diff_summary_service=diff_service, + data_sources=changed_sources, + job_package_work_dir=Path(runtime_settings.job_package_work_dir), + job_packages=job_packages, + ) + files_per_job = 
latest.files_per_job or 2 + jobs = materialize_maintenance_jobs( + knowledge_graph_id=kg_id, + changed_files=changed_files, + files_per_job=files_per_job, + ) + if not jobs: + run = self._replace_latest_run( + kg=kg, + latest=latest, + outcome=KnowledgeGraphMaintenanceRunOutcome.NO_CHANGES, + message="Ingest completed but no changed files were mapped to JobPackages", + changed_file_count=0, + jobs_materialized=0, + ) + await self._kg_repo.save(kg) + await self._session.commit() + return run + + await self._job_repo.sync_maintenance_pending_jobs( + knowledge_graph_id=kg_id, + jobs=jobs, + job_set_name=MAINTENANCE_JOB_SET_NAME, + ) + orchestrator = get_extraction_run_orchestrator(session_factory=self._session_factory) + await orchestrator.start( + tenant_id=tenant_id, + knowledge_graph_id=kg_id, + worker_count=latest.worker_count or 8, + ) + run = self._replace_latest_run( + kg=kg, + latest=latest, + outcome=KnowledgeGraphMaintenanceRunOutcome.EXTRACTION_STARTED, + message=( + f"Materialized {len(jobs)} maintenance job(s) and started extraction workers" + ), + changed_file_count=len(changed_files), + jobs_materialized=len(jobs), + ) + await self._kg_repo.save(kg) + await self._session.commit() + return run + + def _with_session(self, session: AsyncSession) -> MaintenancePipelineService: + from extraction.infrastructure.repositories.extraction_job_repository import ( + ExtractionJobRepository, + ) + from infrastructure.outbox.repository import OutboxRepository + from management.infrastructure.repositories.data_source_repository import ( + DataSourceRepository, + ) + from management.infrastructure.repositories.data_source_sync_run_repository import ( + DataSourceSyncRunRepository, + ) + from management.infrastructure.repositories.knowledge_graph_repository import ( + KnowledgeGraphRepository, + ) + + outbox = OutboxRepository(session=session) + return MaintenancePipelineService( + session=session, + session_factory=self._session_factory, + 
knowledge_graph_repository=KnowledgeGraphRepository(session=session, outbox=outbox), + data_source_repository=DataSourceRepository(session=session, outbox=outbox), + sync_run_repository=DataSourceSyncRunRepository(session=session), + extraction_job_repository=ExtractionJobRepository(session=session), + authorization=self._authz, + tenant_id=self._tenant_id, + diff_summary_service_factory=self._diff_summary_service_factory, + ) + + async def _require_manage_kg(self, *, user_id: str, kg_id: str) -> KnowledgeGraph: + resource = format_resource(ResourceType.KNOWLEDGE_GRAPH, kg_id) + subject = format_subject(ResourceType.USER, user_id) + granted = await self._authz.check_permission( + resource=resource, + permission=Permission.MANAGE, + subject=subject, + ) + if not granted: + raise UnauthorizedError( + f"User {user_id} lacks manage permission on knowledge graph {kg_id}" + ) + kg = await self._kg_repo.get_by_id(KnowledgeGraphId(value=kg_id)) + if kg is None or kg.tenant_id != self._tenant_id: + from management.ports.exceptions import KnowledgeGraphNotFoundError + + raise KnowledgeGraphNotFoundError(f"Knowledge graph {kg_id} not found") + return kg + + @staticmethod + def _changed_sources(data_sources: list[DataSource]) -> list[DataSource]: + return [ + ds + for ds in data_sources + if ds.tracked_branch_head_commit is not None + and ds.last_extraction_baseline_commit is not None + and ds.tracked_branch_head_commit != ds.last_extraction_baseline_commit + ] + + async def _launch_ingest_only_syncs( + self, + *, + changed_sources: list[DataSource], + requested_by: str, + now: datetime, + ) -> tuple[str, ...]: + sync_run_ids: list[str] = [] + for data_source in changed_sources: + sync_run_id = str(ULID()) + sync_run = DataSourceSyncRun( + id=sync_run_id, + data_source_id=data_source.id.value, + status="pending", + started_at=now, + completed_at=None, + error=None, + created_at=now, + ) + await self._sync_run_repo.save(sync_run) + data_source.request_sync( + 
sync_run_id=sync_run_id, + requested_by=requested_by, + pipeline_mode="ingest_only", + ) + await self._ds_repo.save(data_source) + sync_run_ids.append(sync_run_id) + return tuple(sync_run_ids) + + async def _sync_run_statuses(self, sync_run_ids: tuple[str, ...]) -> list[str]: + statuses: list[str] = [] + for sync_run_id in sync_run_ids: + sync_run = await self._sync_run_repo.get_by_id(sync_run_id) + if sync_run is None: + continue + statuses.append(sync_run.status) + return statuses + + def _record_run( + self, + *, + kg: KnowledgeGraph, + run: KnowledgeGraphMaintenanceRunRecord, + ) -> KnowledgeGraphMaintenanceRunRecord: + kg.append_maintenance_run(run) + return run + + def _replace_latest_run( + self, + *, + kg: KnowledgeGraph, + latest: KnowledgeGraphMaintenanceRunRecord, + outcome: KnowledgeGraphMaintenanceRunOutcome, + message: str, + changed_file_count: int | None = None, + jobs_materialized: int | None = None, + ) -> KnowledgeGraphMaintenanceRunRecord: + updated = KnowledgeGraphMaintenanceRunRecord( + run_id=latest.run_id, + triggered_at=latest.triggered_at, + outcome=outcome, + message=message, + target_data_source_ids=latest.target_data_source_ids, + sync_run_ids=latest.sync_run_ids, + changed_file_count=( + changed_file_count + if changed_file_count is not None + else latest.changed_file_count + ), + jobs_materialized=( + jobs_materialized + if jobs_materialized is not None + else latest.jobs_materialized + ), + files_per_job=latest.files_per_job, + worker_count=latest.worker_count, + ) + history = list(kg.maintenance_run_history) + history[-1] = updated + kg.maintenance_run_history = tuple(history) + return updated + + @staticmethod + def _compute_next_run_at( + *, + cron_expression: str, + timezone_name: str, + now: datetime, + ) -> datetime: + from zoneinfo import ZoneInfo + + if not croniter.is_valid(cron_expression): + raise ValueError(f"Invalid cron expression: {cron_expression!r}") + tz = ZoneInfo(timezone_name) + local_now = now.astimezone(tz) + 
itr = croniter(cron_expression, local_now) + next_local = itr.get_next(datetime) + if next_local.tzinfo is None: + next_local = next_local.replace(tzinfo=tz) + return next_local.astimezone(UTC) diff --git a/src/api/main.py b/src/api/main.py index 3b08db27f..d577ce185 100644 --- a/src/api/main.py +++ b/src/api/main.py @@ -467,6 +467,9 @@ async def _run_scheduler_loop(session_factory: Any, poll_interval: int) -> None: """ from infrastructure.outbox.repository import OutboxRepository from management.application.services.sync_scheduler import SyncSchedulerService + from infrastructure.management.maintenance_pipeline_dependencies import ( + build_maintenance_pipeline_for_background, + ) from management.infrastructure.repositories.data_source_repository import ( DataSourceRepository, ) @@ -486,6 +489,15 @@ async def _run_scheduler_loop(session_factory: Any, poll_interval: int) -> None: ) await scheduler.check_and_trigger_due_syncs() await session.commit() + + async with session_factory() as session: + maintenance = build_maintenance_pipeline_for_background( + session_factory=session_factory, + session=session, + ) + await maintenance.check_scheduled_triggers() + await maintenance.advance_pending_pipelines() + await session.commit() except asyncio.CancelledError: break except Exception: diff --git a/src/api/management/application/services/knowledge_graph_service.py b/src/api/management/application/services/knowledge_graph_service.py index 67f6a0f8b..8d874d0e3 100644 --- a/src/api/management/application/services/knowledge_graph_service.py +++ b/src/api/management/application/services/knowledge_graph_service.py @@ -42,6 +42,7 @@ IKnowledgeGraphRepository, ) from management.ports.canonical_schema import ICanonicalSchemaRepository +from management.ports.maintenance_pipeline import MaintenancePipelinePort from management.ports.secret_store import ISecretStoreRepository from shared_kernel.authorization.protocols import AuthorizationProvider from 
shared_kernel.authorization.types import ( @@ -71,6 +72,7 @@ def __init__( sync_run_repository: IDataSourceSyncRunRepository | None = None, secret_store: ISecretStoreRepository | None = None, canonical_schema_repository: ICanonicalSchemaRepository | None = None, + maintenance_pipeline: MaintenancePipelinePort | None = None, ) -> None: """Initialize KnowledgeGraphService with dependencies. @@ -93,6 +95,7 @@ def __init__( self._sync_run_repo = sync_run_repository self._secret_store = secret_store self._canonical_schema_repo = canonical_schema_repository + self._maintenance_pipeline = maintenance_pipeline def _compute_next_run_at_utc( self, @@ -170,6 +173,8 @@ async def upsert_maintenance_schedule( cron_expression: str, timezone_name: str, enabled: bool, + files_per_job: int = 2, + worker_count: int = 8, ) -> KnowledgeGraphMaintenanceSchedule: """Create or update KG-level maintenance schedule configuration.""" kg = await self._get_tenant_scoped_kg( @@ -177,6 +182,9 @@ async def upsert_maintenance_schedule( user_id=user_id, permission=Permission.MANAGE, ) + existing = kg.maintenance_schedule + normalized_files_per_job = max(1, int(files_per_job)) + normalized_workers = max(1, int(worker_count)) next_run_at = ( self._compute_next_run_at_utc( cron_expression=cron_expression, @@ -190,6 +198,8 @@ async def upsert_maintenance_schedule( cron_expression=cron_expression, timezone_name=timezone_name, next_run_at=next_run_at, + files_per_job=normalized_files_per_job, + worker_count=normalized_workers, ) kg.set_maintenance_schedule(schedule) await self._kg_repo.save(kg) @@ -209,104 +219,24 @@ async def list_maintenance_runs( return list(kg.maintenance_run_history[-capped_limit:])[::-1] async def trigger_maintenance_run( - self, *, user_id: str, kg_id: str + self, + *, + user_id: str, + kg_id: str, + files_per_job: int = 2, + worker_count: int = 8, + start_extraction: bool = True, ) -> KnowledgeGraphMaintenanceRunRecord: - """Trigger maintenance orchestration across all data 
sources in a KG.""" - kg = await self._get_tenant_scoped_kg( - kg_id=kg_id, + """Trigger maintenance ingest and extraction jobs for a knowledge graph.""" + if self._maintenance_pipeline is None: + raise ValueError("Maintenance pipeline is not configured") + return await self._maintenance_pipeline.trigger( user_id=user_id, - permission=Permission.MANAGE, + kg_id=kg_id, + files_per_job=files_per_job, + worker_count=worker_count, + start_extraction=start_extraction, ) - if self._ds_repo is None: - raise ValueError("Data source repository is not configured") - - data_sources = await self._ds_repo.find_by_knowledge_graph(kg_id) - run_id = str(ULID()) - now = datetime.now(UTC) - - if not data_sources: - run = KnowledgeGraphMaintenanceRunRecord( - run_id=run_id, - triggered_at=now, - outcome=KnowledgeGraphMaintenanceRunOutcome.PREFLIGHT_FAILED, - message="No data sources connected to this knowledge graph", - ) - kg.append_maintenance_run(run) - await self._kg_repo.save(kg) - await self._session.commit() - return run - - changed_sources = [ - ds - for ds in data_sources - if ds.tracked_branch_head_commit is not None - and ds.last_extraction_baseline_commit is not None - and ds.tracked_branch_head_commit != ds.last_extraction_baseline_commit - ] - target_data_source_ids = tuple(ds.id.value for ds in data_sources) - - if not changed_sources: - run = KnowledgeGraphMaintenanceRunRecord( - run_id=run_id, - triggered_at=now, - outcome=KnowledgeGraphMaintenanceRunOutcome.NO_CHANGES, - message="No source commit delta detected across connected data sources", - target_data_source_ids=target_data_source_ids, - ) - kg.append_maintenance_run(run) - await self._kg_repo.save(kg) - await self._session.commit() - return run - - if self._sync_run_repo is None: - run = KnowledgeGraphMaintenanceRunRecord( - run_id=run_id, - triggered_at=now, - outcome=KnowledgeGraphMaintenanceRunOutcome.LAUNCH_FAILED, - message="Sync run repository is not configured", - 
target_data_source_ids=tuple(ds.id.value for ds in changed_sources), - ) - kg.append_maintenance_run(run) - await self._kg_repo.save(kg) - await self._session.commit() - return run - - try: - for data_source in changed_sources: - sync_run_id = str(ULID()) - sync_run = DataSourceSyncRun( - id=sync_run_id, - data_source_id=data_source.id.value, - status="pending", - started_at=now, - completed_at=None, - error=None, - created_at=now, - ) - await self._sync_run_repo.save(sync_run) - data_source.request_sync(sync_run_id=sync_run_id, requested_by=user_id) - await self._ds_repo.save(data_source) - - run = KnowledgeGraphMaintenanceRunRecord( - run_id=run_id, - triggered_at=now, - outcome=KnowledgeGraphMaintenanceRunOutcome.STARTED, - message="Scheduled maintenance sync runs started", - target_data_source_ids=tuple(ds.id.value for ds in changed_sources), - ) - except Exception as exc: - run = KnowledgeGraphMaintenanceRunRecord( - run_id=run_id, - triggered_at=now, - outcome=KnowledgeGraphMaintenanceRunOutcome.LAUNCH_FAILED, - message=f"Failed to launch maintenance syncs: {exc}", - target_data_source_ids=tuple(ds.id.value for ds in changed_sources), - ) - - kg.append_maintenance_run(run) - await self._kg_repo.save(kg) - await self._session.commit() - return run async def _check_permission( self, diff --git a/src/api/management/dependencies/knowledge_graph.py b/src/api/management/dependencies/knowledge_graph.py index 03ab069f4..54e95971e 100644 --- a/src/api/management/dependencies/knowledge_graph.py +++ b/src/api/management/dependencies/knowledge_graph.py @@ -16,9 +16,11 @@ from infrastructure.outbox.repository import OutboxRepository from infrastructure.settings import get_management_settings from management.application.observability import DefaultKnowledgeGraphServiceProbe -from management.application.services.knowledge_graph_service import ( - KnowledgeGraphService, +from management.application.services.knowledge_graph_service import KnowledgeGraphService +from 
infrastructure.management.maintenance_pipeline_dependencies import ( + get_maintenance_pipeline_service, ) +from management.ports.maintenance_pipeline import MaintenancePipelinePort from management.infrastructure.repositories import ( DataSourceRepository, DataSourceSyncRunRepository, @@ -35,6 +37,9 @@ def get_knowledge_graph_service( session: Annotated[AsyncSession, Depends(get_write_session)], authz: Annotated[AuthorizationProvider, Depends(get_spicedb_client)], current_user: Annotated[CurrentUser, Depends(get_current_user)], + maintenance_pipeline: Annotated[ + MaintenancePipelinePort, Depends(get_maintenance_pipeline_service) + ], ) -> KnowledgeGraphService: """Get KnowledgeGraphService instance. @@ -66,4 +71,5 @@ def get_knowledge_graph_service( scope_to_tenant=current_user.tenant_id.value, probe=DefaultKnowledgeGraphServiceProbe(), canonical_schema_repository=GraphCanonicalSchemaRepository(session), + maintenance_pipeline=maintenance_pipeline, ) diff --git a/src/api/management/domain/value_objects.py b/src/api/management/domain/value_objects.py index 06667e905..4dcd4c2d1 100644 --- a/src/api/management/domain/value_objects.py +++ b/src/api/management/domain/value_objects.py @@ -150,6 +150,9 @@ class KnowledgeGraphMaintenanceRunOutcome(StrEnum): """Allowed outcomes for a KG-scoped maintenance orchestration attempt.""" STARTED = "started" + INGEST_STARTED = "ingest-started" + EXTRACTION_STARTED = "extraction-started" + INGEST_FAILED = "ingest-failed" NO_CHANGES = "no-changes" PREFLIGHT_FAILED = "preflight-failed" LAUNCH_FAILED = "launch-failed" @@ -163,6 +166,8 @@ class KnowledgeGraphMaintenanceSchedule: cron_expression: str timezone_name: str next_run_at: datetime | None = None + files_per_job: int = 2 + worker_count: int = 8 def to_dict(self) -> dict[str, Any]: """Serialize to JSON-compatible dictionary.""" @@ -173,6 +178,8 @@ def to_dict(self) -> dict[str, Any]: "next_run_at": ( self.next_run_at.isoformat() if self.next_run_at is not None else None ), + 
"files_per_job": self.files_per_job, + "worker_count": self.worker_count, } @classmethod @@ -184,11 +191,15 @@ def from_dict(cls, data: dict[str, Any]) -> "KnowledgeGraphMaintenanceSchedule": if next_run_at_raw is not None else None ) + files_per_job = int(data.get("files_per_job", 2) or 2) + worker_count = int(data.get("worker_count", 8) or 8) return cls( enabled=bool(data.get("enabled", False)), cron_expression=str(data.get("cron_expression", "0 2 * * *")), timezone_name=str(data.get("timezone_name", "UTC")), next_run_at=next_run_at, + files_per_job=max(1, files_per_job), + worker_count=max(1, worker_count), ) @@ -201,6 +212,11 @@ class KnowledgeGraphMaintenanceRunRecord: outcome: KnowledgeGraphMaintenanceRunOutcome message: str | None = None target_data_source_ids: tuple[str, ...] = field(default_factory=tuple) + sync_run_ids: tuple[str, ...] = field(default_factory=tuple) + changed_file_count: int | None = None + jobs_materialized: int | None = None + files_per_job: int | None = None + worker_count: int | None = None def to_dict(self) -> dict[str, Any]: """Serialize to JSON-compatible dictionary.""" @@ -210,6 +226,11 @@ def to_dict(self) -> dict[str, Any]: "outcome": self.outcome.value, "message": self.message, "target_data_source_ids": list(self.target_data_source_ids), + "sync_run_ids": list(self.sync_run_ids), + "changed_file_count": self.changed_file_count, + "jobs_materialized": self.jobs_materialized, + "files_per_job": self.files_per_job, + "worker_count": self.worker_count, } @classmethod @@ -223,6 +244,25 @@ def from_dict(cls, data: dict[str, Any]) -> "KnowledgeGraphMaintenanceRunRecord" target_data_source_ids=tuple( str(ds_id) for ds_id in data.get("target_data_source_ids", []) ), + sync_run_ids=tuple(str(run_id) for run_id in data.get("sync_run_ids", [])), + changed_file_count=( + int(data["changed_file_count"]) + if data.get("changed_file_count") is not None + else None + ), + jobs_materialized=( + int(data["jobs_materialized"]) + if 
data.get("jobs_materialized") is not None + else None + ), + files_per_job=( + int(data["files_per_job"]) + if data.get("files_per_job") is not None + else None + ), + worker_count=( + int(data["worker_count"]) if data.get("worker_count") is not None else None + ), ) diff --git a/src/api/management/infrastructure/extraction_baseline_updater.py b/src/api/management/infrastructure/extraction_baseline_updater.py index e5215d020..68bf73143 100644 --- a/src/api/management/infrastructure/extraction_baseline_updater.py +++ b/src/api/management/infrastructure/extraction_baseline_updater.py @@ -10,6 +10,16 @@ from management.ports.repositories import IDataSourceRepository +def _default_data_source_repository(session: AsyncSession) -> IDataSourceRepository: + from infrastructure.outbox.repository import OutboxRepository + from management.infrastructure.repositories.data_source_repository import ( + DataSourceRepository, + ) + + outbox = OutboxRepository(session=session) + return DataSourceRepository(session=session, outbox=outbox) + + async def advance_extraction_baselines_for_knowledge_graph( *, session: AsyncSession, @@ -18,11 +28,7 @@ async def advance_extraction_baselines_for_knowledge_graph( ) -> int: """Advance extraction baselines for every prepared source on a knowledge graph.""" if data_source_repository is None: - from management.infrastructure.repositories.data_source_repository import ( - DataSourceRepository, - ) - - data_source_repository = DataSourceRepository(session) + data_source_repository = _default_data_source_repository(session) data_sources = await data_source_repository.find_by_knowledge_graph(knowledge_graph_id) updated = 0 @@ -44,11 +50,7 @@ async def seed_unset_extraction_baselines_for_knowledge_graph( ) -> int: """Seed NULL extraction baselines from each source's ingested head on a KG.""" if data_source_repository is None: - from management.infrastructure.repositories.data_source_repository import ( - DataSourceRepository, - ) - - 
data_source_repository = DataSourceRepository(session) + data_source_repository = _default_data_source_repository(session) data_sources = await data_source_repository.find_by_knowledge_graph(knowledge_graph_id) updated = 0 diff --git a/src/api/management/infrastructure/git_diff_summary_service.py b/src/api/management/infrastructure/git_diff_summary_service.py index 2a270bfaa..9868ad688 100644 --- a/src/api/management/infrastructure/git_diff_summary_service.py +++ b/src/api/management/infrastructure/git_diff_summary_service.py @@ -120,12 +120,14 @@ async def build_summary( status = str(file.get("status", "modified")) if status in counts: counts[status] += 1 - files.append( - { - "path": str(file.get("filename", "")), - "status": status, - } - ) + patch = file.get("patch") + entry: dict[str, str] = { + "path": str(file.get("filename", "")), + "status": status, + } + if isinstance(patch, str) and patch.strip(): + entry["patch"] = patch[:32_000] + files.append(entry) files_truncated = len(files) > max_files visible_files = tuple(files[:max_files]) diff --git a/src/api/management/infrastructure/repositories/knowledge_graph_repository.py b/src/api/management/infrastructure/repositories/knowledge_graph_repository.py index ec086af06..54de0854c 100644 --- a/src/api/management/infrastructure/repositories/knowledge_graph_repository.py +++ b/src/api/management/infrastructure/repositories/knowledge_graph_repository.py @@ -178,6 +178,12 @@ async def find_by_tenant(self, tenant_id: str) -> list[KnowledgeGraph]: self._probe.knowledge_graphs_listed(tenant_id, len(kgs)) return kgs + async def find_all(self) -> list[KnowledgeGraph]: + stmt = select(KnowledgeGraphModel) + result = await self._session.execute(stmt) + models = result.scalars().all() + return [self._to_domain(model) for model in models] + async def delete(self, knowledge_graph: KnowledgeGraph) -> bool: stmt = select(KnowledgeGraphModel).where( KnowledgeGraphModel.id == knowledge_graph.id.value diff --git 
a/src/api/management/ports/maintenance_pipeline.py b/src/api/management/ports/maintenance_pipeline.py new file mode 100644 index 000000000..5756cbc29 --- /dev/null +++ b/src/api/management/ports/maintenance_pipeline.py @@ -0,0 +1,21 @@ +"""Port for knowledge-graph maintenance pipeline orchestration.""" + +from __future__ import annotations + +from typing import Protocol + +from management.domain.value_objects import KnowledgeGraphMaintenanceRunRecord + + +class MaintenancePipelinePort(Protocol): + """Coordinates maintenance ingest, job materialization, and extraction.""" + + async def trigger( + self, + *, + user_id: str, + kg_id: str, + files_per_job: int = 2, + worker_count: int = 8, + start_extraction: bool = True, + ) -> KnowledgeGraphMaintenanceRunRecord: ... diff --git a/src/api/management/ports/repositories.py b/src/api/management/ports/repositories.py index 03a8c1dfb..fa0b71e89 100644 --- a/src/api/management/ports/repositories.py +++ b/src/api/management/ports/repositories.py @@ -66,6 +66,10 @@ async def find_by_tenant(self, tenant_id: str) -> list[KnowledgeGraph]: """ ... + async def find_all(self) -> list[KnowledgeGraph]: + """List all knowledge graphs across tenants.""" + ... + async def delete(self, knowledge_graph: KnowledgeGraph) -> bool: """Delete a knowledge graph and emit domain events. 
diff --git a/src/api/management/presentation/data_sources/routes.py b/src/api/management/presentation/data_sources/routes.py index ca05d74f9..44f353754 100644 --- a/src/api/management/presentation/data_sources/routes.py +++ b/src/api/management/presentation/data_sources/routes.py @@ -28,7 +28,7 @@ ) from management.infrastructure.git_diff_summary_service import GitDiffSummaryService from management.infrastructure.job_package_archive_reader import SqlJobPackageArchiveReader -from management.ports.exceptions import UnauthorizedError +from management.ports.exceptions import DuplicateDataSourceNameError, UnauthorizedError from management.ports.repositories import IDataSourceSyncRunRepository from shared_kernel.job_package.archive_availability import ( job_package_archive_exists, @@ -400,6 +400,11 @@ async def create_data_source( status_code=status.HTTP_404_NOT_FOUND, detail=str(e), ) + except DuplicateDataSourceNameError as e: + raise HTTPException( + status_code=status.HTTP_409_CONFLICT, + detail=str(e), + ) except Exception: raise HTTPException( status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, diff --git a/src/api/management/presentation/knowledge_graphs/models.py b/src/api/management/presentation/knowledge_graphs/models.py index 1843c5f05..e1ff89a77 100644 --- a/src/api/management/presentation/knowledge_graphs/models.py +++ b/src/api/management/presentation/knowledge_graphs/models.py @@ -193,6 +193,38 @@ class MaintenanceScheduleUpsertRequest(BaseModel): default="UTC", description="IANA timezone identifier used for schedule evaluation", ) + files_per_job: int = Field( + default=2, + ge=1, + description="Number of changed files batched into each maintenance job", + ) + worker_count: int = Field( + default=8, + ge=1, + description="Parallel OpenShell workers for maintenance extraction", + ) + + +class MaintenanceRunTriggerRequest(BaseModel): + """Request body for manual KG maintenance orchestration.""" + + files_per_job: int = Field( + default=2, + ge=1, + 
description="Number of changed files batched into each maintenance job", + ) + worker_count: int = Field( + default=8, + ge=1, + description="Parallel OpenShell workers for maintenance extraction", + ) + start_extraction: bool = Field( + default=True, + description=( + "When true, advance to extraction after ingest completes " + "(may return ingest-started if ingest is still running)" + ), + ) class MaintenanceScheduleResponse(BaseModel): @@ -202,6 +234,8 @@ class MaintenanceScheduleResponse(BaseModel): cron_expression: str timezone_name: str next_run_at: datetime | None + files_per_job: int + worker_count: int @classmethod def from_domain( @@ -212,6 +246,8 @@ def from_domain( cron_expression=schedule.cron_expression, timezone_name=schedule.timezone_name, next_run_at=schedule.next_run_at, + files_per_job=schedule.files_per_job, + worker_count=schedule.worker_count, ) @@ -223,6 +259,11 @@ class MaintenanceRunResponse(BaseModel): outcome: str message: str | None target_data_source_ids: list[str] + sync_run_ids: list[str] = Field(default_factory=list) + changed_file_count: int | None = None + jobs_materialized: int | None = None + files_per_job: int | None = None + worker_count: int | None = None @classmethod def from_domain( @@ -234,6 +275,11 @@ def from_domain( outcome=run.outcome.value, message=run.message, target_data_source_ids=list(run.target_data_source_ids), + sync_run_ids=list(run.sync_run_ids), + changed_file_count=run.changed_file_count, + jobs_materialized=run.jobs_materialized, + files_per_job=run.files_per_job, + worker_count=run.worker_count, ) diff --git a/src/api/management/presentation/knowledge_graphs/routes.py b/src/api/management/presentation/knowledge_graphs/routes.py index 4d33a0b4b..871f1d45e 100644 --- a/src/api/management/presentation/knowledge_graphs/routes.py +++ b/src/api/management/presentation/knowledge_graphs/routes.py @@ -25,6 +25,7 @@ KnowledgeGraphWorkspaceStatusResponse, MaintenanceRunListResponse, MaintenanceRunResponse, + 
MaintenanceRunTriggerRequest, MaintenanceScheduleResponse, MaintenanceScheduleUpsertRequest, OntologyConfigRequest, @@ -95,6 +96,8 @@ async def upsert_knowledge_graph_maintenance_schedule( cron_expression=request.cron_expression, timezone_name=request.timezone_name, enabled=request.enabled, + files_per_job=request.files_per_job, + worker_count=request.worker_count, ) return MaintenanceScheduleResponse.from_domain(schedule) except UnauthorizedError: @@ -164,6 +167,7 @@ async def list_knowledge_graph_maintenance_runs( ) async def trigger_knowledge_graph_maintenance_run( kg_id: str, + request: MaintenanceRunTriggerRequest, current_user: Annotated[CurrentUser, Depends(get_current_user)], service: Annotated[KnowledgeGraphService, Depends(get_knowledge_graph_service)], ) -> MaintenanceRunResponse: @@ -172,6 +176,9 @@ async def trigger_knowledge_graph_maintenance_run( run = await service.trigger_maintenance_run( user_id=current_user.user_id.value, kg_id=kg_id, + files_per_job=request.files_per_job, + worker_count=request.worker_count, + start_extraction=request.start_extraction, ) return MaintenanceRunResponse.from_domain(run) except UnauthorizedError: diff --git a/src/api/tests/fakes/management.py b/src/api/tests/fakes/management.py index fba71c304..43beb7152 100644 --- a/src/api/tests/fakes/management.py +++ b/src/api/tests/fakes/management.py @@ -58,6 +58,9 @@ async def get_by_id( async def find_by_tenant(self, tenant_id: str) -> list[KnowledgeGraph]: return [kg for kg in self._store.values() if kg.tenant_id == tenant_id] + async def find_all(self) -> list[KnowledgeGraph]: + return list(self._store.values()) + async def delete(self, knowledge_graph: KnowledgeGraph) -> bool: self.deleted.append(knowledge_graph) if knowledge_graph.id.value in self._store: diff --git a/src/api/tests/unit/extraction/infrastructure/test_extraction_run_reconciliation_repository.py b/src/api/tests/unit/extraction/infrastructure/test_extraction_run_reconciliation_repository.py new file mode 
100644 index 000000000..3fd12e2b7 --- /dev/null +++ b/src/api/tests/unit/extraction/infrastructure/test_extraction_run_reconciliation_repository.py @@ -0,0 +1,40 @@ +"""Tests for reconciliation repository wiring.""" + +from __future__ import annotations + +from unittest.mock import AsyncMock, patch + +import pytest + +from extraction.infrastructure.extraction_run_reconciliation import ( + reconcile_quiescent_extraction_run, +) + + +@pytest.mark.asyncio +async def test_reconcile_uses_data_source_repository_with_outbox() -> None: + session = AsyncMock() + repo = AsyncMock() + repo.count_by_status.return_value = {"pending": 0, "in_progress": 0} + repo.get_run.return_value = None + + with ( + patch( + "extraction.infrastructure.extraction_run_reconciliation.ExtractionJobRepository", + return_value=repo, + ), + patch( + "extraction.infrastructure.extraction_run_reconciliation.advance_extraction_baselines_for_knowledge_graph", + new_callable=AsyncMock, + return_value=0, + ) as advance_baselines, + ): + await reconcile_quiescent_extraction_run( + session=session, + knowledge_graph_id="kg-001", + ) + + advance_baselines.assert_awaited_once_with( + session=session, + knowledge_graph_id="kg-001", + ) diff --git a/src/api/tests/unit/extraction/infrastructure/test_workload_runtime_settings.py b/src/api/tests/unit/extraction/infrastructure/test_workload_runtime_settings.py index 042488d23..3c3a951dd 100644 --- a/src/api/tests/unit/extraction/infrastructure/test_workload_runtime_settings.py +++ b/src/api/tests/unit/extraction/infrastructure/test_workload_runtime_settings.py @@ -2,10 +2,11 @@ from __future__ import annotations +import os +from unittest.mock import patch + from extraction.infrastructure.workload_runtime_factory import resolve_workload_token_signing_key -from extraction.infrastructure.workload_runtime_settings import ( - ExtractionWorkloadRuntimeSettings, -) +from extraction.infrastructure.workload_runtime_settings import ExtractionWorkloadRuntimeSettings class 
TestExtractionWorkloadRuntimeSettings: @@ -39,3 +40,23 @@ def test_sticky_turn_timeout_accepts_one_hour(self) -> None: settings = ExtractionWorkloadRuntimeSettings(sticky_turn_timeout_seconds=3600.0) assert settings.sticky_turn_timeout_seconds == 3600.0 + + def test_empty_container_run_env_strings_do_not_crash_settings_load(self) -> None: + env = { + "KARTOGRAPH_EXTRACTION_RUNTIME_CONTAINER_RUN_UID": "", + "KARTOGRAPH_EXTRACTION_RUNTIME_CONTAINER_RUN_GID": "", + "HOST_UID": "", + "HOST_GID": "", + } + with patch.dict(os.environ, env, clear=False): + settings = ExtractionWorkloadRuntimeSettings() + + assert settings.container_run_uid is None + assert settings.container_run_gid is None + + def test_container_run_uid_falls_back_to_host_uid_env(self) -> None: + with patch.dict(os.environ, {"HOST_UID": "1000", "HOST_GID": "1001"}, clear=False): + settings = ExtractionWorkloadRuntimeSettings() + + assert settings.container_run_uid == 1000 + assert settings.container_run_gid == 1001 diff --git a/src/api/tests/unit/infrastructure/management/test_maintenance_job_materializer.py b/src/api/tests/unit/infrastructure/management/test_maintenance_job_materializer.py new file mode 100644 index 000000000..4061ce672 --- /dev/null +++ b/src/api/tests/unit/infrastructure/management/test_maintenance_job_materializer.py @@ -0,0 +1,48 @@ +"""Tests for maintenance job materialization.""" + +from __future__ import annotations + +from infrastructure.management.maintenance_job_materializer import ( + MAINTENANCE_JOB_SET_NAME, + ChangedMaintenanceFile, + materialize_maintenance_jobs, +) + + +def _changed(path: str, *, folder: str = "repo-a", status: str = "modified") -> ChangedMaintenanceFile: + return ChangedMaintenanceFile( + data_source_id="ds-a", + repository_folder=folder, + path=path, + status=status, + package_id="pkg-a", + patch=f"diff for {path}", + ) + + +def test_materialize_maintenance_jobs_batches_across_sources() -> None: + changed = [ + _changed("a.txt", folder="repo-a"), + 
*[_changed(f"b{i}.txt", folder="repo-b") for i in range(10)], + *[_changed(f"c{i}.txt", folder="repo-c") for i in range(4)], + ] + + jobs = materialize_maintenance_jobs( + knowledge_graph_id="kg-001", + changed_files=changed, + files_per_job=2, + ) + + assert len(jobs) == 8 + assert all(job.job_set_name == MAINTENANCE_JOB_SET_NAME for job in jobs) + assert all(job.strategy == "by_files" for job in jobs) + assert sum(len(job.target_files) for job in jobs) == 15 + assert "diff for a.txt" in jobs[0].description + + +def test_materialize_maintenance_jobs_returns_empty_for_no_changes() -> None: + assert materialize_maintenance_jobs( + knowledge_graph_id="kg-001", + changed_files=[], + files_per_job=2, + ) == [] diff --git a/src/api/tests/unit/management/application/test_knowledge_graph_service.py b/src/api/tests/unit/management/application/test_knowledge_graph_service.py index cc6c58abe..df34995fa 100644 --- a/src/api/tests/unit/management/application/test_knowledge_graph_service.py +++ b/src/api/tests/unit/management/application/test_knowledge_graph_service.py @@ -1425,12 +1425,15 @@ async def test_trigger_maintenance_run_records_no_changes_outcome( kg_repo.seed(kg) await _grant_kg_manage(authz, kg.id.value, user_id) - ds_no_change = _make_ds(ds_id="ds-no-change", kg_id=kg.id.value, tenant_id=tenant_id) - ds_no_change.last_extraction_baseline_commit = "abc123" - ds_no_change.tracked_branch_head_commit = "abc123" - ds_repo.seed(ds_no_change) + expected = KnowledgeGraphMaintenanceRunRecord( + run_id="run-no-change", + triggered_at=datetime.now(UTC), + outcome=KnowledgeGraphMaintenanceRunOutcome.NO_CHANGES, + target_data_source_ids=("ds-no-change",), + ) + pipeline = AsyncMock() + pipeline.trigger = AsyncMock(return_value=expected) - sync_run_repo = _InMemorySyncRunRepository() svc = KnowledgeGraphService( session=mock_session, knowledge_graph_repository=kg_repo, @@ -1439,7 +1442,7 @@ async def test_trigger_maintenance_run_records_no_changes_outcome( authz=authz, 
scope_to_tenant=tenant_id, probe=probe, - sync_run_repository=sync_run_repo, + maintenance_pipeline=pipeline, ) run = await svc.trigger_maintenance_run( @@ -1447,26 +1450,33 @@ async def test_trigger_maintenance_run_records_no_changes_outcome( kg_id=kg.id.value, ) - assert isinstance(run, KnowledgeGraphMaintenanceRunRecord) - assert run.outcome == KnowledgeGraphMaintenanceRunOutcome.NO_CHANGES - assert run.target_data_source_ids == ("ds-no-change",) - assert len(sync_run_repo.saved) == 0 + assert run is expected + pipeline.trigger.assert_awaited_once_with( + user_id=user_id, + kg_id=kg.id.value, + files_per_job=2, + worker_count=8, + start_extraction=True, + ) @pytest.mark.asyncio - async def test_trigger_maintenance_run_records_started_and_creates_sync_runs( + async def test_trigger_maintenance_run_delegates_to_pipeline( self, mock_session, kg_repo, ds_repo, secret_store, authz, probe, tenant_id, user_id ): - """When DS commit deltas exist, trigger records STARTED and enqueues sync runs.""" + """Maintenance trigger delegates ingest orchestration to the pipeline service.""" kg = _make_kg(kg_id="kg-maint-003", tenant_id=tenant_id) kg_repo.seed(kg) await _grant_kg_manage(authz, kg.id.value, user_id) - ds_changed = _make_ds(ds_id="ds-changed", kg_id=kg.id.value, tenant_id=tenant_id) - ds_changed.last_extraction_baseline_commit = "abc123" - ds_changed.tracked_branch_head_commit = "def456" - ds_repo.seed(ds_changed) + expected = KnowledgeGraphMaintenanceRunRecord( + run_id="run-ingest", + triggered_at=datetime.now(UTC), + outcome=KnowledgeGraphMaintenanceRunOutcome.INGEST_STARTED, + target_data_source_ids=("ds-changed",), + ) + pipeline = AsyncMock() + pipeline.trigger = AsyncMock(return_value=expected) - sync_run_repo = _InMemorySyncRunRepository() svc = KnowledgeGraphService( session=mock_session, knowledge_graph_repository=kg_repo, @@ -1475,17 +1485,25 @@ async def test_trigger_maintenance_run_records_started_and_creates_sync_runs( authz=authz, 
scope_to_tenant=tenant_id, probe=probe, - sync_run_repository=sync_run_repo, + maintenance_pipeline=pipeline, ) run = await svc.trigger_maintenance_run( user_id=user_id, kg_id=kg.id.value, + files_per_job=5, + worker_count=3, + start_extraction=False, ) - assert run.outcome == KnowledgeGraphMaintenanceRunOutcome.STARTED - assert run.target_data_source_ids == ("ds-changed",) - assert len(sync_run_repo.saved) == 1 + assert run.outcome == KnowledgeGraphMaintenanceRunOutcome.INGEST_STARTED + pipeline.trigger.assert_awaited_once_with( + user_id=user_id, + kg_id=kg.id.value, + files_per_job=5, + worker_count=3, + start_extraction=False, + ) @pytest.mark.asyncio async def test_returns_empty_list_when_no_kgs_in_workspace( diff --git a/src/api/tests/unit/management/application/test_maintenance_pipeline_service.py b/src/api/tests/unit/management/application/test_maintenance_pipeline_service.py new file mode 100644 index 000000000..70d9d3bf9 --- /dev/null +++ b/src/api/tests/unit/management/application/test_maintenance_pipeline_service.py @@ -0,0 +1,320 @@ +"""Unit tests for MaintenancePipelineService.""" + +from __future__ import annotations + +from contextlib import asynccontextmanager +from datetime import UTC, datetime, timedelta +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest + +from infrastructure.management.maintenance_pipeline_service import ( + MaintenancePipelineService, +) +from management.domain.aggregates import DataSource, KnowledgeGraph +from management.domain.entities.data_source_sync_run import DataSourceSyncRun +from management.domain.events.data_source import SyncStarted +from management.domain.value_objects import ( + DataSourceId, + KnowledgeGraphMaintenanceRunOutcome, + KnowledgeGraphMaintenanceRunRecord, + KnowledgeGraphMaintenanceSchedule, + Schedule, + ScheduleType, +) +from shared_kernel.datasource_types import DataSourceAdapterType +from tests.fakes.authorization import InMemoryAuthorizationProvider +from 
tests.fakes.management import InMemoryDataSourceRepository, InMemoryKnowledgeGraphRepository + + +def _make_kg(*, tenant_id: str = "tenant-1") -> KnowledgeGraph: + return KnowledgeGraph.create( + tenant_id=tenant_id, + workspace_id="ws-1", + name="Test KG", + description="", + ) + + +def _make_ds( + *, + ds_id: str, + kg_id: str, + tenant_id: str, + baseline: str = "abc", + head: str = "def", +) -> DataSource: + now = datetime.now(UTC) + ds = DataSource( + id=DataSourceId(value=ds_id), + knowledge_graph_id=kg_id, + tenant_id=tenant_id, + name=f"source-{ds_id}", + adapter_type=DataSourceAdapterType.GITHUB, + connection_config={"repo_url": "https://example.com/repo.git"}, + credentials_path=None, + schedule=Schedule(schedule_type=ScheduleType.MANUAL), + last_sync_at=None, + created_at=now, + updated_at=now, + ) + ds.last_extraction_baseline_commit = baseline + ds.tracked_branch_head_commit = head + ds.collect_events() + return ds + + +async def _grant_kg_manage( + authz: InMemoryAuthorizationProvider, kg_id: str, user_id: str +) -> None: + await authz.write_relationship( + f"knowledge_graph:{kg_id}", "admin", f"user:{user_id}" + ) + + +class _InMemorySyncRunRepository: + def __init__(self) -> None: + self.saved: dict[str, DataSourceSyncRun] = {} + + async def save(self, sync_run: DataSourceSyncRun) -> None: + self.saved[sync_run.id] = sync_run + + async def get_by_id(self, sync_run_id: str) -> DataSourceSyncRun | None: + return self.saved.get(sync_run_id) + + +@pytest.fixture +def mock_session(): + session = MagicMock() + session.commit = AsyncMock() + return session + + +@pytest.fixture +def session_factory(mock_session): + @asynccontextmanager + async def _cm(): + yield mock_session + + def factory(): + return _cm() + + return factory + + +@pytest.fixture +def kg_repo(): + return InMemoryKnowledgeGraphRepository() + + +@pytest.fixture +def ds_repo(): + return InMemoryDataSourceRepository() + + +@pytest.fixture +def sync_run_repo(): + return 
_InMemorySyncRunRepository() + + +@pytest.fixture +def authz(): + return InMemoryAuthorizationProvider() + + +def _service( + *, + mock_session, + session_factory, + kg_repo, + ds_repo, + sync_run_repo, + authz, + tenant_id: str = "tenant-1", +) -> MaintenancePipelineService: + job_repo = MagicMock() + job_repo.sync_maintenance_pending_jobs = AsyncMock() + return MaintenancePipelineService( + session=mock_session, + session_factory=session_factory, + knowledge_graph_repository=kg_repo, + data_source_repository=ds_repo, + sync_run_repository=sync_run_repo, + extraction_job_repository=job_repo, + authorization=authz, + tenant_id=tenant_id, + diff_summary_service_factory=lambda _tenant: MagicMock(), + ) + + +@pytest.mark.asyncio +async def test_trigger_records_no_changes_when_baselines_match( + mock_session, session_factory, kg_repo, ds_repo, sync_run_repo, authz +): + kg = _make_kg() + kg_repo.seed(kg) + ds = _make_ds( + ds_id="ds-1", + kg_id=kg.id.value, + tenant_id=kg.tenant_id, + baseline="same", + head="same", + ) + ds_repo.seed(ds) + await _grant_kg_manage(authz, kg.id.value, "user-1") + + svc = _service( + mock_session=mock_session, + session_factory=session_factory, + kg_repo=kg_repo, + ds_repo=ds_repo, + sync_run_repo=sync_run_repo, + authz=authz, + ) + + run = await svc.trigger(user_id="user-1", kg_id=kg.id.value) + + assert run.outcome == KnowledgeGraphMaintenanceRunOutcome.NO_CHANGES + assert sync_run_repo.saved == {} + + +@pytest.mark.asyncio +async def test_trigger_starts_ingest_only_for_changed_sources( + mock_session, session_factory, kg_repo, ds_repo, sync_run_repo, authz +): + kg = _make_kg() + kg_repo.seed(kg) + ds = _make_ds(ds_id="ds-changed", kg_id=kg.id.value, tenant_id=kg.tenant_id) + ds_repo.seed(ds) + await _grant_kg_manage(authz, kg.id.value, "user-1") + + svc = _service( + mock_session=mock_session, + session_factory=session_factory, + kg_repo=kg_repo, + ds_repo=ds_repo, + sync_run_repo=sync_run_repo, + authz=authz, + ) + + with 
patch.object(svc, "advance_for_knowledge_graph", AsyncMock(return_value=None)): + run = await svc.trigger( + user_id="user-1", + kg_id=kg.id.value, + files_per_job=3, + worker_count=4, + start_extraction=True, + ) + + assert run.outcome == KnowledgeGraphMaintenanceRunOutcome.INGEST_STARTED + assert run.files_per_job == 3 + assert run.worker_count == 4 + assert len(sync_run_repo.saved) == 1 + + saved_ds = await ds_repo.get_by_id(ds.id) + assert saved_ds is not None + events = saved_ds.collect_events() + assert len(events) == 1 + assert isinstance(events[0], SyncStarted) + assert events[0].pipeline_mode == "ingest_only" + + +@pytest.mark.asyncio +async def test_advance_marks_ingest_failed_when_sync_fails( + mock_session, session_factory, kg_repo, ds_repo, sync_run_repo, authz +): + kg = _make_kg() + now = datetime.now(UTC) + sync_run_id = "sync-failed-1" + await sync_run_repo.save( + DataSourceSyncRun( + id=sync_run_id, + data_source_id="ds-changed", + status="failed", + started_at=now, + completed_at=now, + error="boom", + created_at=now, + ) + ) + kg.maintenance_run_history = ( + KnowledgeGraphMaintenanceRunRecord( + run_id="run-1", + triggered_at=now, + outcome=KnowledgeGraphMaintenanceRunOutcome.INGEST_STARTED, + target_data_source_ids=("ds-changed",), + sync_run_ids=(sync_run_id,), + files_per_job=2, + worker_count=8, + ), + ) + kg_repo.seed(kg) + + svc = _service( + mock_session=mock_session, + session_factory=session_factory, + kg_repo=kg_repo, + ds_repo=ds_repo, + sync_run_repo=sync_run_repo, + authz=authz, + ) + + run = await svc.advance_for_knowledge_graph(kg_id=kg.id.value, tenant_id=kg.tenant_id) + + assert run is not None + assert run.outcome == KnowledgeGraphMaintenanceRunOutcome.INGEST_FAILED + + +@pytest.mark.asyncio +async def test_check_scheduled_triggers_due_knowledge_graph( + mock_session, session_factory, kg_repo, ds_repo, sync_run_repo, authz +): + now = datetime.now(UTC) + kg = _make_kg() + kg.set_maintenance_schedule( + 
KnowledgeGraphMaintenanceSchedule( + enabled=True, + cron_expression="0 2 * * *", + timezone_name="UTC", + next_run_at=now - timedelta(minutes=1), + files_per_job=2, + worker_count=8, + ) + ) + kg_repo.seed(kg) + + svc = _service( + mock_session=mock_session, + session_factory=session_factory, + kg_repo=kg_repo, + ds_repo=ds_repo, + sync_run_repo=sync_run_repo, + authz=authz, + ) + + fake_repo = MagicMock() + fake_repo.find_all = AsyncMock(return_value=[kg]) + fake_repo.save = AsyncMock() + + with ( + patch( + "management.infrastructure.repositories.knowledge_graph_repository.KnowledgeGraphRepository", + return_value=fake_repo, + ), + patch.object( + MaintenancePipelineService, + "trigger_scheduled", + AsyncMock( + return_value=KnowledgeGraphMaintenanceRunRecord( + run_id="run-scheduled", + triggered_at=now, + outcome=KnowledgeGraphMaintenanceRunOutcome.NO_CHANGES, + ) + ), + ) as trigger_scheduled, + ): + triggered = await svc.check_scheduled_triggers(now=now) + + assert triggered == 1 + trigger_scheduled.assert_awaited_once() + fake_repo.save.assert_awaited_once() diff --git a/src/api/tests/unit/management/infrastructure/test_extraction_baseline_updater.py b/src/api/tests/unit/management/infrastructure/test_extraction_baseline_updater.py index 9bd7ae2fb..b90d51c3d 100644 --- a/src/api/tests/unit/management/infrastructure/test_extraction_baseline_updater.py +++ b/src/api/tests/unit/management/infrastructure/test_extraction_baseline_updater.py @@ -113,3 +113,18 @@ async def test_seed_unset_extraction_baselines_sets_only_null_baselines() -> Non assert ds_unprepared.last_extraction_baseline_commit is None assert ds_already_set.last_extraction_baseline_commit == "existing" mock_repo.save.assert_awaited_once_with(ds_prepared) + + +@pytest.mark.asyncio +async def test_default_data_source_repository_includes_outbox() -> None: + from management.infrastructure.extraction_baseline_updater import ( + _default_data_source_repository, + ) + from 
management.infrastructure.repositories.data_source_repository import ( + DataSourceRepository, + ) + + session = AsyncMock() + repo = _default_data_source_repository(session) + + assert isinstance(repo, DataSourceRepository) diff --git a/src/api/tests/unit/management/presentation/test_knowledge_graphs_routes.py b/src/api/tests/unit/management/presentation/test_knowledge_graphs_routes.py index cc74f9ed6..050e2b24e 100644 --- a/src/api/tests/unit/management/presentation/test_knowledge_graphs_routes.py +++ b/src/api/tests/unit/management/presentation/test_knowledge_graphs_routes.py @@ -427,6 +427,8 @@ def test_put_maintenance_schedule_calls_service( cron_expression="30 8 * * *", timezone_name="America/New_York", enabled=True, + files_per_job=2, + worker_count=8, ) def test_list_maintenance_runs_returns_200( @@ -471,22 +473,26 @@ def test_trigger_maintenance_run_returns_201( KnowledgeGraphMaintenanceRunRecord( run_id="01JTRIGGER1234567890ABCDE", triggered_at=datetime.now(UTC), - outcome=KnowledgeGraphMaintenanceRunOutcome.STARTED, + outcome=KnowledgeGraphMaintenanceRunOutcome.INGEST_STARTED, message="Scheduled maintenance launched", target_data_source_ids=("ds-1", "ds-2"), ) ) response = test_client.post( - f"/management/knowledge-graphs/{sample_knowledge_graph.id.value}/maintenance-runs/trigger" + f"/management/knowledge-graphs/{sample_knowledge_graph.id.value}/maintenance-runs/trigger", + json={"files_per_job": 4, "worker_count": 6, "start_extraction": True}, ) assert response.status_code == status.HTTP_201_CREATED payload = response.json() - assert payload["outcome"] == "started" + assert payload["outcome"] == "ingest-started" mock_kg_service.trigger_maintenance_run.assert_called_once_with( user_id=mock_current_user.user_id.value, kg_id=sample_knowledge_graph.id.value, + files_per_job=4, + worker_count=6, + start_extraction=True, ) diff --git a/src/dev-ui/app/components/graph-management/GraphMaintenanceWorkspace.vue 
b/src/dev-ui/app/components/graph-management/GraphMaintenanceWorkspace.vue index 56158589d..0c3219251 100644 --- a/src/dev-ui/app/components/graph-management/GraphMaintenanceWorkspace.vue +++ b/src/dev-ui/app/components/graph-management/GraphMaintenanceWorkspace.vue @@ -64,22 +64,13 @@ interface DataSourceRow { diff_summary?: DiffSummary | null } -interface ExtractionJobSet { - name: string - strategy: string - enabled?: boolean - files_per_job?: number - file_patterns?: string[] - description?: string - entity_type?: string - instances_per_job?: number -} - interface MaintenanceSchedule { enabled: boolean cron_expression: string timezone_name: string next_run_at: string | null + files_per_job?: number + worker_count?: number } interface MaintenanceRun { @@ -121,7 +112,6 @@ const filesPerJob = ref(2) const checkingCommits = ref(false) const updatingLocalCommits = ref(false) const runningMaintenance = ref(false) -const startingExtraction = ref(false) let refreshInterval: ReturnType<typeof setInterval> | null = null @@ -197,6 +187,8 @@ async function loadSchedule() { scheduleEnabled.value = payload.enabled scheduleTimezone.value = payload.timezone_name || 'UTC' scheduleTime.value = cronToDailyTime(payload.cron_expression) || '02:00' + if (payload.files_per_job) filesPerJob.value = payload.files_per_job + if (payload.worker_count) workers.value = payload.worker_count } async function loadRunHistory() { @@ -324,6 +316,11 @@ async function saveSchedule() { enabled: scheduleEnabled.value, cron_expression: cron, timezone_name: scheduleTimezone.value, + files_per_job: normalizedFilesPerJob.value, + worker_count: Math.min( + MAX_MAINTENANCE_WORKERS, + Math.max(1, Math.floor(Number(workers.value) || 1)), + ), }, }, ) @@ -335,35 +332,23 @@ async function saveSchedule() { } } -async function applyFilesPerJobToJobSets() { - const perJob = normalizedFilesPerJob.value - const doc = await apiFetch<{ version?: string; job_sets: ExtractionJobSet[] }>( - 
`/management/knowledge-graphs/${encodeURIComponent(props.kgId)}/extraction-jobs`, - ) - const hasByFiles = doc.job_sets.some((js) => js.strategy === 'by_files' && js.enabled !== false) - if (!hasByFiles) return - await apiFetch( - `/management/knowledge-graphs/${encodeURIComponent(props.kgId)}/extraction-jobs`, - { - method: 'PUT', - body: { - version: doc.version || '1.0', - job_sets: doc.job_sets.map((js) => - js.strategy === 'by_files' - ? { ...js, files_per_job: perJob } - : js, - ), - }, - }, - ) -} - -async function runMaintenanceNow() { +async function runMaintenanceNow(options?: { startExtraction?: boolean }) { runningMaintenance.value = true + const workerTotal = Math.min( + MAX_MAINTENANCE_WORKERS, + Math.max(1, Math.floor(Number(workers.value) || 1)), + ) try { const run = await apiFetch<MaintenanceRun>( `/management/knowledge-graphs/${encodeURIComponent(props.kgId)}/maintenance-runs/trigger`, - { method: 'POST' }, + { + method: 'POST', + body: { + files_per_job: normalizedFilesPerJob.value, + worker_count: workerTotal, + start_extraction: options?.startExtraction ?? 
false, + }, + }, ) toast.success('Maintenance run recorded', { description: run.message || formatMaintenanceRunOutcome(run.outcome), @@ -376,43 +361,8 @@ async function runMaintenanceNow() { } } -async function startExtractionJobs() { - startingExtraction.value = true - const requested = Math.floor(Number(workers.value) || 1) - if (requested > MAX_MAINTENANCE_WORKERS) { - workers.value = MAX_MAINTENANCE_WORKERS - toast.info(`Worker concurrency capped at ${MAX_MAINTENANCE_WORKERS}`) - } - const workerTotal = Math.min(MAX_MAINTENANCE_WORKERS, Math.max(1, requested)) - try { - try { - await applyFilesPerJobToJobSets() - } catch (e: unknown) { - toast.warning('Could not update files-per-job on job sets', { - description: resolveApiError(e), - }) - } - const res = await apiFetch<{ message?: string }>( - `/management/knowledge-graphs/${encodeURIComponent(props.kgId)}/extraction-jobs/start`, - { - method: 'POST', - body: { workers: workerTotal }, - }, - ) - toast.success('Extraction started', { description: res.message }) - await loadExtractionState() - } catch (e: unknown) { - toast.error('Failed to start extraction', { description: resolveApiError(e) }) - } finally { - startingExtraction.value = false - } -} - async function runMaintenancePipeline() { - await runMaintenanceNow() - if (maintenanceReadySources.value.length > 0) { - await startExtractionJobs() - } + await runMaintenanceNow({ startExtraction: true }) } function startAutoRefresh() { @@ -637,17 +587,13 @@ onUnmounted(() => { </div> <div class="flex flex-wrap gap-2"> - <Button :disabled="runningMaintenance" @click="runMaintenanceNow"> + <Button :disabled="runningMaintenance" @click="runMaintenanceNow()"> <Loader2 v-if="runningMaintenance" class="mr-2 size-4 animate-spin" /> Sync changed sources </Button> - <Button variant="secondary" :disabled="startingExtraction" @click="startExtractionJobs"> - <Loader2 v-if="startingExtraction" class="mr-2 size-4 animate-spin" /> - Start extraction jobs - </Button> <Button 
variant="outline" - :disabled="runningMaintenance || startingExtraction || maintenanceReadySources.length === 0" + :disabled="runningMaintenance || maintenanceReadySources.length === 0" @click="runMaintenancePipeline" > Run full pipeline diff --git a/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue b/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue index 8189f5fad..0896339e6 100644 --- a/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue +++ b/src/dev-ui/app/pages/knowledge-graphs/[kgId]/manage.vue @@ -89,7 +89,7 @@ import { type WorkspaceHubOverview, type WorkspaceHubSourceRow, } from '@/utils/kgManageWorkspaceHub' -import { isIngestionPreparedAtHead, resolvePrepStatusLabel, resolveRepoUrl } from '@/utils/kgDataSourcesCommits' +import { hasIngestionContextPrepared, resolvePrepStatusLabel, resolveRepoUrl } from '@/utils/kgDataSourcesCommits' import { latestSyncRun } from '@/utils/kgDataSourcesSync' import { appendLocalChatMessage, @@ -601,7 +601,7 @@ async function loadOverviewMetrics() { } catch { // keep default status } - if (isIngestionPreparedAtHead(ds)) { + if (hasIngestionContextPrepared(ds)) { prepared += 1 if (status === 'not prepared') { status = 'prepared' diff --git a/src/dev-ui/app/tests/kg-data-sources-phase1.test.ts b/src/dev-ui/app/tests/kg-data-sources-phase1.test.ts index cef5230da..cbae4d19d 100644 --- a/src/dev-ui/app/tests/kg-data-sources-phase1.test.ts +++ b/src/dev-ui/app/tests/kg-data-sources-phase1.test.ts @@ -2,6 +2,7 @@ import { describe, it, expect } from 'vitest' import { readFileSync } from 'fs' import { resolve } from 'path' import { + hasIngestionContextPrepared, hasUnpulledCommits, isIngestionPreparedAtHead, needsIngestionPrepare, @@ -109,6 +110,13 @@ describe('kgDataSourcesCommits helpers', () => { expect(needsIngestionPrepare({ tracked_branch_head_commit: 'abc', last_prepared_commit: null })).toBe(true) expect(hasUnpulledCommits({ tracked_branch_head_commit: 'abc', clone_head_commit: 'abc' })).toBe(false) 
expect(isIngestionPreparedAtHead({ tracked_branch_head_commit: 'abc', clone_head_commit: 'abc' })).toBe(true) + expect( + hasIngestionContextPrepared({ + tracked_branch_head_commit: 'new-remote', + clone_head_commit: 'old-local', + }), + ).toBe(true) + expect(hasIngestionContextPrepared({ tracked_branch_head_commit: 'abc' })).toBe(false) }) }) diff --git a/src/dev-ui/app/tests/kg-manage-workspace-hub.test.ts b/src/dev-ui/app/tests/kg-manage-workspace-hub.test.ts index 5c8cfaf61..5b2ad10dc 100644 --- a/src/dev-ui/app/tests/kg-manage-workspace-hub.test.ts +++ b/src/dev-ui/app/tests/kg-manage-workspace-hub.test.ts @@ -70,6 +70,26 @@ describe('kgManageWorkspaceHub', () => { }).label).toBe('Graph Management') }) + it('keeps sources phase complete when maintenance is pending but ingestion was prepared', () => { + const input = { + ...baseInput, + dataSourceCount: 5, + preparedSourceCount: 5, + maintenanceReadyCount: 3, + workspaceStatus: { + ...baseStatus, + workspace_mode: 'extraction_operations' as const, + }, + } + const sourcesTile = buildWorkspaceHubTiles(input).find((tile) => tile.key === 'data-sources') + const maintainTile = buildWorkspaceHubTiles(input).find((tile) => tile.key === 'maintain') + expect(sourcesTile?.done).toBe(true) + expect(sourcesTile?.subtitle).toContain('ingestion ready') + expect(sourcesTile?.tone).toBe('success') + expect(maintainTile?.tone).toBe('warning') + expect(maintainTile?.subtitle).toBe('3 sources need maintenance') + }) + it('builds a primary next-step CTA while sources phase is incomplete', () => { const next = buildWorkspaceHubNextStep(baseInput) expect(next.primaryPhase).toBe(true) diff --git a/src/dev-ui/app/utils/kgDataSourcesCommits.ts b/src/dev-ui/app/utils/kgDataSourcesCommits.ts index 7d92a3090..ab8fba9e2 100644 --- a/src/dev-ui/app/utils/kgDataSourcesCommits.ts +++ b/src/dev-ui/app/utils/kgDataSourcesCommits.ts @@ -94,6 +94,11 @@ export function isIngestionPreparedAtHead(ds: Parameters<typeof hasUnpulledCommi return !!tip 
&& !!ingested && ingested === tip } +/** True once initial ingestion prep has completed (new commits are a maintenance concern). */ +export function hasIngestionContextPrepared(ds: Parameters<typeof resolveIngestedHeadCommit>[0]): boolean { + return resolveIngestedHeadCommit(ds) !== null +} + export function formatPreparedFileCount(count: number | null | undefined): string { if (count === null || count === undefined) return '—' return count.toLocaleString() diff --git a/src/dev-ui/app/utils/kgMaintenanceSchedule.ts b/src/dev-ui/app/utils/kgMaintenanceSchedule.ts index 85613c087..b3a3ae8c2 100644 --- a/src/dev-ui/app/utils/kgMaintenanceSchedule.ts +++ b/src/dev-ui/app/utils/kgMaintenanceSchedule.ts @@ -32,6 +32,12 @@ export function formatMaintenanceRunOutcome(outcome: string): string { switch (outcome) { case 'started': return 'Started' + case 'ingest-started': + return 'Ingest started' + case 'extraction-started': + return 'Extraction started' + case 'ingest-failed': + return 'Ingest failed' case 'no-changes': return 'No changes' case 'preflight-failed': @@ -48,11 +54,14 @@ export function maintenanceRunOutcomeVariant( ): 'default' | 'secondary' | 'destructive' | 'outline' | 'success' { switch (outcome) { case 'started': + case 'ingest-started': + case 'extraction-started': return 'success' case 'no-changes': return 'secondary' case 'preflight-failed': case 'launch-failed': + case 'ingest-failed': return 'destructive' default: return 'outline' From 108fee37972ec4818b57954dae56374024045b38 Mon Sep 17 00:00:00 2001 From: aredenba-rh <aredenba@redhat.com> Date: Thu, 18 Jun 2026 15:19:29 -0400 Subject: [PATCH 146/153] feat(extraction): capture instance change snapshots for archived job history Record before/after graph instance snapshots as JSONL on extraction jobs and GMA sessions so archived history can show property-level diffs instead of raw mutation logs. 
Co-authored-by: Cursor <cursoragent@cursor.com> --- .../archived_extraction_history.py | 5 + .../graph_management_session_journal.py | 32 +++ src/api/extraction/domain/extraction_job.py | 1 + .../domain/instance_change_record.py | 112 +++++++++++ .../agentic_ci_extraction_job_runner.py | 2 +- .../infrastructure/extraction_job_metrics.py | 4 + .../extraction_job_mutation_metrics.py | 4 + .../job_mutation_artifact_store.py | 55 ++++++ .../infrastructure/models/extraction_job.py | 1 + .../openshell_extraction_job_runner.py | 2 +- .../repositories/extraction_job_repository.py | 8 + src/api/extraction/ports/workload_graph.py | 20 ++ .../extraction/presentation/workload_auth.py | 6 + .../presentation/workload_routes.py | 17 ++ .../graph/infrastructure/graph_repository.py | 56 ++++++ .../extraction_workload/graph_reader.py | 75 +++++++ .../instance_change_journal.py | 183 ++++++++++++++++++ .../extraction_workload/schema_service.py | 37 +++- .../management/extraction_jobs_service.py | 5 + ...p9q0_add_applied_instance_changes_jsonl.py | 27 +++ .../test_archived_extraction_history.py | 2 + .../domain/test_instance_change_record.py | 51 +++++ .../GraphExtractionArchivedHistory.vue | 156 ++++++++++++--- 23 files changed, 828 insertions(+), 33 deletions(-) create mode 100644 src/api/extraction/domain/instance_change_record.py create mode 100644 src/api/extraction/infrastructure/job_mutation_artifact_store.py create mode 100644 src/api/infrastructure/extraction_workload/instance_change_journal.py create mode 100644 src/api/infrastructure/migrations/versions/l5m6n7o8p9q0_add_applied_instance_changes_jsonl.py create mode 100644 src/api/tests/unit/extraction/domain/test_instance_change_record.py diff --git a/src/api/extraction/application/archived_extraction_history.py b/src/api/extraction/application/archived_extraction_history.py index 49f0a3439..c50f7867a 100644 --- a/src/api/extraction/application/archived_extraction_history.py +++ 
b/src/api/extraction/application/archived_extraction_history.py @@ -26,9 +26,14 @@ def serialize_archived_job(job: ExtractionJobRecord) -> dict[str, Any]: "jobSet": job.job_set_name, "writeOps": archived_job_write_ops(job), "hasMutations": bool(job.applied_mutations_jsonl), + "hasInstanceChanges": bool(job.applied_instance_changes_jsonl), "inputTokens": job.input_tokens, "outputTokens": job.output_tokens, "costUsd": job.cost_usd, + "entitiesCreated": job.entities_created, + "entitiesModified": job.entities_modified, + "relationshipsCreated": job.relationships_created, + "relationshipsModified": job.relationships_modified, "archivedAt": job.archived_at.isoformat() if job.archived_at else None, "strategy": job.strategy, } diff --git a/src/api/extraction/application/graph_management_session_journal.py b/src/api/extraction/application/graph_management_session_journal.py index c3c986ae0..51df91372 100644 --- a/src/api/extraction/application/graph_management_session_journal.py +++ b/src/api/extraction/application/graph_management_session_journal.py @@ -78,6 +78,21 @@ def append_applied_jsonl_to_session( session.runtime_context["mutation_journal"] = journal +def append_instance_changes_to_session( + session: ExtractionAgentSession, + *, + instance_changes_jsonl: str, +) -> None: + chunk = instance_changes_jsonl.strip() + if not chunk: + return + journal = _ensure_journal(session) + previous = str(journal.get("instance_changes_jsonl") or "").strip() + combined = "\n".join(part for part in (previous, chunk) if part) + journal["instance_changes_jsonl"] = combined + session.runtime_context["mutation_journal"] = journal + + def append_turn_usage_to_session( session: ExtractionAgentSession, *, @@ -117,10 +132,26 @@ async def append_applied_jsonl( append_applied_jsonl_to_session(session, applied_jsonl=applied_jsonl) await self._session_repository.save(session) + async def append_instance_changes( + self, + *, + session_id: str, + instance_changes_jsonl: str, + ) -> None: + 
session = await self._session_repository.get_by_id(session_id) + if session is None or not session.is_active: + return + append_instance_changes_to_session( + session, + instance_changes_jsonl=instance_changes_jsonl, + ) + await self._session_repository.save(session) + async def archive_session_mutations(self, session: ExtractionAgentSession) -> None: """Write one ARCHIVED extraction job row for the full GMA session.""" journal = session.runtime_context.get("mutation_journal") or {} jsonl = str(journal.get("jsonl") or "").strip() + instance_changes_jsonl = str(journal.get("instance_changes_jsonl") or "").strip() metrics = metrics_from_mutation_jsonl(jsonl) if jsonl else {} write_ops = int(metrics.get("write_ops") or 0) if write_ops <= 0: @@ -152,6 +183,7 @@ async def archive_session_mutations(self, session: ExtractionAgentSession) -> No completed_at=now, archived_at=now, applied_mutations_jsonl=jsonl or None, + applied_instance_changes_jsonl=instance_changes_jsonl or None, input_tokens=int(journal.get("input_tokens") or 0), output_tokens=int(journal.get("output_tokens") or 0), cache_read_tokens=int(journal.get("cache_read_tokens") or 0), diff --git a/src/api/extraction/domain/extraction_job.py b/src/api/extraction/domain/extraction_job.py index f61cd41f9..39e54e2de 100644 --- a/src/api/extraction/domain/extraction_job.py +++ b/src/api/extraction/domain/extraction_job.py @@ -107,6 +107,7 @@ class ExtractionJobRecord: run_started_at: datetime | None = None archived_at: datetime | None = None applied_mutations_jsonl: str | None = None + applied_instance_changes_jsonl: str | None = None def write_ops(self) -> int: return ( diff --git a/src/api/extraction/domain/instance_change_record.py b/src/api/extraction/domain/instance_change_record.py new file mode 100644 index 000000000..9b5060d8f --- /dev/null +++ b/src/api/extraction/domain/instance_change_record.py @@ -0,0 +1,112 @@ +"""Structured before/after snapshots for applied graph instance mutations.""" + +from 
__future__ import annotations + +import json +from typing import Any + + +def serialize_instance_snapshot( + *, + instance_id: str, + label: str, + entity_kind: str, + properties: dict[str, Any] | None, + start_id: str | None = None, + end_id: str | None = None, +) -> dict[str, Any]: + payload: dict[str, Any] = { + "id": instance_id, + "label": label, + "type": entity_kind, + "properties": dict(properties or {}), + } + if start_id is not None: + payload["start_id"] = start_id + if end_id is not None: + payload["end_id"] = end_id + return payload + + +def property_changes( + before: dict[str, Any] | None, + after: dict[str, Any] | None, +) -> list[dict[str, Any]]: + """Return per-property before/after rows for one instance snapshot pair.""" + before_props = dict(before or {}) + after_props = dict(after or {}) + keys = sorted(set(before_props) | set(after_props)) + changes: list[dict[str, Any]] = [] + for key in keys: + old_value = before_props.get(key) + new_value = after_props.get(key) + if old_value == new_value: + continue + changes.append({"key": key, "before": old_value, "after": new_value}) + return changes + + +def build_instance_change_record( + *, + op: str, + entity_kind: str, + instance_id: str, + label: str | None, + before: dict[str, Any] | None, + after: dict[str, Any] | None, + start_id: str | None = None, + end_id: str | None = None, +) -> dict[str, Any]: + before_snapshot = ( + serialize_instance_snapshot( + instance_id=instance_id, + label=str(label or ""), + entity_kind=entity_kind, + properties=before, + start_id=start_id, + end_id=end_id, + ) + if before is not None + else None + ) + after_snapshot = ( + serialize_instance_snapshot( + instance_id=instance_id, + label=str(label or ""), + entity_kind=entity_kind, + properties=after, + start_id=start_id, + end_id=end_id, + ) + if after is not None + else None + ) + return { + "op": op.upper(), + "type": entity_kind, + "id": instance_id, + "label": label, + "start_id": start_id, + "end_id": end_id, 
+ "before": before_snapshot, + "after": after_snapshot, + "property_changes": property_changes(before, after), + } + + +def instance_changes_to_jsonl(records: list[dict[str, Any]]) -> str: + return "\n".join( + json.dumps(record, separators=(",", ":"), sort_keys=True) for record in records + ) + + +def parse_instance_changes_jsonl(jsonl_content: str) -> list[dict[str, Any]]: + records: list[dict[str, Any]] = [] + for raw_line in jsonl_content.splitlines(): + line = raw_line.strip() + if not line: + continue + row = json.loads(line) + if isinstance(row, dict): + records.append(row) + return records diff --git a/src/api/extraction/infrastructure/agentic_ci_extraction_job_runner.py b/src/api/extraction/infrastructure/agentic_ci_extraction_job_runner.py index 5755e34fe..445a9fa27 100644 --- a/src/api/extraction/infrastructure/agentic_ci_extraction_job_runner.py +++ b/src/api/extraction/infrastructure/agentic_ci_extraction_job_runner.py @@ -95,7 +95,7 @@ async def prepare_for_run( credentials = get_workload_credential_issuer().issue( tenant_id=tenant_id, knowledge_graph_id=job.knowledge_graph_id, - extra_scopes=("workload:chat",), + extra_scopes=("workload:chat", f"job:{job.job_id}"), ) workdir = await self._workdir_materializer.prepare( job=job, diff --git a/src/api/extraction/infrastructure/extraction_job_metrics.py b/src/api/extraction/infrastructure/extraction_job_metrics.py index 47514acbf..e2ca76e1c 100644 --- a/src/api/extraction/infrastructure/extraction_job_metrics.py +++ b/src/api/extraction/infrastructure/extraction_job_metrics.py @@ -10,6 +10,7 @@ applied_mutation_jsonl_from_workdir, metrics_from_mutation_workdir, ) +from extraction.infrastructure.job_mutation_artifact_store import read_instance_changes_from_workdir def merge_extraction_job_metrics( @@ -34,6 +35,9 @@ def merge_extraction_job_metrics( applied_jsonl = applied_mutation_jsonl_from_workdir(workdir) if applied_jsonl: metrics["applied_mutations_jsonl"] = applied_jsonl + instance_changes_jsonl = 
read_instance_changes_from_workdir(workdir) + if instance_changes_jsonl: + metrics["applied_instance_changes_jsonl"] = instance_changes_jsonl return metrics diff --git a/src/api/extraction/infrastructure/extraction_job_mutation_metrics.py b/src/api/extraction/infrastructure/extraction_job_mutation_metrics.py index ab106c2ac..e97cabeb6 100644 --- a/src/api/extraction/infrastructure/extraction_job_mutation_metrics.py +++ b/src/api/extraction/infrastructure/extraction_job_mutation_metrics.py @@ -6,6 +6,7 @@ from typing import Any from extraction.domain.mutation_jsonl_metrics import metrics_from_mutation_jsonl +from extraction.infrastructure.job_mutation_artifact_store import read_instance_changes_from_workdir __all__ = [ "applied_mutation_jsonl_from_workdir", @@ -63,6 +64,9 @@ def reconcile_mutation_metrics( applied_jsonl = applied_mutation_jsonl_from_workdir(workdir) if applied_jsonl: merged["applied_mutations_jsonl"] = applied_jsonl + instance_changes_jsonl = read_instance_changes_from_workdir(workdir) + if instance_changes_jsonl: + merged["applied_instance_changes_jsonl"] = instance_changes_jsonl return merged if operations_applied > 0: diff --git a/src/api/extraction/infrastructure/job_mutation_artifact_store.py b/src/api/extraction/infrastructure/job_mutation_artifact_store.py new file mode 100644 index 000000000..c76825145 --- /dev/null +++ b/src/api/extraction/infrastructure/job_mutation_artifact_store.py @@ -0,0 +1,55 @@ +"""Persist applied mutation artifacts from extraction job workload runs.""" + +from __future__ import annotations + +from pathlib import Path + +from extraction.infrastructure.extraction_job_activity import job_workdir +from extraction.infrastructure.extraction_job_workdir_layout import mutation_result_path +from extraction.infrastructure.workload_runtime_settings import ( + ExtractionWorkloadRuntimeSettings, + get_extraction_workload_runtime_settings, +) + +APPLIED_MUTATIONS_FILENAME = "applied.jsonl" +INSTANCE_CHANGES_FILENAME = 
"instance-changes.jsonl" + + +def append_job_mutation_artifacts( + *, + knowledge_graph_id: str, + job_id: str, + applied_jsonl: str | None = None, + instance_changes_jsonl: str | None = None, + settings: ExtractionWorkloadRuntimeSettings | None = None, +) -> None: + """Append applied JSONL and instance change records to a job workdir.""" + workdir = job_workdir( + knowledge_graph_id=knowledge_graph_id, + job_id=job_id, + settings=settings or get_extraction_workload_runtime_settings(), + ) + mutations_dir = workdir / "mutations" + mutations_dir.mkdir(parents=True, exist_ok=True) + mutation_result_path(workdir).parent.mkdir(parents=True, exist_ok=True) + + if applied_jsonl and applied_jsonl.strip(): + _append_lines(mutations_dir / APPLIED_MUTATIONS_FILENAME, applied_jsonl.strip()) + if instance_changes_jsonl and instance_changes_jsonl.strip(): + _append_lines(mutations_dir / INSTANCE_CHANGES_FILENAME, instance_changes_jsonl.strip()) + + +def read_instance_changes_from_workdir( + job_root: Path, +) -> str | None: + path = job_root / "mutations" / INSTANCE_CHANGES_FILENAME + if not path.is_file(): + return None + content = path.read_text(encoding="utf-8").strip() + return content or None + + +def _append_lines(path: Path, chunk: str) -> None: + existing = path.read_text(encoding="utf-8").strip() if path.is_file() else "" + combined = "\n".join(part for part in (existing, chunk) if part) + path.write_text(combined + ("\n" if combined else ""), encoding="utf-8") diff --git a/src/api/extraction/infrastructure/models/extraction_job.py b/src/api/extraction/infrastructure/models/extraction_job.py index c34fb7fa9..340b3374a 100644 --- a/src/api/extraction/infrastructure/models/extraction_job.py +++ b/src/api/extraction/infrastructure/models/extraction_job.py @@ -43,6 +43,7 @@ class ExtractionJobModel(Base, TimestampMixin): run_started_at: Mapped[datetime | None] = mapped_column(sa.DateTime(timezone=True), nullable=True) archived_at: Mapped[datetime | None] = 
mapped_column(sa.DateTime(timezone=True), nullable=True) applied_mutations_jsonl: Mapped[str | None] = mapped_column(sa.Text(), nullable=True) + applied_instance_changes_jsonl: Mapped[str | None] = mapped_column(sa.Text(), nullable=True) class ExtractionRunModel(Base, TimestampMixin): diff --git a/src/api/extraction/infrastructure/openshell_extraction_job_runner.py b/src/api/extraction/infrastructure/openshell_extraction_job_runner.py index c8a104e3f..eda82cdad 100644 --- a/src/api/extraction/infrastructure/openshell_extraction_job_runner.py +++ b/src/api/extraction/infrastructure/openshell_extraction_job_runner.py @@ -102,7 +102,7 @@ async def prepare_for_run( credentials = get_workload_credential_issuer().issue( tenant_id=tenant_id, knowledge_graph_id=job.knowledge_graph_id, - extra_scopes=("workload:chat",), + extra_scopes=("workload:chat", f"job:{job.job_id}"), ) workdir = await self._workdir_materializer.prepare( job=job, diff --git a/src/api/extraction/infrastructure/repositories/extraction_job_repository.py b/src/api/extraction/infrastructure/repositories/extraction_job_repository.py index b00fa7723..8410fcd34 100644 --- a/src/api/extraction/infrastructure/repositories/extraction_job_repository.py +++ b/src/api/extraction/infrastructure/repositories/extraction_job_repository.py @@ -53,6 +53,7 @@ def _job_model_to_record(model: ExtractionJobModel) -> ExtractionJobRecord: run_started_at=model.run_started_at, archived_at=model.archived_at, applied_mutations_jsonl=model.applied_mutations_jsonl, + applied_instance_changes_jsonl=model.applied_instance_changes_jsonl, ) @@ -423,6 +424,9 @@ async def promote_completed_job_to_archived( applied_jsonl = metrics.get("applied_mutations_jsonl") if isinstance(applied_jsonl, str) and applied_jsonl.strip(): values["applied_mutations_jsonl"] = applied_jsonl + instance_changes_jsonl = metrics.get("applied_instance_changes_jsonl") + if isinstance(instance_changes_jsonl, str) and instance_changes_jsonl.strip(): + 
values["applied_instance_changes_jsonl"] = instance_changes_jsonl result = await self._session.execute( update(ExtractionJobModel) .where( @@ -530,6 +534,9 @@ async def mark_job_completed( applied_jsonl = payload.get("applied_mutations_jsonl") if isinstance(applied_jsonl, str) and applied_jsonl.strip(): values["applied_mutations_jsonl"] = applied_jsonl + instance_changes_jsonl = payload.get("applied_instance_changes_jsonl") + if isinstance(instance_changes_jsonl, str) and instance_changes_jsonl.strip(): + values["applied_instance_changes_jsonl"] = instance_changes_jsonl await self._session.execute( update(ExtractionJobModel) .where( @@ -642,6 +649,7 @@ async def insert_archived_session_job(self, job: ExtractionJobRecord) -> None: run_started_at=job.run_started_at, archived_at=job.archived_at, applied_mutations_jsonl=job.applied_mutations_jsonl, + applied_instance_changes_jsonl=job.applied_instance_changes_jsonl, input_tokens=job.input_tokens, output_tokens=job.output_tokens, cache_read_tokens=job.cache_read_tokens, diff --git a/src/api/extraction/ports/workload_graph.py b/src/api/extraction/ports/workload_graph.py index 64272913e..9f59cd918 100644 --- a/src/api/extraction/ports/workload_graph.py +++ b/src/api/extraction/ports/workload_graph.py @@ -134,3 +134,23 @@ async def partition_slugs_by_existence( ) -> tuple[list[str], list[str]]: """Return (existing_slugs, missing_slugs) sorted for one entity type.""" ... + + async def fetch_nodes_by_ids( + self, + *, + tenant_id: str, + knowledge_graph_id: str, + node_ids: tuple[str, ...], + ) -> dict[str, WorkloadGraphNode]: + """Return node snapshots keyed by application id.""" + ... + + async def fetch_edges_by_ids( + self, + *, + tenant_id: str, + knowledge_graph_id: str, + edge_ids: tuple[str, ...], + ) -> dict[str, WorkloadGraphRelationship]: + """Return edge snapshots keyed by application id.""" + ... 
diff --git a/src/api/extraction/presentation/workload_auth.py b/src/api/extraction/presentation/workload_auth.py index 529cfc599..0f53744a6 100644 --- a/src/api/extraction/presentation/workload_auth.py +++ b/src/api/extraction/presentation/workload_auth.py @@ -23,6 +23,7 @@ class WorkloadAuthContext: tenant_id: str knowledge_graph_id: str session_id: str | None = None + job_id: str | None = None def get_workload_auth_context( @@ -67,10 +68,15 @@ def get_workload_auth_context( (scope.removeprefix("session:") for scope in credentials.scopes if scope.startswith("session:")), None, ) + job_scope = next( + (scope.removeprefix("job:") for scope in credentials.scopes if scope.startswith("job:")), + None, + ) return WorkloadAuthContext( credentials=credentials, tenant_id=tenant_scope, knowledge_graph_id=kg_scope, session_id=session_scope, + job_id=job_scope, ) diff --git a/src/api/extraction/presentation/workload_routes.py b/src/api/extraction/presentation/workload_routes.py index c47dad229..a2d6afda8 100644 --- a/src/api/extraction/presentation/workload_routes.py +++ b/src/api/extraction/presentation/workload_routes.py @@ -280,11 +280,28 @@ async def workload_apply_mutations( remaining_relationship_gaps: list[str] = [] if result.get("applied"): applied_jsonl = str(result.get("applied_jsonl") or "").strip() + instance_changes_jsonl = str(result.get("instance_changes_jsonl") or "").strip() if auth.session_id and applied_jsonl: await session_journal.append_applied_jsonl( session_id=auth.session_id, applied_jsonl=applied_jsonl, ) + if auth.session_id and instance_changes_jsonl: + await session_journal.append_instance_changes( + session_id=auth.session_id, + instance_changes_jsonl=instance_changes_jsonl, + ) + if auth.job_id and (applied_jsonl or instance_changes_jsonl): + from extraction.infrastructure.job_mutation_artifact_store import ( + append_job_mutation_artifacts, + ) + + append_job_mutation_artifacts( + knowledge_graph_id=auth.knowledge_graph_id, + job_id=auth.job_id, 
+ applied_jsonl=applied_jsonl or None, + instance_changes_jsonl=instance_changes_jsonl or None, + ) from infrastructure.extraction_workload.workspace_readiness import ( build_workload_readiness_snapshot, diff --git a/src/api/graph/infrastructure/graph_repository.py b/src/api/graph/infrastructure/graph_repository.py index b8c9f74ad..5fe79d7fa 100644 --- a/src/api/graph/infrastructure/graph_repository.py +++ b/src/api/graph/infrastructure/graph_repository.py @@ -250,6 +250,62 @@ def find_existing_edge_ids( existing.add(str(row[0])) return existing + def find_nodes_by_ids( + self, + node_ids: list[str], + *, + knowledge_graph_id: str, + chunk_size: int = 200, + ) -> dict[str, NodeRecord]: + """Return node snapshots keyed by application id.""" + if not node_ids: + return {} + snapshots: dict[str, NodeRecord] = {} + kg = _escape_cypher_string(knowledge_graph_id) + for offset in range(0, len(node_ids), chunk_size): + chunk = node_ids[offset : offset + chunk_size] + literals = ", ".join(f"'{_escape_cypher_string(node_id)}'" for node_id in chunk) + query = f""" + MATCH (n {{graph_id: '{self._graph_id}', knowledge_graph_id: '{kg}'}}) + WHERE n.id IN [{literals}] + RETURN n + """ + result = self._client.execute_cypher(query) + for row in result.rows: + if not row or row[0] is None: + continue + node = self._vertex_to_node_record(row[0]) + snapshots[node.id] = node + return snapshots + + def find_edges_by_ids( + self, + edge_ids: list[str], + *, + knowledge_graph_id: str, + chunk_size: int = 200, + ) -> dict[str, EdgeRecord]: + """Return edge snapshots keyed by application id.""" + if not edge_ids: + return {} + snapshots: dict[str, EdgeRecord] = {} + kg = _escape_cypher_string(knowledge_graph_id) + for offset in range(0, len(edge_ids), chunk_size): + chunk = edge_ids[offset : offset + chunk_size] + literals = ", ".join(f"'{_escape_cypher_string(edge_id)}'" for edge_id in chunk) + query = f""" + MATCH ()-[r {{graph_id: '{self._graph_id}', knowledge_graph_id: '{kg}'}}]->() + 
WHERE r.id IN [{literals}] + RETURN r + """ + result = self._client.execute_cypher(query) + for row in result.rows: + if not row or row[0] is None: + continue + edge = self._edge_to_edge_record(row[0]) + snapshots[edge.id] = edge + return snapshots + def find_existing_slugs_for_entity_type( self, entity_type: str, diff --git a/src/api/infrastructure/extraction_workload/graph_reader.py b/src/api/infrastructure/extraction_workload/graph_reader.py index acda8b195..5b12c5c9d 100644 --- a/src/api/infrastructure/extraction_workload/graph_reader.py +++ b/src/api/infrastructure/extraction_workload/graph_reader.py @@ -308,3 +308,78 @@ async def partition_slugs_by_existence( existing_sorted = sorted(existing) missing_sorted = sorted(slug for slug in slugs if slug not in existing) return existing_sorted, missing_sorted + + async def fetch_nodes_by_ids( + self, + *, + tenant_id: str, + knowledge_graph_id: str, + node_ids: tuple[str, ...], + ) -> dict[str, WorkloadGraphNode]: + if not node_ids: + return {} + + def _query() -> dict[str, WorkloadGraphNode]: + client = self._connect_for_tenant(tenant_id) + try: + repository = GraphExtractionReadOnlyRepository( + client=client, + graph_id=client.graph_name, + ) + nodes = repository.find_nodes_by_ids( + list(node_ids), + knowledge_graph_id=knowledge_graph_id, + ) + return { + node.id: WorkloadGraphNode( + id=node.id, + entity_type=node.label, + slug=node.properties.get("slug"), + properties=dict(node.properties), + ) + for node in nodes.values() + } + finally: + client.disconnect() + + return await asyncio.to_thread(_query) + + async def fetch_edges_by_ids( + self, + *, + tenant_id: str, + knowledge_graph_id: str, + edge_ids: tuple[str, ...], + ) -> dict[str, WorkloadGraphRelationship]: + if not edge_ids: + return {} + + def _query() -> dict[str, WorkloadGraphRelationship]: + client = self._connect_for_tenant(tenant_id) + try: + repository = GraphExtractionReadOnlyRepository( + client=client, + graph_id=client.graph_name, + ) + 
edges = repository.find_edges_by_ids( + list(edge_ids), + knowledge_graph_id=knowledge_graph_id, + ) + return { + edge.id: WorkloadGraphRelationship( + id=edge.id, + relationship_type=edge.label, + start_id=edge.start_id, + end_id=edge.end_id, + source_slug=None, + target_slug=None, + source_entity_type="", + target_entity_type="", + properties=dict(edge.properties), + ) + for edge in edges.values() + } + finally: + client.disconnect() + + return await asyncio.to_thread(_query) diff --git a/src/api/infrastructure/extraction_workload/instance_change_journal.py b/src/api/infrastructure/extraction_workload/instance_change_journal.py new file mode 100644 index 000000000..1ef4d6ae6 --- /dev/null +++ b/src/api/infrastructure/extraction_workload/instance_change_journal.py @@ -0,0 +1,183 @@ +"""Capture before/after graph instance snapshots for applied mutation batches.""" + +from __future__ import annotations + +from graph.domain.value_objects import EntityType, MutationOperation, MutationOperationType +from extraction.domain.instance_change_record import build_instance_change_record +from extraction.ports.workload_graph import IWorkloadGraphReader, WorkloadGraphNode, WorkloadGraphRelationship + +_INSTANCE_OPS = frozenset( + { + MutationOperationType.CREATE, + MutationOperationType.UPDATE, + MutationOperationType.DELETE, + } +) + + +async def capture_before_snapshots( + *, + tenant_id: str, + knowledge_graph_id: str, + operations: list[MutationOperation], + graph_reader: IWorkloadGraphReader, +) -> tuple[dict[str, WorkloadGraphNode], dict[str, WorkloadGraphRelationship]]: + node_ids = _collect_ids(operations, EntityType.NODE, include_create=False) + edge_ids = _collect_ids(operations, EntityType.EDGE, include_create=False) + nodes = await graph_reader.fetch_nodes_by_ids( + tenant_id=tenant_id, + knowledge_graph_id=knowledge_graph_id, + node_ids=node_ids, + ) + edges = await graph_reader.fetch_edges_by_ids( + tenant_id=tenant_id, + knowledge_graph_id=knowledge_graph_id, + 
edge_ids=edge_ids, + ) + return nodes, edges + + +async def capture_after_snapshots( + *, + tenant_id: str, + knowledge_graph_id: str, + operations: list[MutationOperation], + graph_reader: IWorkloadGraphReader, +) -> tuple[dict[str, WorkloadGraphNode], dict[str, WorkloadGraphRelationship]]: + node_ids = _collect_ids(operations, EntityType.NODE, include_delete=False) + edge_ids = _collect_ids(operations, EntityType.EDGE, include_delete=False) + nodes = await graph_reader.fetch_nodes_by_ids( + tenant_id=tenant_id, + knowledge_graph_id=knowledge_graph_id, + node_ids=node_ids, + ) + edges = await graph_reader.fetch_edges_by_ids( + tenant_id=tenant_id, + knowledge_graph_id=knowledge_graph_id, + edge_ids=edge_ids, + ) + return nodes, edges + + +def build_instance_change_records( + *, + operations: list[MutationOperation], + nodes_before: dict[str, WorkloadGraphNode], + edges_before: dict[str, WorkloadGraphRelationship], + nodes_after: dict[str, WorkloadGraphNode], + edges_after: dict[str, WorkloadGraphRelationship], +) -> list[dict]: + records: list[dict] = [] + for op in operations: + if op.op not in _INSTANCE_OPS or not op.id: + continue + instance_id = str(op.id) + if op.type == EntityType.NODE: + before_node = nodes_before.get(instance_id) + after_node = nodes_after.get(instance_id) + before_props = ( + None + if op.op == MutationOperationType.CREATE + else dict(before_node.properties) if before_node else None + ) + after_props = ( + None + if op.op == MutationOperationType.DELETE + else dict(after_node.properties) if after_node else dict(op.set_properties or {}) + ) + label = op.label or (after_node.entity_type if after_node else before_node.entity_type if before_node else None) + records.append( + build_instance_change_record( + op=op.op.value, + entity_kind=EntityType.NODE.value, + instance_id=instance_id, + label=label, + before=before_props, + after=after_props, + ) + ) + continue + + before_edge = edges_before.get(instance_id) + after_edge = 
edges_after.get(instance_id) + before_props = ( + None + if op.op == MutationOperationType.CREATE + else dict(before_edge.properties) if before_edge else None + ) + after_props = ( + None + if op.op == MutationOperationType.DELETE + else dict(after_edge.properties) if after_edge else dict(op.set_properties or {}) + ) + label = op.label or ( + after_edge.relationship_type if after_edge else before_edge.relationship_type if before_edge else None + ) + start_id = op.start_id or (after_edge.start_id if after_edge else before_edge.start_id if before_edge else None) + end_id = op.end_id or (after_edge.end_id if after_edge else before_edge.end_id if before_edge else None) + records.append( + build_instance_change_record( + op=op.op.value, + entity_kind=EntityType.EDGE.value, + instance_id=instance_id, + label=label, + before=before_props, + after=after_props, + start_id=start_id, + end_id=end_id, + ) + ) + return records + + +async def merge_instance_change_records( + *, + tenant_id: str, + knowledge_graph_id: str, + operations: list[MutationOperation], + graph_reader: IWorkloadGraphReader, + nodes_before: dict[str, WorkloadGraphNode] | None = None, + edges_before: dict[str, WorkloadGraphRelationship] | None = None, +) -> list[dict]: + """Build complete before/after records using snapshots captured around apply.""" + captured_nodes_before = nodes_before + captured_edges_before = edges_before + if captured_nodes_before is None or captured_edges_before is None: + captured_nodes_before, captured_edges_before = await capture_before_snapshots( + tenant_id=tenant_id, + knowledge_graph_id=knowledge_graph_id, + operations=operations, + graph_reader=graph_reader, + ) + nodes_after, edges_after = await capture_after_snapshots( + tenant_id=tenant_id, + knowledge_graph_id=knowledge_graph_id, + operations=operations, + graph_reader=graph_reader, + ) + return build_instance_change_records( + operations=operations, + nodes_before=captured_nodes_before, + 
edges_before=captured_edges_before, + nodes_after=nodes_after, + edges_after=edges_after, + ) + + +def _collect_ids( + operations: list[MutationOperation], + entity_type: EntityType, + *, + include_create: bool = True, + include_delete: bool = True, +) -> tuple[str, ...]: + ids: list[str] = [] + for op in operations: + if op.type != entity_type or op.op not in _INSTANCE_OPS or not op.id: + continue + if op.op == MutationOperationType.CREATE and not include_create: + continue + if op.op == MutationOperationType.DELETE and not include_delete: + continue + ids.append(str(op.id)) + return tuple(dict.fromkeys(ids)) diff --git a/src/api/infrastructure/extraction_workload/schema_service.py b/src/api/infrastructure/extraction_workload/schema_service.py index da86efb52..b0bf77dd1 100644 --- a/src/api/infrastructure/extraction_workload/schema_service.py +++ b/src/api/infrastructure/extraction_workload/schema_service.py @@ -17,12 +17,17 @@ prepare_mutation_operations, validate_mutation_jsonl, ) +from infrastructure.extraction_workload.instance_change_journal import ( + capture_before_snapshots, + merge_instance_change_records, +) from infrastructure.extraction_workload.workspace_readiness import ( sync_prepopulated_instance_counts, ) from graph.domain.value_objects import EntityType from management.domain.value_objects import OntologyConfig from management.ports.exceptions import CanonicalSchemaMutationError +from extraction.domain.instance_change_record import instance_changes_to_jsonl class GraphWorkloadSchemaService: @@ -129,7 +134,13 @@ async def apply_mutation_jsonl( return {"applied": False, "errors": [str(exc)]} if not define_ops and not instance_ops: - return {"applied": True, "errors": [], "operations_applied": 0, "applied_jsonl": ""} + return { + "applied": True, + "errors": [], + "operations_applied": 0, + "applied_jsonl": "", + "instance_changes_jsonl": "", + } errors: list[str] = [] operations_applied = 0 @@ -145,6 +156,15 @@ async def apply_mutation_jsonl( 
errors.append(str(exc)) if instance_ops and not errors: + nodes_before = {} + edges_before = {} + if self._graph_reader is not None: + nodes_before, edges_before = await capture_before_snapshots( + tenant_id=tenant_id, + knowledge_graph_id=knowledge_graph_id, + operations=instance_ops, + graph_reader=self._graph_reader, + ) instance_result = await self._mutation_writer.apply_instance_operations( tenant_id=tenant_id, knowledge_graph_id=knowledge_graph_id, @@ -154,6 +174,9 @@ async def apply_mutation_jsonl( errors.extend(str(item) for item in instance_result.get("errors", [])) else: operations_applied = int(instance_result.get("operations_applied", 0)) + else: + nodes_before = {} + edges_before = {} if errors: await self._session.rollback() @@ -176,9 +199,21 @@ async def apply_mutation_jsonl( json.dumps(operation.model_dump(mode="json"), separators=(",", ":")) for operation in applied_operations ) + instance_changes_jsonl = "" + if instance_ops and self._graph_reader is not None: + change_records = await merge_instance_change_records( + tenant_id=tenant_id, + knowledge_graph_id=knowledge_graph_id, + operations=instance_ops, + graph_reader=self._graph_reader, + nodes_before=nodes_before, + edges_before=edges_before, + ) + instance_changes_jsonl = instance_changes_to_jsonl(change_records) return { "applied": True, "errors": [], "operations_applied": operations_applied, "applied_jsonl": applied_jsonl, + "instance_changes_jsonl": instance_changes_jsonl, } diff --git a/src/api/infrastructure/management/extraction_jobs_service.py b/src/api/infrastructure/management/extraction_jobs_service.py index f8174e251..4bd913eec 100644 --- a/src/api/infrastructure/management/extraction_jobs_service.py +++ b/src/api/infrastructure/management/extraction_jobs_service.py @@ -646,7 +646,12 @@ async def get_archived_job_mutations( "runStartedAt": job.run_started_at.isoformat() if job.run_started_at else None, "archivedAt": job.archived_at.isoformat() if job.archived_at else None, "jsonl": 
job.applied_mutations_jsonl or "", + "instanceChanges": job.applied_instance_changes_jsonl or "", "writeOps": job.write_ops(), + "entitiesCreated": job.entities_created, + "entitiesModified": job.entities_modified, + "relationshipsCreated": job.relationships_created, + "relationshipsModified": job.relationships_modified, } async def reset_completed_jobs(self, *, user_id: str, kg_id: str) -> dict[str, Any]: diff --git a/src/api/infrastructure/migrations/versions/l5m6n7o8p9q0_add_applied_instance_changes_jsonl.py b/src/api/infrastructure/migrations/versions/l5m6n7o8p9q0_add_applied_instance_changes_jsonl.py new file mode 100644 index 000000000..6077ca706 --- /dev/null +++ b/src/api/infrastructure/migrations/versions/l5m6n7o8p9q0_add_applied_instance_changes_jsonl.py @@ -0,0 +1,27 @@ +"""Add applied instance change snapshots to extraction jobs. + +Revision ID: l5m6n7o8p9q0 +Revises: k4l5m6n7o8p9 +Create Date: 2026-06-18 +""" + +from typing import Sequence, Union + +import sqlalchemy as sa +from alembic import op + +revision: str = "l5m6n7o8p9q0" +down_revision: Union[str, Sequence[str], None] = "k4l5m6n7o8p9" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + op.add_column( + "extraction_jobs", + sa.Column("applied_instance_changes_jsonl", sa.Text(), nullable=True), + ) + + +def downgrade() -> None: + op.drop_column("extraction_jobs", "applied_instance_changes_jsonl") diff --git a/src/api/tests/unit/extraction/application/test_archived_extraction_history.py b/src/api/tests/unit/extraction/application/test_archived_extraction_history.py index 2ecec28bd..b4a1ed470 100644 --- a/src/api/tests/unit/extraction/application/test_archived_extraction_history.py +++ b/src/api/tests/unit/extraction/application/test_archived_extraction_history.py @@ -72,4 +72,6 @@ def test_serialize_archived_job_includes_camel_case_metrics() -> None: assert payload["inputTokens"] == 1200 assert 
payload["outputTokens"] == 400 assert payload["costUsd"] == 0.45 + assert payload["entitiesCreated"] == 0 + assert payload["entitiesModified"] == 2 assert payload["strategy"] == "graph_management_session" diff --git a/src/api/tests/unit/extraction/domain/test_instance_change_record.py b/src/api/tests/unit/extraction/domain/test_instance_change_record.py new file mode 100644 index 000000000..616a24124 --- /dev/null +++ b/src/api/tests/unit/extraction/domain/test_instance_change_record.py @@ -0,0 +1,51 @@ +"""Unit tests for instance change record helpers.""" + +from __future__ import annotations + +from extraction.domain.instance_change_record import ( + build_instance_change_record, + instance_changes_to_jsonl, + parse_instance_changes_jsonl, + property_changes, +) + + +def test_property_changes_detects_added_modified_removed_fields() -> None: + changes = property_changes( + {"name": "old", "status": "ready"}, + {"name": "new", "owner": "team-a"}, + ) + assert {"key": "name", "before": "old", "after": "new"} in changes + assert {"key": "owner", "before": None, "after": "team-a"} in changes + assert {"key": "status", "before": "ready", "after": None} in changes + + +def test_build_instance_change_record_for_create() -> None: + record = build_instance_change_record( + op="CREATE", + entity_kind="node", + instance_id="service:abc", + label="service", + before=None, + after={"name": "api", "slug": "api"}, + ) + assert record["before"] is None + assert record["after"]["properties"]["slug"] == "api" + assert record["property_changes"] + + +def test_instance_changes_jsonl_round_trip() -> None: + record = build_instance_change_record( + op="UPDATE", + entity_kind="edge", + instance_id="depends_on:abc", + label="depends_on", + before={"weight": 1}, + after={"weight": 2}, + start_id="service:a", + end_id="service:b", + ) + jsonl = instance_changes_to_jsonl([record]) + parsed = parse_instance_changes_jsonl(jsonl) + assert parsed[0]["id"] == "depends_on:abc" + assert 
parsed[0]["start_id"] == "service:a" diff --git a/src/dev-ui/app/components/graph-management/GraphExtractionArchivedHistory.vue b/src/dev-ui/app/components/graph-management/GraphExtractionArchivedHistory.vue index 4bf265137..58e78197d 100644 --- a/src/dev-ui/app/components/graph-management/GraphExtractionArchivedHistory.vue +++ b/src/dev-ui/app/components/graph-management/GraphExtractionArchivedHistory.vue @@ -12,6 +12,22 @@ const props = defineProps<{ const { apiFetch } = useApiClient() +interface PropertyChange { + key: string + before: unknown + after: unknown +} + +interface InstanceChangeRecord { + op: string + type: string + id: string + label?: string | null + start_id?: string | null + end_id?: string | null + property_changes?: PropertyChange[] +} + interface ArchivedJob { jobId: string jobSet: string @@ -32,6 +48,7 @@ interface ArchivedJob { writeOps: number instanceCount: number hasMutations: boolean + hasInstanceChanges?: boolean } interface ArchivedJobSetGroup { @@ -56,14 +73,23 @@ interface ArchivedHistoryPayload { runs: ArchivedRunGroup[] } +interface ArchivedJobDetail { + jsonl: string + instanceChanges: string + entitiesCreated?: number + entitiesModified?: number + relationshipsCreated?: number + relationshipsModified?: number +} + const loading = ref(false) const error = ref<string | null>(null) const payload = ref<ArchivedHistoryPayload | null>(null) const selectedRunIndex = ref(0) const selectedJobSetIndex = ref(0) const selectedJobId = ref<string | null>(null) -const mutationJsonl = ref<string | null>(null) -const mutationLoading = ref(false) +const jobDetail = ref<ArchivedJobDetail | null>(null) +const detailLoading = ref(false) const selectedRun = computed(() => payload.value?.runs[selectedRunIndex.value] ?? null) const selectedJobSet = computed(() => selectedRun.value?.jobSets[selectedJobSetIndex.value] ?? 
null) @@ -71,6 +97,27 @@ const selectedJob = computed( () => selectedJobSet.value?.jobs.find((job) => job.jobId === selectedJobId.value) ?? null, ) +const instanceChanges = computed<InstanceChangeRecord[]>(() => { + const raw = jobDetail.value?.instanceChanges?.trim() + if (!raw) return [] + return raw.split('\n').flatMap((line) => { + const trimmed = line.trim() + if (!trimmed) return [] + try { + return [JSON.parse(trimmed) as InstanceChangeRecord] + } catch { + return [] + } + }) +}) + +const detailMetrics = computed(() => ({ + entitiesCreated: jobDetail.value?.entitiesCreated ?? selectedJob.value?.entitiesCreated ?? 0, + entitiesModified: jobDetail.value?.entitiesModified ?? selectedJob.value?.entitiesModified ?? 0, + relationshipsCreated: jobDetail.value?.relationshipsCreated ?? selectedJob.value?.relationshipsCreated ?? 0, + relationshipsModified: jobDetail.value?.relationshipsModified ?? selectedJob.value?.relationshipsModified ?? 0, +})) + async function loadHistory() { loading.value = true error.value = null @@ -81,7 +128,7 @@ async function loadHistory() { selectedRunIndex.value = 0 selectedJobSetIndex.value = 0 selectedJobId.value = payload.value?.runs[0]?.jobSets[0]?.jobs[0]?.jobId ?? null - await loadSelectedMutations() + await loadSelectedDetail() } catch (e: unknown) { error.value = e instanceof Error ? 
e.message : 'Failed to load graph writes history' payload.value = null @@ -90,19 +137,18 @@ async function loadHistory() { } } -async function loadSelectedMutations() { - mutationJsonl.value = null +async function loadSelectedDetail() { + jobDetail.value = null if (!selectedJobId.value) return - mutationLoading.value = true + detailLoading.value = true try { - const detail = await apiFetch<{ jsonl: string }>( + jobDetail.value = await apiFetch<ArchivedJobDetail>( `/management/knowledge-graphs/${encodeURIComponent(props.kgId)}/extraction-jobs/jobs/${encodeURIComponent(selectedJobId.value)}/archived-mutations`, ) - mutationJsonl.value = detail.jsonl || '' } catch { - mutationJsonl.value = null + jobDetail.value = null } finally { - mutationLoading.value = false + detailLoading.value = false } } @@ -110,18 +156,18 @@ function selectRun(index: number) { selectedRunIndex.value = index selectedJobSetIndex.value = 0 selectedJobId.value = payload.value?.runs[index]?.jobSets[0]?.jobs[0]?.jobId ?? null - void loadSelectedMutations() + void loadSelectedDetail() } function selectJobSet(index: number) { selectedJobSetIndex.value = index selectedJobId.value = selectedRun.value?.jobSets[index]?.jobs[0]?.jobId ?? null - void loadSelectedMutations() + void loadSelectedDetail() } function selectJob(jobId: string) { selectedJobId.value = jobId - void loadSelectedMutations() + void loadSelectedDetail() } function formatWhen(value: string | null | undefined): string { @@ -136,6 +182,12 @@ function formatCost(value: number | null | undefined): string { return `$${amount.toFixed(2)}` } +function formatValue(value: unknown): string { + if (value === null || value === undefined || value === '') return '—' + if (typeof value === 'object') return JSON.stringify(value) + return String(value) +} + function jobKindLabel(job: ArchivedJob): string { return job.strategy === 'graph_management_session' ? 
'GMA session' : 'Extraction job' } @@ -144,6 +196,11 @@ function jobKindVariant(job: ArchivedJob): 'secondary' | 'outline' { return job.strategy === 'graph_management_session' ? 'secondary' : 'outline' } +function instanceTitle(change: InstanceChangeRecord): string { + const label = change.label ? `${change.label} · ` : '' + return `${change.op} ${change.type} · ${label}${change.id}` +} + watch( () => props.kgId, () => { void loadHistory() }, @@ -246,17 +303,15 @@ watch( </div> <div v-if="selectedJob" class="rounded border p-3 text-xs"> - <div class="flex flex-wrap items-center gap-2"> - <Badge variant="outline">{{ selectedJob.status }}</Badge> - <Badge :variant="jobKindVariant(selectedJob)">{{ jobKindLabel(selectedJob) }}</Badge> - <span v-if="selectedJob.workerId" class="font-mono text-muted-foreground">{{ selectedJob.workerId }}</span> + <div v-if="selectedJob.workerId" class="font-mono text-muted-foreground"> + Worker {{ selectedJob.workerId }} </div> - <Separator class="my-2" /> + <Separator v-if="selectedJob.workerId" class="my-2" /> <div class="grid gap-1 text-muted-foreground sm:grid-cols-2"> - <p>{{ selectedJob.entitiesCreated }} entities created</p> - <p>{{ selectedJob.entitiesModified }} entities modified</p> - <p>{{ selectedJob.relationshipsCreated }} relationships created</p> - <p>{{ selectedJob.relationshipsModified }} relationships modified</p> + <p>{{ detailMetrics.entitiesCreated }} entities created</p> + <p>{{ detailMetrics.entitiesModified }} entities modified</p> + <p>{{ detailMetrics.relationshipsCreated }} relationships created</p> + <p>{{ detailMetrics.relationshipsModified }} relationships modified</p> <p class="font-medium text-foreground sm:col-span-2"> {{ selectedJob.writeOps }} total write ops · {{ formatCost(selectedJob.costUsd) }} </p> @@ -265,18 +320,59 @@ watch( <div class="rounded border"> <div class="border-b px-3 py-2"> - <p class="text-xs font-medium text-muted-foreground">Applied mutations (JSONL)</p> + <p class="text-xs 
font-medium text-muted-foreground">Instance changes</p> </div> - <div v-if="mutationLoading" class="flex items-center gap-2 px-3 py-4 text-xs text-muted-foreground"> + <div v-if="detailLoading" class="flex items-center gap-2 px-3 py-4 text-xs text-muted-foreground"> <Loader2 class="size-3.5 animate-spin" /> - Loading mutations... + Loading instance changes... + </div> + <div v-else-if="instanceChanges.length" class="max-h-96 space-y-3 overflow-auto p-3"> + <div + v-for="change in instanceChanges" + :key="`${change.id}-${change.op}`" + class="rounded border p-2" + > + <p class="font-medium">{{ instanceTitle(change) }}</p> + <p + v-if="change.start_id || change.end_id" + class="mt-1 font-mono text-[10px] text-muted-foreground" + > + {{ change.start_id || '—' }} → {{ change.end_id || '—' }} + </p> + <div v-if="change.property_changes?.length" class="mt-2 overflow-x-auto"> + <table class="w-full text-[10px]"> + <thead> + <tr class="text-left text-muted-foreground"> + <th class="pb-1 pr-2 font-medium">Property</th> + <th class="pb-1 pr-2 font-medium">Before</th> + <th class="pb-1 font-medium">After</th> + </tr> + </thead> + <tbody> + <tr + v-for="row in change.property_changes" + :key="`${change.id}-${row.key}`" + class="border-t border-border/60" + > + <td class="py-1 pr-2 align-top font-mono">{{ row.key }}</td> + <td class="py-1 pr-2 align-top text-red-600 dark:text-red-400"> + {{ formatValue(row.before) }} + </td> + <td class="py-1 align-top text-green-600 dark:text-green-400"> + {{ formatValue(row.after) }} + </td> + </tr> + </tbody> + </table> + </div> + <p v-else class="mt-2 text-[10px] text-muted-foreground"> + No property-level diff recorded for this instance. 
+ </p> + </div> </div> - <pre - v-else-if="mutationJsonl" - class="max-h-64 overflow-auto p-3 font-mono text-[10px] leading-relaxed whitespace-pre-wrap break-all" - >{{ mutationJsonl }}</pre> <p v-else class="px-3 py-4 text-xs text-muted-foreground"> - No stored mutation JSONL for this entry (token-only GMA session or no graph writes). + No stored instance change history for this entry. New extraction and maintenance jobs + capture before/after snapshots automatically when graph writes are applied. </p> </div> </div> From 4c14113b3822f40c712e012951d94a344ae7b4a9 Mon Sep 17 00:00:00 2001 From: aredenba-rh <aredenba@redhat.com> Date: Thu, 18 Jun 2026 15:55:28 -0400 Subject: [PATCH 147/153] feat(dev-ui): redesign Maintain step with live job activity and clearer actions Replace maintenance run history with extraction-style live progress, stack recurring schedule below run controls, and align Run maintenance / Run extraction button labels with their actual behavior. Co-authored-by: Cursor <cursoragent@cursor.com> --- .../GraphExtractionJobsWorkspace.vue | 2 +- .../GraphMaintenanceWorkspace.vue | 497 +++++++++++++++--- .../knowledge-graph-manage-workspace.test.ts | 10 + 3 files changed, 428 insertions(+), 81 deletions(-) diff --git a/src/dev-ui/app/components/graph-management/GraphExtractionJobsWorkspace.vue b/src/dev-ui/app/components/graph-management/GraphExtractionJobsWorkspace.vue index b4a0d58ca..2f3f5a44a 100644 --- a/src/dev-ui/app/components/graph-management/GraphExtractionJobsWorkspace.vue +++ b/src/dev-ui/app/components/graph-management/GraphExtractionJobsWorkspace.vue @@ -571,7 +571,7 @@ onUnmounted(() => { <div class="flex flex-wrap gap-2"> <Button size="sm" :disabled="startingExtraction" @click="startExtraction"> <Loader2 v-if="startingExtraction" class="mr-1.5 size-3.5 animate-spin" /> - Start + Run extraction </Button> <Button size="sm" variant="outline" :disabled="pausingExtraction" @click="pauseExtraction"> Pause diff --git 
a/src/dev-ui/app/components/graph-management/GraphMaintenanceWorkspace.vue b/src/dev-ui/app/components/graph-management/GraphMaintenanceWorkspace.vue index 0c3219251..06483d418 100644 --- a/src/dev-ui/app/components/graph-management/GraphMaintenanceWorkspace.vue +++ b/src/dev-ui/app/components/graph-management/GraphMaintenanceWorkspace.vue @@ -1,21 +1,22 @@ <script setup lang="ts"> -import { computed, onMounted, onUnmounted, ref } from 'vue' +import { computed, onMounted, onUnmounted, ref, watch } from 'vue' import { toast } from 'vue-sonner' import { Calendar, + Eye, GitBranch, Loader2, Play, RefreshCw, Settings, - ArrowRight, + XCircle, } from 'lucide-vue-next' +import GraphExtractionJobWatchDialog from '@/components/graph-management/GraphExtractionJobWatchDialog.vue' import { Card, CardHeader, CardTitle, CardDescription, CardContent } from '@/components/ui/card' import { Button } from '@/components/ui/button' import { Badge } from '@/components/ui/badge' import { Input } from '@/components/ui/input' import { isMaintenanceReady } from '@/utils/kgManageWorkspace' -import { buildGraphManagementStepUrl } from '@/utils/kgGraphManagement' import { commitStatusClass, formatFilesOnDisk, @@ -31,10 +32,21 @@ import { cronToDailyTime, dailyTimeToCron, formatMaintenanceRunOutcome, - maintenanceRunOutcomeVariant, MAINTENANCE_TIMEZONE_OPTIONS, } from '@/utils/kgMaintenanceSchedule' +const MAINTENANCE_JOB_SET = 'maintenance' + +type RecentJobStatusFilter = 'all' | 'pending' | 'in_progress' | 'archived' | 'failed' + +const RECENT_JOB_STATUS_FILTERS: Array<{ value: RecentJobStatusFilter; label: string }> = [ + { value: 'all', label: 'All' }, + { value: 'pending', label: 'Pending' }, + { value: 'in_progress', label: 'In progress' }, + { value: 'archived', label: 'Archived' }, + { value: 'failed', label: 'Failed' }, +] + const props = defineProps<{ kgId: string }>() @@ -90,15 +102,53 @@ interface ExtractionRunState { interface DbStatus { jobsByStatus: Record<string, number> + 
jobsBySet?: Record<string, { pending: number; in_progress: number; completed: number; failed: number; total: number }> + recentJobs: Array<{ + jobId: string + jobSet: string + status: string + workerId: string | null + startedAt: string | null + completedAt: string | null + inputTokens: number + outputTokens: number + writeOps: number + entitiesCreated?: number + entitiesModified?: number + relationshipsCreated?: number + errorMessage?: string | null + }> + activeWorkers?: Array<{ + workerId: string + jobId: string + jobSet: string + strategy: string + instanceCount: number + startedAt: string | null + }> +} + +type RecentJobEvent = DbStatus['recentJobs'][number] & { + eventKey: string + seenAtMs: number } const loading = ref(true) const refreshing = ref(false) const dataSources = ref<DataSourceRow[]>([]) const schedule = ref<MaintenanceSchedule | null>(null) -const runHistory = ref<MaintenanceRun[]>([]) const extractionRunState = ref<ExtractionRunState | null>(null) const dbStatus = ref<DbStatus | null>(null) +const pausingExtraction = ref(false) +const killingExtraction = ref(false) +const optimisticLiveUntilMs = ref<number | null>(null) +const nowMs = ref(Date.now()) +const lastStatusRefreshMs = ref<number | null>(null) +const recentJobEvents = ref<RecentJobEvent[]>([]) +const recentJobStatusFilter = ref<RecentJobStatusFilter>('all') +const watchJobId = ref<string | null>(null) +const watchDialogOpen = ref(false) +const cancellingJobId = ref<string | null>(null) const scheduleEnabled = ref(false) const scheduleTime = ref('02:00') @@ -114,6 +164,11 @@ const updatingLocalCommits = ref(false) const runningMaintenance = ref(false) let refreshInterval: ReturnType<typeof setInterval> | null = null +let clockInterval: ReturnType<typeof setInterval> | null = null + +const extractionJobsBasePath = computed( + () => `/management/knowledge-graphs/${encodeURIComponent(props.kgId)}/extraction-jobs`, +) const maintenanceReadySources = computed(() => 
dataSources.value.filter((ds) => isMaintenanceReady(ds)), @@ -137,15 +192,66 @@ const estimatedJobsFromFiles = computed(() => { return Math.ceil(total / normalizedFilesPerJob.value) }) -const pendingJobsCount = computed(() => Number(dbStatus.value?.jobsByStatus?.pending || 0)) -const inProgressJobsCount = computed(() => Number(dbStatus.value?.jobsByStatus?.in_progress || 0)) -const extractionLive = computed(() => - Boolean(extractionRunState.value?.live || inProgressJobsCount.value > 0), +const workerCount = computed(() => + Math.min(MAX_MAINTENANCE_WORKERS, Math.max(1, Math.floor(Number(workers.value) || 1))), ) - -const extractionJobsUrl = computed(() => - buildGraphManagementStepUrl(props.kgId, 'extraction-jobs'), +const maintenanceSetStats = computed( + () => dbStatus.value?.jobsBySet?.[MAINTENANCE_JOB_SET] ?? { + pending: 0, + in_progress: 0, + completed: 0, + failed: 0, + total: 0, + }, ) +const pendingJobsCount = computed(() => maintenanceSetStats.value.pending) +const inProgressJobsCount = computed(() => maintenanceSetStats.value.in_progress) +const completedJobsCount = computed(() => maintenanceSetStats.value.completed) +const failedJobsCount = computed(() => maintenanceSetStats.value.failed) +const remainingJobsCount = computed(() => pendingJobsCount.value + inProgressJobsCount.value) +const activeQueueJobsTotal = computed( + () => pendingJobsCount.value + inProgressJobsCount.value + failedJobsCount.value + completedJobsCount.value, +) +const extractionRunLive = computed(() => { + if (optimisticLiveUntilMs.value && nowMs.value < optimisticLiveUntilMs.value) return true + return Boolean(extractionRunState.value?.live) +}) +const hasRunningJobs = computed(() => inProgressJobsCount.value > 0) +const extractionLive = computed(() => extractionRunLive.value || hasRunningJobs.value) +const maintenanceProgressPercent = computed(() => { + const total = activeQueueJobsTotal.value + if (total <= 0) return 0 + return Math.round(((completedJobsCount.value + 
failedJobsCount.value) / total) * 100) +}) +const maintenanceRecentJobs = computed(() => { + const maintenanceOnly = recentJobEvents.value.filter((event) => event.jobSet === MAINTENANCE_JOB_SET) + if (recentJobStatusFilter.value === 'all') return maintenanceOnly + return maintenanceOnly.filter((event) => event.status === recentJobStatusFilter.value) +}) +const activeWorkerCount = computed( + () => (dbStatus.value?.activeWorkers || []).filter((worker) => worker.jobSet === MAINTENANCE_JOB_SET).length, +) +const idleWorkerCount = computed(() => Math.max(0, workerCount.value - activeWorkerCount.value)) +const statusAgeSeconds = computed(() => { + if (!lastStatusRefreshMs.value) return null + return Math.max(0, Math.floor((nowMs.value - lastStatusRefreshMs.value) / 1000)) +}) +const showOptimisticLiveActivity = computed( + () => Boolean(optimisticLiveUntilMs.value && nowMs.value < optimisticLiveUntilMs.value), +) +const recentJobsEmptyMessage = computed(() => { + if (runningMaintenance.value || showOptimisticLiveActivity.value) { + return 'Starting maintenance workers. Job events will appear as jobs are claimed and completed.' + } + if (recentJobEvents.value.filter((event) => event.jobSet === MAINTENANCE_JOB_SET).length === 0) { + return 'No maintenance job events yet. Run maintenance to materialize by-file jobs and start workers.' + } + const filterLabel = RECENT_JOB_STATUS_FILTERS.find( + (option) => option.value === recentJobStatusFilter.value, + )?.label + if (recentJobStatusFilter.value === 'all') return 'No maintenance job events yet.' + return `No ${filterLabel?.toLowerCase() ?? 
recentJobStatusFilter.value} maintenance job events in the recent window.` +}) function resolveApiError(e: unknown): string { const err = e as { data?: { detail?: unknown }; message?: string } @@ -191,27 +297,143 @@ async function loadSchedule() { if (payload.worker_count) workers.value = payload.worker_count } -async function loadRunHistory() { - const payload = await apiFetch<{ runs: MaintenanceRun[] }>( - `/management/knowledge-graphs/${encodeURIComponent(props.kgId)}/maintenance-runs?limit=20`, - ) - runHistory.value = payload.runs || [] -} - async function loadExtractionState() { - const base = `/management/knowledge-graphs/${encodeURIComponent(props.kgId)}/extraction-jobs` + const base = extractionJobsBasePath.value try { extractionRunState.value = await apiFetch<ExtractionRunState>(`${base}/run-state`) } catch { extractionRunState.value = null } try { - dbStatus.value = await apiFetch<DbStatus>(`${base}/database-status`) + const status = await apiFetch<DbStatus>(`${base}/database-status`) + dbStatus.value = status + mergeRecentJobEvents(status) + lastStatusRefreshMs.value = Date.now() } catch { dbStatus.value = null } } +function mergeRecentJobEvents(status: DbStatus) { + const incoming = (status.recentJobs || []).filter((job) => job.jobSet === MAINTENANCE_JOB_SET) + const now = Date.now() + const activeWorkerJobIds = new Set( + (status.activeWorkers || []) + .filter((worker) => worker.jobSet === MAINTENANCE_JOB_SET) + .map((worker) => worker.jobId), + ) + const inProgressCount = Number(status.jobsBySet?.[MAINTENANCE_JOB_SET]?.in_progress || 0) + const existingByJobId = new Map( + recentJobEvents.value.map((event) => [event.jobId, event] as const), + ) + for (const job of incoming) { + existingByJobId.set(job.jobId, { ...job, eventKey: job.jobId, seenAtMs: now }) + } + const maxAgeMs = 15 * 60 * 1000 + let merged = Array.from(existingByJobId.values()).filter((event) => now - event.seenAtMs <= maxAgeMs) + if (inProgressCount === 0) { + merged = merged.filter( 
+ (event) => event.status !== 'in_progress' || activeWorkerJobIds.has(event.jobId), + ) + } + merged.sort((a, b) => { + const aTs = Date.parse(a.completedAt || a.startedAt || '') || a.seenAtMs + const bTs = Date.parse(b.completedAt || b.startedAt || '') || b.seenAtMs + return bTs - aTs + }) + recentJobEvents.value = merged.slice(0, 80) +} + +function clearRecentJobEvents() { + recentJobEvents.value = [] +} + +function openWatch(jobId: string) { + watchJobId.value = jobId + watchDialogOpen.value = true +} + +function recentJobBadgeVariant(status: string): 'default' | 'outline' | 'secondary' | 'destructive' | 'success' { + if (status === 'in_progress') return 'default' + if (status === 'failed') return 'destructive' + if (status === 'completed') return 'success' + if (status === 'archived') return 'secondary' + return 'outline' +} + +function formatRecentWhen(startedAt: string | null, completedAt: string | null): string { + if (completedAt && startedAt) { + const startMs = Date.parse(startedAt) + const endMs = Date.parse(completedAt) + if (Number.isFinite(startMs) && Number.isFinite(endMs) && endMs >= startMs) { + const deltaSec = Math.max(0, Math.floor((endMs - startMs) / 1000)) + if (deltaSec < 60) return `${deltaSec}s` + const mins = Math.floor(deltaSec / 60) + const secs = deltaSec % 60 + if (mins < 60) return `${mins}m ${secs}s` + const hours = Math.floor(mins / 60) + return `${hours}h ${mins % 60}m` + } + } + return completedAt || startedAt || '—' +} + +function formatCompactNumber(value: number): string { + return new Intl.NumberFormat(undefined, { notation: 'compact', maximumFractionDigits: 1 }).format(value) +} + +function canCancelJob(status: string): boolean { + return status === 'pending' || status === 'in_progress' +} + +async function pauseExtractionWorkers() { + pausingExtraction.value = true + try { + const res = await apiFetch<{ message?: string }>(`${extractionJobsBasePath.value}/pause`, { + method: 'POST', + }) + toast.success('Pause requested', { 
description: res.message }) + await refreshAll({ background: true }) + } catch (e: unknown) { + toast.error('Failed to pause workers', { description: resolveApiError(e) }) + } finally { + pausingExtraction.value = false + } +} + +async function killExtractionWorkers() { + killingExtraction.value = true + try { + const res = await apiFetch<{ message?: string }>(`${extractionJobsBasePath.value}/halt`, { + method: 'POST', + }) + toast.success('Workers stopped', { description: res.message }) + optimisticLiveUntilMs.value = null + stopFastAutoRefresh() + await refreshAll({ background: true }) + } catch (e: unknown) { + toast.error('Failed to stop workers', { description: resolveApiError(e) }) + } finally { + killingExtraction.value = false + } +} + +async function cancelJob(jobId: string) { + cancellingJobId.value = jobId + try { + const res = await apiFetch<{ message?: string }>( + `${extractionJobsBasePath.value}/jobs/${encodeURIComponent(jobId)}/cancel`, + { method: 'POST' }, + ) + toast.success('Job cancelled', { description: res.message }) + await refreshAll({ background: true }) + } catch (e: unknown) { + toast.error('Cancel failed', { description: resolveApiError(e) }) + } finally { + cancellingJobId.value = null + } +} + async function refreshAll(options?: { background?: boolean }) { const background = options?.background ?? 
false if (background) refreshing.value = true @@ -220,7 +442,6 @@ async function refreshAll(options?: { background?: boolean }) { await Promise.all([ loadDataSources(), loadSchedule(), - loadRunHistory(), loadExtractionState(), ]) } catch (e: unknown) { @@ -362,7 +583,9 @@ async function runMaintenanceNow(options?: { startExtraction?: boolean }) { } async function runMaintenancePipeline() { + optimisticLiveUntilMs.value = Date.now() + 30000 await runMaintenanceNow({ startExtraction: true }) + startFastAutoRefresh() } function startAutoRefresh() { @@ -370,28 +593,56 @@ function startAutoRefresh() { refreshInterval = setInterval(() => { void refreshAll({ background: true }) }, 3000) } +function startFastAutoRefresh() { + stopAutoRefresh() + refreshInterval = setInterval(() => { void refreshAll({ background: true }) }, 1500) +} + function stopAutoRefresh() { if (!refreshInterval) return clearInterval(refreshInterval) refreshInterval = null } +function stopFastAutoRefresh() { + stopAutoRefresh() + startAutoRefresh() +} + onMounted(async () => { await refreshAll() startAutoRefresh() + clockInterval = setInterval(() => { nowMs.value = Date.now() }, 1000) }) +watch( + () => extractionRunLive.value || hasRunningJobs.value, + (active) => { + if (active) startFastAutoRefresh() + else if (!optimisticLiveUntilMs.value) stopFastAutoRefresh() + }, + { immediate: true }, +) + onUnmounted(() => { stopAutoRefresh() + if (clockInterval) clearInterval(clockInterval) }) </script> <template> <div class="space-y-6"> <div class="flex flex-wrap items-start justify-between gap-3"> - <div class="space-y-1"> + <div class="max-w-3xl space-y-2"> <p class="text-sm text-muted-foreground"> - Schedule and run incremental maintenance: sync changed sources, then execute extraction jobs. + Maintenance jobs keep your knowledge graph aligned with upstream repository changes. 
+ Each job is <span class="font-medium text-foreground">by-file</span>: changed files since + the last extraction baseline are batched and processed so the graph reflects what is on + disk for those paths. + </p> + <p class="text-sm text-muted-foreground"> + Run maintenance manually below when you are ready, or schedule recurring maintenance jobs + to sync and extract on a daily cadence. Completed runs appear in Graph Writes History. </p> </div> <Button variant="outline" size="sm" :disabled="refreshing || loading" @click="refreshAll({ background: true })"> @@ -531,15 +782,16 @@ onUnmounted(() => { </CardContent> </Card> - <div class="grid gap-6 lg:grid-cols-2"> + <div class="space-y-6"> <Card> <CardHeader> <CardTitle class="flex items-center gap-2 text-base"> <Play class="size-4 text-primary" /> - Run maintenance jobs + Run maintenance </CardTitle> <CardDescription> - Set files per job and worker concurrency, then run maintenance across all data sources. + Materialize by-file maintenance jobs from changed sources, then run parallel workers + until the queue drains. Per-job results appear in Graph Writes History. 
</CardDescription> </CardHeader> <CardContent class="space-y-4"> @@ -554,7 +806,7 @@ onUnmounted(() => { /> </div> <div class="space-y-1.5"> - <label for="maintain-workers" class="text-sm font-medium">Parallel workers</label> + <label for="maintain-workers" class="text-sm font-medium">Worker concurrency</label> <Input id="maintain-workers" v-model.number="workers" @@ -565,47 +817,156 @@ onUnmounted(() => { </div> <div class="rounded-lg border bg-muted/20 p-3"> - <p class="text-xs font-medium text-foreground/90">Maintain run preview</p> - <div class="mt-2 grid gap-2 sm:grid-cols-3"> + <p class="text-xs font-medium text-foreground/90">Run preview</p> + <div class="mt-2 grid gap-2 sm:grid-cols-2 lg:grid-cols-3"> <div class="rounded-md border bg-background px-3 py-2"> <p class="text-[11px] uppercase tracking-wide text-muted-foreground">Changed files</p> <p class="text-lg font-semibold tabular-nums">{{ totalChangedFiles }}</p> </div> - <div class="rounded-md border bg-background px-3 py-2"> - <p class="text-[11px] uppercase tracking-wide text-muted-foreground">Files per job</p> - <p class="text-lg font-semibold tabular-nums">{{ normalizedFilesPerJob }}</p> - </div> <div class="rounded-md border bg-background px-3 py-2"> <p class="text-[11px] uppercase tracking-wide text-muted-foreground">Estimated jobs</p> <p class="text-lg font-semibold tabular-nums">{{ estimatedJobsFromFiles }}</p> </div> + <div class="rounded-md border bg-background px-3 py-2"> + <p class="text-[11px] uppercase tracking-wide text-muted-foreground">Remaining jobs</p> + <p class="text-lg font-semibold tabular-nums">{{ remainingJobsCount }}</p> + </div> </div> <p class="mt-2 text-xs text-muted-foreground"> - Extraction queue: {{ pendingJobsCount }} ready · {{ inProgressJobsCount }} running + Maintenance queue: {{ pendingJobsCount }} ready · {{ inProgressJobsCount }} running <span v-if="extractionLive"> · live</span> </p> </div> - <div class="flex flex-wrap gap-2"> - <Button 
:disabled="runningMaintenance" @click="runMaintenanceNow()"> - <Loader2 v-if="runningMaintenance" class="mr-2 size-4 animate-spin" /> - Sync changed sources - </Button> + <div class="flex flex-wrap items-end gap-2"> <Button - variant="outline" :disabled="runningMaintenance || maintenanceReadySources.length === 0" @click="runMaintenancePipeline" > - Run full pipeline + <Loader2 v-if="runningMaintenance" class="mr-2 size-4 animate-spin" /> + Run maintenance + </Button> + <Button size="sm" variant="outline" :disabled="pausingExtraction" @click="pauseExtractionWorkers"> + Pause + </Button> + <Button size="sm" variant="destructive" :disabled="killingExtraction" @click="killExtractionWorkers"> + Kill </Button> </div> - <Button as-child variant="link" class="h-auto px-0 text-xs"> - <NuxtLink :to="extractionJobsUrl" class="inline-flex items-center gap-1"> - Configure job sets and monitor workers - <ArrowRight class="size-3.5" /> - </NuxtLink> - </Button> + <div class="grid gap-3 sm:grid-cols-2 text-sm"> + <div class="rounded-lg border bg-muted/30 p-3"> + <p class="text-xs text-muted-foreground">Remaining maintenance jobs</p> + <p class="text-lg font-semibold">{{ remainingJobsCount }}</p> + </div> + <div class="rounded-lg border bg-muted/30 p-3"> + <p class="text-xs text-muted-foreground">Progress</p> + <p class="text-lg font-semibold">{{ maintenanceProgressPercent }}%</p> + </div> + </div> + + <div class="rounded-lg border bg-card p-3"> + <div class="mb-2 flex flex-wrap items-center justify-between gap-2"> + <p class="text-xs font-medium text-foreground/90">Live maintenance activity</p> + <div class="flex flex-wrap items-center gap-1.5"> + <Badge variant="outline" class="font-mono text-[11px]"> + {{ completedJobsCount }} completed · {{ inProgressJobsCount }} running · {{ pendingJobsCount }} ready + </Badge> + <Badge variant="outline" class="font-mono text-[11px]"> + workers: {{ activeWorkerCount }}/{{ workerCount }} + </Badge> + <Badge v-if="idleWorkerCount > 0" 
variant="outline" class="font-mono text-[11px]"> + {{ idleWorkerCount }} idle + </Badge> + <Badge v-if="statusAgeSeconds !== null" variant="outline" class="font-mono text-[11px]"> + updated {{ statusAgeSeconds }}s ago + </Badge> + </div> + </div> + <div class="mb-3 h-1.5 overflow-hidden rounded-full bg-muted"> + <div + class="h-full bg-primary/80 transition-all" + :style="{ width: `${maintenanceProgressPercent}%` }" + /> + </div> + <div class="space-y-2"> + <div class="flex flex-wrap items-center justify-between gap-2"> + <div class="flex flex-wrap items-center gap-2"> + <p class="text-xs font-medium text-foreground/90">Recent maintenance jobs</p> + <div class="flex flex-wrap gap-1"> + <Button + v-for="option in RECENT_JOB_STATUS_FILTERS" + :key="option.value" + variant="ghost" + size="sm" + class="h-7 px-2 text-[11px]" + :class="recentJobStatusFilter === option.value ? 'bg-muted text-foreground' : 'text-muted-foreground'" + @click="recentJobStatusFilter = option.value" + > + {{ option.label }} + </Button> + </div> + </div> + <Button + variant="ghost" + size="sm" + class="h-7 px-2 text-[11px]" + :disabled="maintenanceRecentJobs.length === 0" + @click="clearRecentJobEvents" + > + Clear events + </Button> + </div> + <div v-if="maintenanceRecentJobs.length === 0" class="text-xs text-muted-foreground"> + {{ recentJobsEmptyMessage }} + </div> + <div v-else class="max-h-64 space-y-1 overflow-y-auto pr-1"> + <div + v-for="job in maintenanceRecentJobs" + :key="`recent-${job.jobId}`" + class="rounded-md border bg-muted/10 px-2 py-1.5" + > + <div class="flex flex-wrap items-center justify-between gap-2 text-[11px]"> + <div class="flex flex-wrap items-center gap-2"> + <Badge :variant="recentJobBadgeVariant(job.status)" class="font-mono">{{ job.status }}</Badge> + <span class="font-mono text-muted-foreground">{{ job.jobId }}</span> + </div> + <div class="flex flex-wrap items-center gap-2 text-muted-foreground"> + <span v-if="job.workerId" class="font-mono">{{ job.workerId 
}}</span> + <span>{{ formatRecentWhen(job.startedAt, job.completedAt) }}</span> + <Button + variant="ghost" + size="sm" + class="h-6 px-2 text-[10px]" + @click="openWatch(job.jobId)" + > + <Eye class="mr-1 size-3" /> + Watch + </Button> + <Button + v-if="canCancelJob(job.status)" + variant="ghost" + size="sm" + class="h-6 px-2 text-[10px] text-destructive hover:text-destructive" + :disabled="cancellingJobId === job.jobId" + @click="cancelJob(job.jobId)" + > + <Loader2 v-if="cancellingJobId === job.jobId" class="mr-1 size-3 animate-spin" /> + <XCircle v-else class="mr-1 size-3" /> + Cancel + </Button> + </div> + </div> + <div class="flex flex-wrap items-center gap-2 text-[10px] text-muted-foreground"> + <span class="font-mono"> + tokens {{ formatCompactNumber(job.inputTokens) }} in / {{ formatCompactNumber(job.outputTokens) }} out + </span> + <span class="font-mono">writes {{ job.writeOps }}</span> + </div> + </div> + </div> + </div> + </div> </CardContent> </Card> @@ -613,16 +974,16 @@ onUnmounted(() => { <CardHeader> <CardTitle class="flex items-center gap-2 text-base"> <Calendar class="size-4 text-primary" /> - Scheduled maintenance + Schedule recurring maintenance jobs </CardTitle> <CardDescription> - Daily cron schedule for automatic maintenance orchestration (sync changed sources). + Daily schedule to sync changed sources and run maintenance extraction automatically. 
</CardDescription> </CardHeader> <CardContent class="space-y-4"> <label class="flex items-center gap-2 text-sm"> <input v-model="scheduleEnabled" type="checkbox" class="size-4 rounded border" /> - Enable scheduled maintenance + Enable recurring maintenance schedule </label> <div class="grid gap-3 sm:grid-cols-2"> @@ -665,35 +1026,11 @@ onUnmounted(() => { </Card> </div> - <Card> - <CardHeader> - <CardTitle class="text-base">Maintenance run history</CardTitle> - <CardDescription>Recent manual and scheduled maintenance orchestration attempts.</CardDescription> - </CardHeader> - <CardContent> - <div v-if="runHistory.length === 0" class="text-sm text-muted-foreground"> - No maintenance runs recorded yet. - </div> - <div v-else class="space-y-2"> - <div - v-for="run in runHistory" - :key="run.run_id" - class="rounded-lg border p-3 text-sm" - > - <div class="flex flex-wrap items-center gap-2"> - <Badge :variant="maintenanceRunOutcomeVariant(run.outcome)" class="font-mono text-[11px]"> - {{ formatMaintenanceRunOutcome(run.outcome) }} - </Badge> - <span class="font-mono text-xs text-muted-foreground">{{ formatWhen(run.triggered_at) }}</span> - <span v-if="run.target_data_source_ids.length" class="text-xs text-muted-foreground"> - · {{ run.target_data_source_ids.length }} source(s) - </span> - </div> - <p v-if="run.message" class="mt-1 text-xs text-muted-foreground">{{ run.message }}</p> - </div> - </div> - </CardContent> - </Card> + <GraphExtractionJobWatchDialog + v-model:open="watchDialogOpen" + :kg-id="kgId" + :job-id="watchJobId" + /> </template> </div> </template> diff --git a/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts b/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts index 16eecb905..05ccf0db9 100644 --- a/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts +++ b/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts @@ -141,6 +141,16 @@ describe('KG-MANAGE-012b - maintain workspace commit and job controls', () => { 
expect(graphMaintenanceWorkspaceVue).toContain('Files per job') expect(graphMaintenanceWorkspaceVue).toContain('estimatedJobsFromFiles') }) + + it('explains by-file maintenance and hybrid run controls', () => { + expect(graphMaintenanceWorkspaceVue).toContain('by-file') + expect(graphMaintenanceWorkspaceVue).toContain('Run maintenance') + expect(graphMaintenanceWorkspaceVue).toContain('Schedule recurring maintenance jobs') + expect(graphMaintenanceWorkspaceVue).toContain('Live maintenance activity') + expect(graphMaintenanceWorkspaceVue).not.toContain('Maintenance run history') + expect(graphMaintenanceWorkspaceVue).not.toContain('Sync changed sources only') + expect(graphMaintenanceWorkspaceVue).not.toContain('Run maintenance jobs now') + }) }) describe('KG-MANAGE-012 - archived graph writes grouping', () => { From e822043743c88149d9f010a9ebfbc5a50b50bbed Mon Sep 17 00:00:00 2001 From: aredenba-rh <aredenba@redhat.com> Date: Fri, 19 Jun 2026 23:15:47 -0400 Subject: [PATCH 148/153] fix(maintenance): complete manual runs synchronously and guard baseline advances Run maintenance now waits for ingest when needed, materializes jobs, and starts extraction workers in one request instead of relying on the background scheduler. Also stop advancing extraction baselines on idle status polls and improve Maintain UI job counts, worker concurrency persistence, and outcome toasts. 
Co-authored-by: Cursor <cursoragent@cursor.com> --- .../extraction_run_reconciliation.py | 30 +-- .../maintenance_pipeline_service.py | 155 +++++++++++-- .../extraction_baseline_updater.py | 4 +- .../test_extraction_run_reconciliation.py | 36 +-- ...xtraction_run_reconciliation_repository.py | 15 +- .../test_maintenance_pipeline_service.py | 93 +++++++- .../test_extraction_baseline_updater.py | 12 +- .../GraphMaintenanceWorkspace.vue | 218 ++++++++++++++++-- .../knowledge-graph-manage-workspace.test.ts | 4 + 9 files changed, 489 insertions(+), 78 deletions(-) diff --git a/src/api/extraction/infrastructure/extraction_run_reconciliation.py b/src/api/extraction/infrastructure/extraction_run_reconciliation.py index 63afd2697..868e56b77 100644 --- a/src/api/extraction/infrastructure/extraction_run_reconciliation.py +++ b/src/api/extraction/infrastructure/extraction_run_reconciliation.py @@ -29,6 +29,9 @@ async def reconcile_quiescent_extraction_run( ) -> tuple[bool, bool]: """Finish active runs and advance baselines when the job queue has drained. + Baselines advance only after an active extraction run quiesces with no failed + jobs remaining. Idle polls with an already-idle run do not move baselines. + Returns: A tuple of (reconciled, run_was_active). ``reconciled`` is True when the database was updated. 
``run_was_active`` is True when an active run row @@ -42,27 +45,26 @@ async def reconcile_quiescent_extraction_run( run = await repo.get_run(knowledge_graph_id=knowledge_graph_id) run_was_active = run is not None and run.status != ExtractionRunStatus.IDLE + if not run_was_active: + return False, False - if run_was_active: - await repo.upsert_run( - knowledge_graph_id=knowledge_graph_id, - status=ExtractionRunStatus.IDLE, - worker_count=run.worker_count, - pause_requested=False, - completed_at=datetime.now(UTC), - ) - - baselines_updated = await advance_extraction_baselines_for_knowledge_graph( - session=session, + await repo.upsert_run( knowledge_graph_id=knowledge_graph_id, + status=ExtractionRunStatus.IDLE, + worker_count=run.worker_count, + pause_requested=False, + completed_at=datetime.now(UTC), ) - if not run_was_active and baselines_updated <= 0: - return False, False + if counts.get("failed", 0) == 0: + await advance_extraction_baselines_for_knowledge_graph( + session=session, + knowledge_graph_id=knowledge_graph_id, + ) await session.commit() - if orchestrator is not None and run_was_active: + if orchestrator is not None: orchestrator.stop_active_run(knowledge_graph_id=knowledge_graph_id) return True, run_was_active diff --git a/src/api/infrastructure/management/maintenance_pipeline_service.py b/src/api/infrastructure/management/maintenance_pipeline_service.py index fae09df3c..107522a39 100644 --- a/src/api/infrastructure/management/maintenance_pipeline_service.py +++ b/src/api/infrastructure/management/maintenance_pipeline_service.py @@ -2,6 +2,7 @@ from __future__ import annotations +import asyncio from datetime import UTC, datetime from pathlib import Path from typing import TYPE_CHECKING, Callable @@ -49,6 +50,9 @@ IKnowledgeGraphRepository, ) +_MAINTENANCE_INGEST_WAIT_TIMEOUT_SECONDS = 300.0 +_MAINTENANCE_INGEST_POLL_INTERVAL_SECONDS = 1.0 + class MaintenancePipelineService: """Coordinate maintenance ingest, job materialization, and extraction 
workers.""" @@ -146,6 +150,9 @@ async def _trigger_for_kg( return run changed_sources = self._changed_sources(data_sources) + needs_ingest = [ + ds for ds in changed_sources if self._source_needs_maintenance_ingest(ds) + ] target_ids = tuple(ds.id.value for ds in data_sources) if not changed_sources: run = self._record_run( @@ -164,21 +171,32 @@ async def _trigger_for_kg( return run try: - sync_run_ids = await self._launch_ingest_only_syncs( - changed_sources=changed_sources, - requested_by=requested_by, - now=now, - ) + sync_run_ids: tuple[str, ...] = () + if needs_ingest: + sync_run_ids = await self._launch_ingest_only_syncs( + changed_sources=needs_ingest, + requested_by=requested_by, + now=now, + ) + if needs_ingest: + message = ( + "Maintenance ingest started for " + f"{len(needs_ingest)} changed source(s)" + ) + outcome = KnowledgeGraphMaintenanceRunOutcome.INGEST_STARTED + else: + message = ( + f"Materializing maintenance jobs for {len(changed_sources)} " + "prepared source(s)" + ) + outcome = KnowledgeGraphMaintenanceRunOutcome.STARTED run = self._record_run( kg=kg, run=KnowledgeGraphMaintenanceRunRecord( run_id=run_id, triggered_at=now, - outcome=KnowledgeGraphMaintenanceRunOutcome.INGEST_STARTED, - message=( - "Maintenance ingest started for " - f"{len(changed_sources)} changed source(s)" - ), + outcome=outcome, + message=message, target_data_source_ids=tuple(ds.id.value for ds in changed_sources), sync_run_ids=sync_run_ids, files_per_job=normalized_files_per_job, @@ -186,13 +204,38 @@ async def _trigger_for_kg( ), ) await self._session.commit() - if start_extraction: - advanced = await self.advance_for_knowledge_graph( - kg_id=kg_id, - tenant_id=kg.tenant_id, - ) - if advanced is not None: - return advanced + if not start_extraction: + return run + if needs_ingest: + try: + statuses = await self._wait_for_sync_runs(sync_run_ids) + except TimeoutError: + return await self._fail_latest_run( + kg_id=kg_id, + 
outcome=KnowledgeGraphMaintenanceRunOutcome.LAUNCH_FAILED, + message=( + "Maintenance ingest did not complete within " + f"{int(_MAINTENANCE_INGEST_WAIT_TIMEOUT_SECONDS)} seconds" + ), + ) + if any(status == "failed" for status in statuses): + return await self._fail_latest_run( + kg_id=kg_id, + outcome=KnowledgeGraphMaintenanceRunOutcome.INGEST_FAILED, + message="One or more maintenance ingest syncs failed", + ) + if not all(status == "ingested" for status in statuses): + return await self._fail_latest_run( + kg_id=kg_id, + outcome=KnowledgeGraphMaintenanceRunOutcome.LAUNCH_FAILED, + message="Maintenance ingest finished in an unexpected state", + ) + advanced = await self._materialize_and_start_extraction( + kg_id=kg_id, + tenant_id=kg.tenant_id, + ) + if advanced is not None: + return advanced return run except Exception as exc: run = self._record_run( @@ -313,6 +356,28 @@ async def advance_for_knowledge_graph( if not all(status == "ingested" for status in statuses): return None + return await self._materialize_and_start_extraction( + kg_id=kg_id, + tenant_id=tenant_id, + ) + + async def _materialize_and_start_extraction( + self, + *, + kg_id: str, + tenant_id: str, + ) -> KnowledgeGraphMaintenanceRunRecord | None: + """Materialize pending maintenance jobs and start extraction workers.""" + kg = await self._kg_repo.get_by_id(KnowledgeGraphId(value=kg_id)) + if kg is None or not kg.maintenance_run_history: + return None + latest = kg.maintenance_run_history[-1] + if latest.outcome not in { + KnowledgeGraphMaintenanceRunOutcome.INGEST_STARTED, + KnowledgeGraphMaintenanceRunOutcome.STARTED, + }: + return None + data_sources = await self._ds_repo.find_by_knowledge_graph(kg_id) changed_sources = [ ds @@ -378,6 +443,50 @@ async def advance_for_knowledge_graph( await self._session.commit() return run + async def _fail_latest_run( + self, + *, + kg_id: str, + outcome: KnowledgeGraphMaintenanceRunOutcome, + message: str, + ) -> KnowledgeGraphMaintenanceRunRecord: + kg = 
await self._kg_repo.get_by_id(KnowledgeGraphId(value=kg_id)) + if kg is None or not kg.maintenance_run_history: + raise ValueError(f"Knowledge graph {kg_id} has no maintenance run history") + latest = kg.maintenance_run_history[-1] + run = self._replace_latest_run( + kg=kg, + latest=latest, + outcome=outcome, + message=message, + ) + await self._kg_repo.save(kg) + await self._session.commit() + return run + + async def _wait_for_sync_runs( + self, + sync_run_ids: tuple[str, ...], + *, + timeout_seconds: float = _MAINTENANCE_INGEST_WAIT_TIMEOUT_SECONDS, + poll_interval_seconds: float = _MAINTENANCE_INGEST_POLL_INTERVAL_SECONDS, + ) -> list[str]: + """Poll sync runs until all reach a terminal state or timeout.""" + if not sync_run_ids: + return [] + deadline = asyncio.get_running_loop().time() + timeout_seconds + while asyncio.get_running_loop().time() < deadline: + self._session.expire_all() + statuses = await self._sync_run_statuses(sync_run_ids) + if len(statuses) != len(sync_run_ids): + await asyncio.sleep(poll_interval_seconds) + continue + if any(status in {"pending", "ingesting"} for status in statuses): + await asyncio.sleep(poll_interval_seconds) + continue + return statuses + raise TimeoutError("Maintenance ingest sync runs did not finish in time") + def _with_session(self, session: AsyncSession) -> MaintenancePipelineService: from extraction.infrastructure.repositories.extraction_job_repository import ( ExtractionJobRepository, @@ -435,6 +544,18 @@ def _changed_sources(data_sources: list[DataSource]) -> list[DataSource]: and ds.tracked_branch_head_commit != ds.last_extraction_baseline_commit ] + @staticmethod + def _source_needs_maintenance_ingest(data_source: DataSource) -> bool: + """True when local JobPackages must be refreshed before maintenance jobs run.""" + from management.domain.commit_pull_state import ( + has_unpulled_commits, + resolve_ingested_head_commit, + ) + + if resolve_ingested_head_commit(data_source) is None: + return True + return 
has_unpulled_commits(data_source) + async def _launch_ingest_only_syncs( self, *, diff --git a/src/api/management/infrastructure/extraction_baseline_updater.py b/src/api/management/infrastructure/extraction_baseline_updater.py index 68bf73143..87bec3e6f 100644 --- a/src/api/management/infrastructure/extraction_baseline_updater.py +++ b/src/api/management/infrastructure/extraction_baseline_updater.py @@ -26,7 +26,7 @@ async def advance_extraction_baselines_for_knowledge_graph( knowledge_graph_id: str, data_source_repository: IDataSourceRepository | None = None, ) -> int: - """Advance extraction baselines for every prepared source on a knowledge graph.""" + """Advance extraction baselines to tracked branch head after a successful extraction run.""" if data_source_repository is None: data_source_repository = _default_data_source_repository(session) @@ -34,7 +34,7 @@ async def advance_extraction_baselines_for_knowledge_graph( updated = 0 for data_source in data_sources: before = data_source.last_extraction_baseline_commit - data_source.advance_extraction_baseline_to_ingested_head() + data_source.advance_extraction_baseline_to_tracked_head() if data_source.last_extraction_baseline_commit == before: continue await data_source_repository.save(data_source) diff --git a/src/api/tests/unit/extraction/infrastructure/test_extraction_run_reconciliation.py b/src/api/tests/unit/extraction/infrastructure/test_extraction_run_reconciliation.py index b22b83fc9..2a82d0152 100644 --- a/src/api/tests/unit/extraction/infrastructure/test_extraction_run_reconciliation.py +++ b/src/api/tests/unit/extraction/infrastructure/test_extraction_run_reconciliation.py @@ -37,7 +37,7 @@ async def test_reconcile_skips_when_jobs_remain() -> None: async def test_reconcile_finishes_active_run_and_advances_baselines() -> None: session = AsyncMock() repo = AsyncMock() - repo.count_by_status.return_value = {"pending": 0, "in_progress": 0} + repo.count_by_status.return_value = {"pending": 0, "in_progress": 
0, "failed": 0} repo.get_run.return_value = ExtractionRunRecord( id="run-001", knowledge_graph_id="kg-001", @@ -80,10 +80,10 @@ async def test_reconcile_finishes_active_run_and_advances_baselines() -> None: @pytest.mark.asyncio -async def test_reconcile_advances_baselines_when_run_already_idle() -> None: +async def test_reconcile_does_not_advance_baselines_when_run_already_idle() -> None: session = AsyncMock() repo = AsyncMock() - repo.count_by_status.return_value = {"pending": 0, "in_progress": 0} + repo.count_by_status.return_value = {"pending": 0, "in_progress": 0, "failed": 0} repo.get_run.return_value = ExtractionRunRecord( id="run-001", knowledge_graph_id="kg-001", @@ -103,35 +103,36 @@ async def test_reconcile_advances_baselines_when_run_already_idle() -> None: patch( "extraction.infrastructure.extraction_run_reconciliation.advance_extraction_baselines_for_knowledge_graph", new_callable=AsyncMock, - return_value=1, - ), + ) as advance_baselines, ): reconciled, run_was_active = await reconcile_quiescent_extraction_run( session=session, knowledge_graph_id="kg-001", ) - assert reconciled is True + assert reconciled is False assert run_was_active is False repo.upsert_run.assert_not_awaited() - session.commit.assert_awaited_once() + advance_baselines.assert_not_awaited() + session.commit.assert_not_awaited() @pytest.mark.asyncio -async def test_reconcile_noop_when_queue_and_baselines_are_current() -> None: +async def test_reconcile_finishes_run_without_advancing_when_failed_jobs_remain() -> None: session = AsyncMock() repo = AsyncMock() - repo.count_by_status.return_value = {"pending": 0, "in_progress": 0} + repo.count_by_status.return_value = {"pending": 0, "in_progress": 0, "failed": 2} repo.get_run.return_value = ExtractionRunRecord( id="run-001", knowledge_graph_id="kg-001", - status=ExtractionRunStatus.IDLE, - worker_count=0, + status=ExtractionRunStatus.RUNNING, + worker_count=4, pause_requested=False, started_at=None, completed_at=None, 
orchestrator_pid=None, ) + orchestrator = MagicMock() with ( patch( @@ -141,14 +142,17 @@ async def test_reconcile_noop_when_queue_and_baselines_are_current() -> None: patch( "extraction.infrastructure.extraction_run_reconciliation.advance_extraction_baselines_for_knowledge_graph", new_callable=AsyncMock, - return_value=0, - ), + ) as advance_baselines, ): reconciled, run_was_active = await reconcile_quiescent_extraction_run( session=session, knowledge_graph_id="kg-001", + orchestrator=orchestrator, ) - assert reconciled is False - assert run_was_active is False - session.commit.assert_not_awaited() + assert reconciled is True + assert run_was_active is True + repo.upsert_run.assert_awaited_once() + advance_baselines.assert_not_awaited() + session.commit.assert_awaited_once() + orchestrator.stop_active_run.assert_called_once_with(knowledge_graph_id="kg-001") diff --git a/src/api/tests/unit/extraction/infrastructure/test_extraction_run_reconciliation_repository.py b/src/api/tests/unit/extraction/infrastructure/test_extraction_run_reconciliation_repository.py index 3fd12e2b7..2d410334f 100644 --- a/src/api/tests/unit/extraction/infrastructure/test_extraction_run_reconciliation_repository.py +++ b/src/api/tests/unit/extraction/infrastructure/test_extraction_run_reconciliation_repository.py @@ -12,10 +12,10 @@ @pytest.mark.asyncio -async def test_reconcile_uses_data_source_repository_with_outbox() -> None: +async def test_reconcile_skips_baseline_advance_when_no_active_run() -> None: session = AsyncMock() repo = AsyncMock() - repo.count_by_status.return_value = {"pending": 0, "in_progress": 0} + repo.count_by_status.return_value = {"pending": 0, "in_progress": 0, "failed": 0} repo.get_run.return_value = None with ( @@ -26,15 +26,14 @@ async def test_reconcile_uses_data_source_repository_with_outbox() -> None: patch( "extraction.infrastructure.extraction_run_reconciliation.advance_extraction_baselines_for_knowledge_graph", new_callable=AsyncMock, - return_value=0, ) as 
advance_baselines, ): - await reconcile_quiescent_extraction_run( + reconciled, run_was_active = await reconcile_quiescent_extraction_run( session=session, knowledge_graph_id="kg-001", ) - advance_baselines.assert_awaited_once_with( - session=session, - knowledge_graph_id="kg-001", - ) + assert reconciled is False + assert run_was_active is False + advance_baselines.assert_not_awaited() + session.commit.assert_not_awaited() diff --git a/src/api/tests/unit/management/application/test_maintenance_pipeline_service.py b/src/api/tests/unit/management/application/test_maintenance_pipeline_service.py index 70d9d3bf9..d4c7448e9 100644 --- a/src/api/tests/unit/management/application/test_maintenance_pipeline_service.py +++ b/src/api/tests/unit/management/application/test_maintenance_pipeline_service.py @@ -197,13 +197,13 @@ async def test_trigger_starts_ingest_only_for_changed_sources( authz=authz, ) - with patch.object(svc, "advance_for_knowledge_graph", AsyncMock(return_value=None)): + with patch.object(svc, "_wait_for_sync_runs", AsyncMock(return_value=["ingested"])): run = await svc.trigger( user_id="user-1", kg_id=kg.id.value, files_per_job=3, worker_count=4, - start_extraction=True, + start_extraction=False, ) assert run.outcome == KnowledgeGraphMaintenanceRunOutcome.INGEST_STARTED @@ -219,6 +219,95 @@ async def test_trigger_starts_ingest_only_for_changed_sources( assert events[0].pipeline_mode == "ingest_only" +@pytest.mark.asyncio +async def test_trigger_skips_ingest_when_sources_already_prepared( + mock_session, session_factory, kg_repo, ds_repo, sync_run_repo, authz +): + kg = _make_kg() + kg_repo.seed(kg) + ds = _make_ds(ds_id="ds-prepared", kg_id=kg.id.value, tenant_id=kg.tenant_id) + ds.clone_head_commit = ds.tracked_branch_head_commit + ds.last_prepared_commit = ds.tracked_branch_head_commit + ds_repo.seed(ds) + await _grant_kg_manage(authz, kg.id.value, "user-1") + + svc = _service( + mock_session=mock_session, + session_factory=session_factory, + 
kg_repo=kg_repo, + ds_repo=ds_repo, + sync_run_repo=sync_run_repo, + authz=authz, + ) + expected = KnowledgeGraphMaintenanceRunRecord( + run_id="run-materialized", + triggered_at=datetime.now(UTC), + outcome=KnowledgeGraphMaintenanceRunOutcome.EXTRACTION_STARTED, + message="Materialized 3 maintenance job(s) and started extraction workers", + jobs_materialized=3, + ) + + with patch.object( + svc, + "_materialize_and_start_extraction", + AsyncMock(return_value=expected), + ) as materialize: + run = await svc.trigger( + user_id="user-1", + kg_id=kg.id.value, + start_extraction=True, + ) + + assert run.outcome == KnowledgeGraphMaintenanceRunOutcome.EXTRACTION_STARTED + assert sync_run_repo.saved == {} + materialize.assert_awaited_once() + + +@pytest.mark.asyncio +async def test_trigger_waits_for_ingest_before_materializing( + mock_session, session_factory, kg_repo, ds_repo, sync_run_repo, authz +): + kg = _make_kg() + kg_repo.seed(kg) + ds = _make_ds(ds_id="ds-changed", kg_id=kg.id.value, tenant_id=kg.tenant_id) + ds_repo.seed(ds) + await _grant_kg_manage(authz, kg.id.value, "user-1") + + svc = _service( + mock_session=mock_session, + session_factory=session_factory, + kg_repo=kg_repo, + ds_repo=ds_repo, + sync_run_repo=sync_run_repo, + authz=authz, + ) + expected = KnowledgeGraphMaintenanceRunRecord( + run_id="run-materialized", + triggered_at=datetime.now(UTC), + outcome=KnowledgeGraphMaintenanceRunOutcome.EXTRACTION_STARTED, + message="Materialized 1 maintenance job(s) and started extraction workers", + jobs_materialized=1, + ) + + with ( + patch.object(svc, "_wait_for_sync_runs", AsyncMock(return_value=["ingested"])) as wait, + patch.object( + svc, + "_materialize_and_start_extraction", + AsyncMock(return_value=expected), + ) as materialize, + ): + run = await svc.trigger( + user_id="user-1", + kg_id=kg.id.value, + start_extraction=True, + ) + + assert run.outcome == KnowledgeGraphMaintenanceRunOutcome.EXTRACTION_STARTED + wait.assert_awaited_once() + 
materialize.assert_awaited_once() + + @pytest.mark.asyncio async def test_advance_marks_ingest_failed_when_sync_fails( mock_session, session_factory, kg_repo, ds_repo, sync_run_repo, authz diff --git a/src/api/tests/unit/management/infrastructure/test_extraction_baseline_updater.py b/src/api/tests/unit/management/infrastructure/test_extraction_baseline_updater.py index b90d51c3d..ebea08d89 100644 --- a/src/api/tests/unit/management/infrastructure/test_extraction_baseline_updater.py +++ b/src/api/tests/unit/management/infrastructure/test_extraction_baseline_updater.py @@ -40,12 +40,12 @@ async def test_advance_extraction_baselines_updates_all_sources_on_kg() -> None: ds_a = _make_ds( ds_id="ds-a", last_extraction_baseline_commit="old-a", - last_prepared_commit="prepared-a", + tracked_branch_head_commit="tracked-a", ) ds_b = _make_ds( ds_id="ds-b", - last_extraction_baseline_commit=None, - clone_head_commit="prepared-b", + last_extraction_baseline_commit="old-b", + tracked_branch_head_commit="tracked-b", ) mock_repo = AsyncMock() mock_repo.find_by_knowledge_graph.return_value = [ds_a, ds_b] @@ -57,13 +57,13 @@ async def test_advance_extraction_baselines_updates_all_sources_on_kg() -> None: ) assert updated == 2 - assert ds_a.last_extraction_baseline_commit == "prepared-a" - assert ds_b.last_extraction_baseline_commit == "prepared-b" + assert ds_a.last_extraction_baseline_commit == "tracked-a" + assert ds_b.last_extraction_baseline_commit == "tracked-b" assert mock_repo.save.await_count == 2 @pytest.mark.asyncio -async def test_advance_extraction_baselines_skips_sources_without_ingested_head() -> None: +async def test_advance_extraction_baselines_skips_sources_without_tracked_head() -> None: ds = _make_ds(last_extraction_baseline_commit="keep-me") mock_repo = AsyncMock() mock_repo.find_by_knowledge_graph.return_value = [ds] diff --git a/src/dev-ui/app/components/graph-management/GraphMaintenanceWorkspace.vue 
b/src/dev-ui/app/components/graph-management/GraphMaintenanceWorkspace.vue index 06483d418..3e8dc8128 100644 --- a/src/dev-ui/app/components/graph-management/GraphMaintenanceWorkspace.vue +++ b/src/dev-ui/app/components/graph-management/GraphMaintenanceWorkspace.vue @@ -2,7 +2,9 @@ import { computed, onMounted, onUnmounted, ref, watch } from 'vue' import { toast } from 'vue-sonner' import { + Archive, Calendar, + ClipboardList, Eye, GitBranch, Loader2, @@ -16,6 +18,7 @@ import { Card, CardHeader, CardTitle, CardDescription, CardContent } from '@/com import { Button } from '@/components/ui/button' import { Badge } from '@/components/ui/badge' import { Input } from '@/components/ui/input' +import { Separator } from '@/components/ui/separator' import { isMaintenanceReady } from '@/utils/kgManageWorkspace' import { commitStatusClass, @@ -102,7 +105,14 @@ interface ExtractionRunState { interface DbStatus { jobsByStatus: Record<string, number> - jobsBySet?: Record<string, { pending: number; in_progress: number; completed: number; failed: number; total: number }> + jobsBySet?: Record<string, { + pending: number + in_progress: number + completed: number + failed: number + archived?: number + total: number + }> recentJobs: Array<{ jobId: string jobSet: string @@ -112,6 +122,7 @@ interface DbStatus { completedAt: string | null inputTokens: number outputTokens: number + costUsd?: number writeOps: number entitiesCreated?: number entitiesModified?: number @@ -139,8 +150,14 @@ const dataSources = ref<DataSourceRow[]>([]) const schedule = ref<MaintenanceSchedule | null>(null) const extractionRunState = ref<ExtractionRunState | null>(null) const dbStatus = ref<DbStatus | null>(null) +const dbError = ref<string | null>(null) const pausingExtraction = ref(false) const killingExtraction = ref(false) +const resettingRunning = ref(false) +const resettingCompleted = ref(false) +const resettingFailed = ref(false) +const resettingAll = ref(false) +const archivingCompleted = ref(false) 
const optimisticLiveUntilMs = ref<number | null>(null) const nowMs = ref(Date.now()) const lastStatusRefreshMs = ref<number | null>(null) @@ -159,6 +176,7 @@ const MAX_MAINTENANCE_WORKERS = 50 const workers = ref(8) const filesPerJob = ref(2) +const runControlsInitialized = ref(false) const checkingCommits = ref(false) const updatingLocalCommits = ref(false) const runningMaintenance = ref(false) @@ -201,6 +219,7 @@ const maintenanceSetStats = computed( in_progress: 0, completed: 0, failed: 0, + archived: 0, total: 0, }, ) @@ -208,7 +227,25 @@ const pendingJobsCount = computed(() => maintenanceSetStats.value.pending) const inProgressJobsCount = computed(() => maintenanceSetStats.value.in_progress) const completedJobsCount = computed(() => maintenanceSetStats.value.completed) const failedJobsCount = computed(() => maintenanceSetStats.value.failed) -const remainingJobsCount = computed(() => pendingJobsCount.value + inProgressJobsCount.value) +const archivedJobsCount = computed(() => Number(maintenanceSetStats.value.archived || 0)) +const readyJobsCount = computed(() => { + if (pendingJobsCount.value > 0) return pendingJobsCount.value + if (totalChangedFiles.value > 0 && estimatedJobsFromFiles.value > 0) { + return estimatedJobsFromFiles.value + } + return 0 +}) +const readyJobsAreEstimated = computed( + () => pendingJobsCount.value === 0 && readyJobsCount.value > 0, +) +const remainingJobsCount = computed(() => { + const queued = pendingJobsCount.value + inProgressJobsCount.value + if (queued > 0) return queued + if (totalChangedFiles.value > 0 && estimatedJobsFromFiles.value > 0) { + return estimatedJobsFromFiles.value + } + return 0 +}) const activeQueueJobsTotal = computed( () => pendingJobsCount.value + inProgressJobsCount.value + failedJobsCount.value + completedJobsCount.value, ) @@ -252,6 +289,14 @@ const recentJobsEmptyMessage = computed(() => { if (recentJobStatusFilter.value === 'all') return 'No maintenance job events yet.' 
return `No ${filterLabel?.toLowerCase() ?? recentJobStatusFilter.value} maintenance job events in the recent window.` }) +const maintenanceRunTotals = computed(() => { + const jobs = (dbStatus.value?.recentJobs || []).filter((job) => job.jobSet === MAINTENANCE_JOB_SET) + return { + inputTokens: jobs.reduce((sum, job) => sum + Number(job.inputTokens || 0), 0), + outputTokens: jobs.reduce((sum, job) => sum + Number(job.outputTokens || 0), 0), + costUsd: jobs.reduce((sum, job) => sum + Number(job.costUsd || 0), 0), + } +}) function resolveApiError(e: unknown): string { const err = e as { data?: { detail?: unknown }; message?: string } @@ -290,11 +335,14 @@ async function loadSchedule() { `/management/knowledge-graphs/${encodeURIComponent(props.kgId)}/maintenance-schedule`, ) schedule.value = payload - scheduleEnabled.value = payload.enabled - scheduleTimezone.value = payload.timezone_name || 'UTC' - scheduleTime.value = cronToDailyTime(payload.cron_expression) || '02:00' - if (payload.files_per_job) filesPerJob.value = payload.files_per_job - if (payload.worker_count) workers.value = payload.worker_count + if (!runControlsInitialized.value) { + scheduleEnabled.value = payload.enabled + scheduleTimezone.value = payload.timezone_name || 'UTC' + scheduleTime.value = cronToDailyTime(payload.cron_expression) || '02:00' + if (payload.files_per_job) filesPerJob.value = payload.files_per_job + if (payload.worker_count) workers.value = payload.worker_count + runControlsInitialized.value = true + } } async function loadExtractionState() { @@ -309,8 +357,10 @@ async function loadExtractionState() { dbStatus.value = status mergeRecentJobEvents(status) lastStatusRefreshMs.value = Date.now() - } catch { + dbError.value = null + } catch (e: unknown) { dbStatus.value = null + dbError.value = resolveApiError(e) } } @@ -434,6 +484,48 @@ async function cancelJob(jobId: string) { } } +async function resetByKind(kind: 'stale' | 'completed' | 'failed' | 'all') { + const map = { + stale: { 
ref: resettingRunning, path: 'reset-stale' }, + completed: { ref: resettingCompleted, path: 'reset-completed' }, + failed: { ref: resettingFailed, path: 'reset-failed' }, + all: { ref: resettingAll, path: 'reset' }, + } as const + map[kind].ref.value = true + try { + const res = await apiFetch<{ message?: string; reset_count?: number; containers_stopped?: number }>( + `${extractionJobsBasePath.value}/${map[kind].path}`, + { method: 'POST' }, + ) + toast.success(kind === 'stale' ? 'Running jobs reset' : 'Jobs reset', { + description: res.message || (res.reset_count !== undefined ? `${res.reset_count} job(s) reset` : undefined), + }) + await refreshAll({ background: true }) + } catch (e: unknown) { + toast.error('Reset failed', { description: resolveApiError(e) }) + } finally { + map[kind].ref.value = false + } +} + +async function archiveCompletedJobs() { + archivingCompleted.value = true + try { + const res = await apiFetch<{ message?: string; archived_count?: number }>( + `${extractionJobsBasePath.value}/archive-completed`, + { method: 'POST' }, + ) + toast.success('Completed jobs archived', { + description: res.message || (res.archived_count !== undefined ? `${res.archived_count} job(s) archived` : undefined), + }) + await refreshAll({ background: true }) + } catch (e: unknown) { + toast.error('Archive failed', { description: resolveApiError(e) }) + } finally { + archivingCompleted.value = false + } +} + async function refreshAll(options?: { background?: boolean }) { const background = options?.background ?? 
false if (background) refreshing.value = true @@ -571,9 +663,20 @@ async function runMaintenanceNow(options?: { startExtraction?: boolean }) { }, }, ) - toast.success('Maintenance run recorded', { - description: run.message || formatMaintenanceRunOutcome(run.outcome), - }) + const description = run.message || formatMaintenanceRunOutcome(run.outcome) + if (run.outcome === 'extraction-started') { + toast.success('Maintenance started', { description }) + } else if ( + run.outcome === 'ingest-failed' + || run.outcome === 'launch-failed' + || run.outcome === 'preflight-failed' + ) { + toast.error('Maintenance failed', { description }) + } else if (run.outcome === 'no-changes') { + toast.message('No maintenance work', { description }) + } else { + toast.success('Maintenance run recorded', { description }) + } await refreshAll({ background: true }) } catch (e: unknown) { toast.error('Maintenance run failed', { description: resolveApiError(e) }) @@ -833,7 +936,9 @@ onUnmounted(() => { </div> </div> <p class="mt-2 text-xs text-muted-foreground"> - Maintenance queue: {{ pendingJobsCount }} ready · {{ inProgressJobsCount }} running + Maintenance queue: {{ readyJobsCount }} ready + <span v-if="readyJobsAreEstimated"> (estimated)</span> + · {{ inProgressJobsCount }} running <span v-if="extractionLive"> · live</span> </p> </div> @@ -870,7 +975,8 @@ onUnmounted(() => { <p class="text-xs font-medium text-foreground/90">Live maintenance activity</p> <div class="flex flex-wrap items-center gap-1.5"> <Badge variant="outline" class="font-mono text-[11px]"> - {{ completedJobsCount }} completed · {{ inProgressJobsCount }} running · {{ pendingJobsCount }} ready + {{ completedJobsCount }} completed · {{ inProgressJobsCount }} running · {{ readyJobsCount }} ready + <span v-if="readyJobsAreEstimated"> (est.)</span> </Badge> <Badge variant="outline" class="font-mono text-[11px]"> workers: {{ activeWorkerCount }}/{{ workerCount }} @@ -1026,6 +1132,92 @@ onUnmounted(() => { </Card> </div> + 
<Card> + <CardHeader> + <CardTitle class="flex items-center gap-2 text-base"> + <ClipboardList class="size-4" /> + Job Status + <Loader2 v-if="refreshing" class="size-3.5 animate-spin text-muted-foreground" /> + </CardTitle> + <CardDescription>Maintenance job metrics and queue maintenance actions.</CardDescription> + </CardHeader> + <CardContent class="space-y-4"> + <div v-if="loading && !dbStatus" class="flex items-center gap-2 text-sm text-muted-foreground"> + <Loader2 class="size-4 animate-spin" /> + Loading job status... + </div> + <div v-else-if="dbError && !dbStatus" class="text-sm text-destructive">{{ dbError }}</div> + <template v-else-if="dbStatus"> + <div class="grid gap-3 sm:grid-cols-2 lg:grid-cols-5"> + <div class="rounded-lg border p-3 text-center"> + <p class="text-xs text-muted-foreground"> + Ready + <span v-if="readyJobsAreEstimated" class="block text-[10px] normal-case">(estimated)</span> + </p> + <p class="text-xl font-semibold">{{ readyJobsCount }}</p> + </div> + <div class="rounded-lg border p-3 text-center"> + <p class="text-xs text-muted-foreground">Running</p> + <p class="text-xl font-semibold">{{ inProgressJobsCount }}</p> + </div> + <div class="rounded-lg border p-3 text-center"> + <p class="text-xs text-muted-foreground">Completed</p> + <p class="text-xl font-semibold">{{ completedJobsCount }}</p> + </div> + <div class="rounded-lg border p-3 text-center"> + <p class="text-xs text-muted-foreground">Failed</p> + <p class="text-xl font-semibold">{{ failedJobsCount }}</p> + </div> + <div class="rounded-lg border p-3 text-center"> + <p class="text-xs text-muted-foreground">Archived</p> + <p class="text-xl font-semibold">{{ archivedJobsCount }}</p> + </div> + </div> + + <Separator /> + + <div class="flex flex-wrap gap-2"> + <Button size="sm" variant="outline" :disabled="resettingRunning" @click="resetByKind('stale')"> + Reset Running + </Button> + <Button size="sm" variant="outline" :disabled="resettingCompleted" 
@click="resetByKind('completed')"> + Reset Completed + </Button> + <Button + size="sm" + variant="outline" + :disabled="archivingCompleted || completedJobsCount === 0" + @click="archiveCompletedJobs" + > + <Archive class="mr-1.5 size-3.5" /> + Archive Completed + </Button> + <Button size="sm" variant="outline" :disabled="resettingFailed" @click="resetByKind('failed')"> + Reset Failed + </Button> + <Button size="sm" variant="outline" :disabled="resettingAll" @click="resetByKind('all')"> + Reset All Jobs + </Button> + </div> + + <div class="rounded-lg border bg-muted/20 p-3 text-xs"> + <p class="font-medium">{{ MAINTENANCE_JOB_SET }}</p> + <p class="text-muted-foreground"> + ready {{ readyJobsCount }}<span v-if="readyJobsAreEstimated"> (estimated)</span> + · running {{ inProgressJobsCount }} · done {{ completedJobsCount }} · failed {{ failedJobsCount }} · archived {{ archivedJobsCount }} + </p> + </div> + + <div class="rounded-lg border bg-muted/20 p-3 text-xs text-muted-foreground"> + Run totals (recent maintenance jobs) — + input {{ maintenanceRunTotals.inputTokens.toLocaleString() }} · + output {{ maintenanceRunTotals.outputTokens.toLocaleString() }} · + cost ${{ maintenanceRunTotals.costUsd.toFixed(4) }} + </div> + </template> + </CardContent> + </Card> + <GraphExtractionJobWatchDialog v-model:open="watchDialogOpen" :kg-id="kgId" diff --git a/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts b/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts index 05ccf0db9..f84385bae 100644 --- a/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts +++ b/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts @@ -140,6 +140,8 @@ describe('KG-MANAGE-012b - maintain workspace commit and job controls', () => { expect(graphMaintenanceWorkspaceVue).toContain('filesPerJob = ref(2)') expect(graphMaintenanceWorkspaceVue).toContain('Files per job') expect(graphMaintenanceWorkspaceVue).toContain('estimatedJobsFromFiles') + 
expect(graphMaintenanceWorkspaceVue).toContain('readyJobsCount') + expect(graphMaintenanceWorkspaceVue).toContain('runControlsInitialized') }) it('explains by-file maintenance and hybrid run controls', () => { @@ -147,6 +149,8 @@ describe('KG-MANAGE-012b - maintain workspace commit and job controls', () => { expect(graphMaintenanceWorkspaceVue).toContain('Run maintenance') expect(graphMaintenanceWorkspaceVue).toContain('Schedule recurring maintenance jobs') expect(graphMaintenanceWorkspaceVue).toContain('Live maintenance activity') + expect(graphMaintenanceWorkspaceVue).toContain('Job Status') + expect(graphMaintenanceWorkspaceVue).toContain('Archive Completed') expect(graphMaintenanceWorkspaceVue).not.toContain('Maintenance run history') expect(graphMaintenanceWorkspaceVue).not.toContain('Sync changed sources only') expect(graphMaintenanceWorkspaceVue).not.toContain('Run maintenance jobs now') From 13d0b978f9626322ea32a43d1a19d6a5b815851a Mon Sep 17 00:00:00 2001 From: aredenba-rh <aredenba@redhat.com> Date: Sat, 20 Jun 2026 00:02:33 -0400 Subject: [PATCH 149/153] feat(maintenance): materialize baseline/HEAD snapshots and diffs for agents Write commit-scoped repository-files paths for maintenance jobs, fetch baseline content from GitHub, and document the layout in prompts and sources-index so workers compare last-extraction and branch-tip state. Also persist maintenance run history and commit pending jobs before starting extraction workers. 
Co-authored-by: Cursor <cursoragent@cursor.com> --- src/api/extraction/domain/extraction_job.py | 27 +- .../infrastructure/extraction_job_prompt.py | 69 +++- .../extraction_job_workdir_materializer.py | 60 +++- .../infrastructure/maintenance_job_prompt.py | 11 +- .../maintenance_repository_files.py | 314 ++++++++++++++++++ .../prepared_job_package_reader.py | 4 + .../maintenance_baseline_fetcher.py | 148 +++++++++ .../management/maintenance_changed_files.py | 6 + .../maintenance_job_materializer.py | 18 +- .../maintenance_pipeline_service.py | 29 +- .../test_maintenance_job_prompt.py | 44 +++ .../test_maintenance_repository_files.py | 183 ++++++++++ .../test_maintenance_job_materializer.py | 14 +- .../test_maintenance_pipeline_service.py | 132 ++++++++ 14 files changed, 1017 insertions(+), 42 deletions(-) create mode 100644 src/api/extraction/infrastructure/maintenance_repository_files.py create mode 100644 src/api/infrastructure/extraction_workload/maintenance_baseline_fetcher.py create mode 100644 src/api/tests/unit/extraction/infrastructure/test_maintenance_job_prompt.py create mode 100644 src/api/tests/unit/extraction/infrastructure/test_maintenance_repository_files.py diff --git a/src/api/extraction/domain/extraction_job.py b/src/api/extraction/domain/extraction_job.py index 39e54e2de..2f9296711 100644 --- a/src/api/extraction/domain/extraction_job.py +++ b/src/api/extraction/domain/extraction_job.py @@ -35,13 +35,29 @@ class ExtractionTargetFile: path: str repository_folder: str package_id: str + baseline_commit: str | None = None + head_commit: str | None = None + change_status: str | None = None + patch: str | None = None + data_source_id: str | None = None def to_dict(self) -> dict[str, Any]: - return { + payload: dict[str, Any] = { "path": self.path, "repository_folder": self.repository_folder, "package_id": self.package_id, } + if self.baseline_commit is not None: + payload["baseline_commit"] = self.baseline_commit + if self.head_commit is not None: + 
payload["head_commit"] = self.head_commit + if self.change_status is not None: + payload["change_status"] = self.change_status + if self.patch is not None: + payload["patch"] = self.patch + if self.data_source_id is not None: + payload["data_source_id"] = self.data_source_id + return payload @classmethod def from_dict(cls, data: dict[str, Any]) -> ExtractionTargetFile: @@ -49,6 +65,15 @@ def from_dict(cls, data: dict[str, Any]) -> ExtractionTargetFile: path=str(data.get("path") or ""), repository_folder=str(data.get("repository_folder") or ""), package_id=str(data.get("package_id") or ""), + baseline_commit=( + str(data["baseline_commit"]) if data.get("baseline_commit") else None + ), + head_commit=str(data["head_commit"]) if data.get("head_commit") else None, + change_status=str(data["change_status"]) if data.get("change_status") else None, + patch=str(data["patch"]) if data.get("patch") is not None else None, + data_source_id=( + str(data["data_source_id"]) if data.get("data_source_id") else None + ), ) diff --git a/src/api/extraction/infrastructure/extraction_job_prompt.py b/src/api/extraction/infrastructure/extraction_job_prompt.py index c733384ca..a16e43745 100644 --- a/src/api/extraction/infrastructure/extraction_job_prompt.py +++ b/src/api/extraction/infrastructure/extraction_job_prompt.py @@ -5,6 +5,7 @@ from pathlib import Path from extraction.domain.extraction_job import ExtractionJobRecord +from infrastructure.management.maintenance_job_materializer import MAINTENANCE_JOB_SET_NAME EXTRACTION_PROMPT_FILENAME = "extraction_prompt.md" MUTATIONS_HELPER = "helpers/workload-mutations.sh" @@ -14,7 +15,9 @@ EXTRACTION_JOB_INVOKE_PROMPT = ( "You are running a Kartograph extraction job in /workspace. " f"Read {EXTRACTION_PROMPT_FILENAME}, job-context.json, and sources-index.json, then follow " - "the instructions completely. Read job-context.json target_instances for graph_id and " + "the instructions completely. 
For maintenance jobs, sources-index.json layout.mode is " + "maintenance_commit_snapshots — read baseline/HEAD paths and diff paths from layout.target_files. " + "Read job-context.json target_instances for graph_id and " "properties_missing before querying the graph. For existing instances, fetch live properties " f"with `bash {GRAPH_READ_HELPER} search-by-slug <slug> --entity-type <Type> --out " "mutations/current_<slug>.json` before editing. Copy JSONL shapes from " @@ -40,6 +43,42 @@ def write_extraction_prompt_file(*, workdir: Path, prompt: str) -> Path: return path +def build_maintenance_target_files_prompt_section(*, job: ExtractionJobRecord) -> str: + """Describe commit-scoped repository-files layout for maintenance jobs.""" + if not job.target_files: + return "" + + lines = [ + "## Maintenance repository layout", + "Changed files use commit-first directories under repository-files/:", + "- Baseline (last extraction): repository-files/{baseline_commit}/{repository_folder}/{path}", + "- HEAD (branch tip): repository-files/{head_commit}/{repository_folder}/{path}", + "- Unified diff: repository-files/diffs/{baseline_commit}..{head_commit}/{repository_folder}/{path}.patch", + "See sources-index.json layout.target_files for exact paths per assigned file.", + "", + "### Assigned files", + ] + for target_file in job.target_files: + baseline = target_file.baseline_commit or "<baseline>" + head = target_file.head_commit or "<head>" + status = target_file.change_status or "modified" + lines.append( + f"- [{status}] {target_file.repository_folder}/{target_file.path}" + ) + lines.append(f" - baseline: repository-files/{baseline}/{target_file.repository_folder}/{target_file.path}") + if status != "removed": + lines.append( + f" - head: repository-files/{head}/{target_file.repository_folder}/{target_file.path}" + ) + if target_file.patch and target_file.baseline_commit and target_file.head_commit: + lines.append( + " - diff: " + 
f"repository-files/diffs/{target_file.baseline_commit}..{target_file.head_commit}/" + f"{target_file.repository_folder}/{target_file.path}.patch" + ) + return "\n".join(lines) + + def build_extraction_job_prompt(*, job: ExtractionJobRecord) -> str: """Return the agent prompt for one materialized extraction job.""" lines = [ @@ -74,19 +113,23 @@ def build_extraction_job_prompt(*, job: ExtractionJobRecord) -> str: lines.append(f"- {instance.entity_type}: {instance.slug}") lines.append("") if job.target_files: - lines.extend( - [ - "## Target repository files", - "Inspect only the files materialized under repository-files/. Use their content to", - "extract entities and relationships, then emit JSONL mutations via the workload API.", - "", - ] - ) - for target_file in job.target_files: - lines.append( - f"- {target_file.repository_folder}/{target_file.path} (package {target_file.package_id})" + if job.job_set_name == MAINTENANCE_JOB_SET_NAME: + lines.append(build_maintenance_target_files_prompt_section(job=job)) + lines.append("") + else: + lines.extend( + [ + "## Target repository files", + "Inspect only the files materialized under repository-files/. 
Use their content to", + "extract entities and relationships, then emit JSONL mutations via the workload API.", + "", + ] ) - lines.append("") + for target_file in job.target_files: + lines.append( + f"- {target_file.repository_folder}/{target_file.path} (package {target_file.package_id})" + ) + lines.append("") lines.extend( [ "## Repository files", diff --git a/src/api/extraction/infrastructure/extraction_job_workdir_materializer.py b/src/api/extraction/infrastructure/extraction_job_workdir_materializer.py index 969b030f1..10efcb8c4 100644 --- a/src/api/extraction/infrastructure/extraction_job_workdir_materializer.py +++ b/src/api/extraction/infrastructure/extraction_job_workdir_materializer.py @@ -21,12 +21,20 @@ materialize_target_files, write_sources_index, ) +from extraction.infrastructure.maintenance_repository_files import ( + materialize_maintenance_target_files, + write_maintenance_sources_index, +) from extraction.infrastructure.extraction_job_workdir_layout import prepare_agentic_ci_workspace from extraction.infrastructure.prepared_job_package_reader import SqlPreparedJobPackageReader +from infrastructure.extraction_workload.maintenance_baseline_fetcher import ( + MaintenanceBaselineContentFetcher, +) from infrastructure.job_packages.archive_hydrator import JobPackageArchiveHydrator from extraction.infrastructure.workload_runtime_settings import ExtractionWorkloadRuntimeSettings from extraction.ports.extraction_job_target_context import IExtractionJobTargetContextEnricher from extraction.ports.runtime import ScopedWorkloadCredentials +from infrastructure.management.maintenance_job_materializer import MAINTENANCE_JOB_SET_NAME class ExtractionJobWorkdirMaterializer: @@ -74,8 +82,9 @@ async def prepare( knowledge_graph_id=job.knowledge_graph_id, ) packages_by_id = {source.package_id: source for source in job_packages} - materialization = self._materialize_repository_files( + materialization = await self._materialize_repository_files( job=job, + 
tenant_id=tenant_id, repository_files_dir=repository_files_dir, job_packages=job_packages, packages_by_id=packages_by_id, @@ -86,12 +95,21 @@ async def prepare( warnings=tuple(hydration_warnings), ) ) - write_sources_index( - job_root=job_root, - knowledge_graph_id=job.knowledge_graph_id, - job_packages=job_packages, - materialization=materialization, - ) + if job.job_set_name == MAINTENANCE_JOB_SET_NAME and job.target_files: + write_maintenance_sources_index( + job_root=job_root, + knowledge_graph_id=job.knowledge_graph_id, + job_packages=job_packages, + target_files=job.target_files, + materialization=materialization, + ) + else: + write_sources_index( + job_root=job_root, + knowledge_graph_id=job.knowledge_graph_id, + job_packages=job_packages, + materialization=materialization, + ) prepare_agentic_ci_workspace( job_root, container_run_uid=self._settings.container_run_uid, @@ -148,14 +166,40 @@ async def _build_target_instances_context( instances=job.target_instances, ) - def _materialize_repository_files( + async def _materialize_repository_files( self, *, job: ExtractionJobRecord, + tenant_id: str, repository_files_dir: Path, job_packages: tuple[PreparedJobPackageSource, ...], packages_by_id: dict[str, PreparedJobPackageSource], ) -> RepositoryFilesMaterializationResult: + if job.job_set_name == MAINTENANCE_JOB_SET_NAME and job.target_files: + baseline_fetcher = MaintenanceBaselineContentFetcher( + session=self._prepared_job_package_reader.session, + tenant_id=tenant_id, + ) + + async def fetch_baseline_content( + data_source_id: str, + path: str, + ref: str, + ) -> bytes | None: + return await baseline_fetcher.fetch_file( + data_source_id=data_source_id, + path=path, + ref=ref, + ) + + return await materialize_maintenance_target_files( + repository_files_dir=repository_files_dir, + job_package_work_dir=self._job_package_work_dir, + target_files=job.target_files, + packages_by_id=packages_by_id, + fetch_baseline_content=fetch_baseline_content, + ) + if 
job.target_files: return materialize_target_files( repository_files_dir=repository_files_dir, diff --git a/src/api/extraction/infrastructure/maintenance_job_prompt.py b/src/api/extraction/infrastructure/maintenance_job_prompt.py index 4e32c3eb1..28c21058b 100644 --- a/src/api/extraction/infrastructure/maintenance_job_prompt.py +++ b/src/api/extraction/infrastructure/maintenance_job_prompt.py @@ -20,9 +20,10 @@ def build_maintenance_job_prompt(*, job: ExtractionJobRecord) -> str: return ( f"{base}\n\n" "## Maintenance objective\n" - "These repository files changed since the last extraction baseline. Use the diff " - "sections above (when present) and the materialized files under repository-files/ " - "to update existing graph instances and relationships. Do not limit updates to " - "only the files' local entities — reconcile downstream references across the " - "entire knowledge graph schema." + "These repository files changed since the last extraction baseline. Compare the " + "baseline snapshot (last successful extraction commit) with the HEAD snapshot " + "(current branch tip). Read unified diffs under repository-files/diffs/ when " + "present. Use that evidence plus the live graph to update existing instances and " + "relationships. Do not limit updates to only the files' local entities — reconcile " + "downstream references across the entire knowledge graph schema." 
) diff --git a/src/api/extraction/infrastructure/maintenance_repository_files.py b/src/api/extraction/infrastructure/maintenance_repository_files.py new file mode 100644 index 000000000..892d128f2 --- /dev/null +++ b/src/api/extraction/infrastructure/maintenance_repository_files.py @@ -0,0 +1,314 @@ +"""Materialize commit-scoped repository snapshots for maintenance extraction jobs.""" + +from __future__ import annotations + +import json +from collections.abc import Awaitable, Callable +from pathlib import Path +from typing import Any + +from extraction.domain.extraction_job import ExtractionTargetFile +from extraction.domain.prepared_job_package_source import PreparedJobPackageSource +from extraction.infrastructure.extraction_job_repository_files import ( + RepositoryFilesMaterializationResult, +) +from shared_kernel.job_package.path_safety import validate_zip_entry_name +from shared_kernel.job_package.reader import JobPackageReader +from shared_kernel.job_package.value_objects import JobPackageId + +BaselineContentFetcher = Callable[[str, str, str], Awaitable[bytes | None]] + +_ADDED_STATUSES = frozenset({"added"}) +_REMOVED_STATUSES = frozenset({"removed"}) +_HEAD_STATUSES = frozenset({"added", "modified", "renamed", "changed", "copied"}) +_BASELINE_STATUSES = frozenset({"removed", "modified", "renamed", "changed", "copied"}) + + +def maintenance_head_path( + *, + repository_files_dir: Path, + head_commit: str, + repository_folder: str, + path: str, +) -> Path: + return repository_files_dir / head_commit / repository_folder / path + + +def maintenance_baseline_path( + *, + repository_files_dir: Path, + baseline_commit: str, + repository_folder: str, + path: str, +) -> Path: + return repository_files_dir / baseline_commit / repository_folder / path + + +def maintenance_diff_path( + *, + repository_files_dir: Path, + baseline_commit: str, + head_commit: str, + repository_folder: str, + path: str, +) -> Path: + return ( + repository_files_dir + / "diffs" + / 
f"{baseline_commit}..{head_commit}" + / repository_folder + / f"{path}.patch" + ) + + +def _write_bytes(path: Path, content: bytes) -> None: + validate_zip_entry_name(path.name) + path.parent.mkdir(parents=True, exist_ok=True) + path.write_bytes(content) + + +def _write_text(path: Path, content: str) -> None: + _write_bytes(path, content.encode("utf-8")) + + +def _materialize_head_from_job_package( + *, + repository_files_dir: Path, + job_package_work_dir: Path, + target_file: ExtractionTargetFile, + source: PreparedJobPackageSource, +) -> bool: + head_commit = target_file.head_commit + if not head_commit: + return False + archive_path = job_package_work_dir / JobPackageId(value=source.package_id).archive_name() + if not archive_path.is_file(): + return False + reader = JobPackageReader(archive_path) + for change in reader.iter_changeset(): + if change.path != target_file.path or change.content_ref is None: + continue + validate_zip_entry_name(change.path) + output_path = maintenance_head_path( + repository_files_dir=repository_files_dir, + head_commit=head_commit, + repository_folder=source.repository_folder, + path=target_file.path, + ) + _write_bytes(output_path, reader.read_content(change.content_ref)) + return True + return False + + +async def materialize_maintenance_target_files( + *, + repository_files_dir: Path, + job_package_work_dir: Path, + target_files: tuple[ExtractionTargetFile, ...], + packages_by_id: dict[str, PreparedJobPackageSource], + fetch_baseline_content: BaselineContentFetcher | None = None, +) -> RepositoryFilesMaterializationResult: + """Write baseline/HEAD snapshots and unified diffs for maintenance jobs.""" + files_written = 0 + packages_missing: list[str] = [] + sample_paths: list[str] = [] + warnings: list[str] = [] + + for target_file in target_files: + source = packages_by_id.get(target_file.package_id) + if source is None: + warnings.append( + f"No prepared JobPackage for target file 
{target_file.repository_folder}/{target_file.path}" + ) + continue + + status = (target_file.change_status or "modified").lower() + baseline_commit = target_file.baseline_commit + head_commit = target_file.head_commit + data_source_id = target_file.data_source_id or source.data_source_id + + if status in _HEAD_STATUSES: + archive_path = job_package_work_dir / JobPackageId(value=source.package_id).archive_name() + if not archive_path.is_file(): + packages_missing.append(target_file.package_id) + elif head_commit and _materialize_head_from_job_package( + repository_files_dir=repository_files_dir, + job_package_work_dir=job_package_work_dir, + target_file=target_file, + source=source, + ): + files_written += 1 + sample_paths.append( + f"{head_commit}/{source.repository_folder}/{target_file.path}" + ) + elif head_commit: + warnings.append( + f"HEAD content missing in JobPackage for {source.repository_folder}/{target_file.path}" + ) + + if ( + status in _BASELINE_STATUSES + and baseline_commit + and fetch_baseline_content is not None + ): + baseline_bytes = await fetch_baseline_content( + data_source_id, + target_file.path, + baseline_commit, + ) + if baseline_bytes is not None: + baseline_path = maintenance_baseline_path( + repository_files_dir=repository_files_dir, + baseline_commit=baseline_commit, + repository_folder=source.repository_folder, + path=target_file.path, + ) + _write_bytes(baseline_path, baseline_bytes) + files_written += 1 + sample_paths.append( + f"{baseline_commit}/{source.repository_folder}/{target_file.path}" + ) + elif status in _REMOVED_STATUSES: + warnings.append( + f"Baseline content unavailable for removed file {source.repository_folder}/{target_file.path}" + ) + + if target_file.patch and baseline_commit and head_commit: + diff_path = maintenance_diff_path( + repository_files_dir=repository_files_dir, + baseline_commit=baseline_commit, + head_commit=head_commit, + repository_folder=source.repository_folder, + path=target_file.path, + ) + 
_write_text(diff_path, target_file.patch) + files_written += 1 + sample_paths.append( + f"diffs/{baseline_commit}..{head_commit}/{source.repository_folder}/{target_file.path}.patch" + ) + + return RepositoryFilesMaterializationResult( + files_written=files_written, + packages_requested=len({target_file.package_id for target_file in target_files}), + packages_found=len({target_file.package_id for target_file in target_files}) + - len(set(packages_missing)), + packages_missing=tuple(packages_missing), + sample_paths=tuple(sample_paths[:12]), + warnings=tuple(warnings), + ) + + +def write_maintenance_sources_index( + *, + job_root: Path, + knowledge_graph_id: str, + job_packages: tuple[PreparedJobPackageSource, ...], + target_files: tuple[ExtractionTargetFile, ...], + materialization: RepositoryFilesMaterializationResult, +) -> None: + """Write sources-index.json describing maintenance commit snapshot layout.""" + commits = sorted( + { + commit + for target_file in target_files + for commit in (target_file.baseline_commit, target_file.head_commit) + if commit + } + ) + diff_ranges = sorted( + { + f"{target_file.baseline_commit}..{target_file.head_commit}" + for target_file in target_files + if target_file.baseline_commit and target_file.head_commit + } + ) + sources = [] + for source in job_packages: + source_targets = [ + target_file + for target_file in target_files + if target_file.data_source_id == source.data_source_id + or ( + target_file.data_source_id is None + and target_file.package_id == source.package_id + ) + ] + head_commit = next( + (target.head_commit for target in source_targets if target.head_commit), + None, + ) + baseline_commit = next( + (target.baseline_commit for target in source_targets if target.baseline_commit), + None, + ) + sources.append( + { + "job_package_id": source.package_id, + "data_source_id": source.data_source_id, + "data_source_name": source.data_source_name, + "repository_folder": source.repository_folder, + 
"baseline_snapshot_root": ( + f"repository-files/{baseline_commit}/{source.repository_folder}" + if baseline_commit + else None + ), + "head_snapshot_root": ( + f"repository-files/{head_commit}/{source.repository_folder}" + if head_commit + else None + ), + } + ) + layout = { + "mode": "maintenance_commit_snapshots", + "description": ( + "Each changed file appears under repository-files/{commit_sha}/{repository_folder}/{path}. " + "Baseline copies use last_extraction_baseline_commit; HEAD copies use tracked branch tip. " + "Unified diffs live under repository-files/diffs/{baseline}..{head}/{repository_folder}/{path}.patch." + ), + "baseline_root_pattern": "repository-files/{baseline_commit}/{repository_folder}/", + "head_root_pattern": "repository-files/{head_commit}/{repository_folder}/", + "diff_root_pattern": "repository-files/diffs/{baseline_commit}..{head_commit}/{repository_folder}/", + "commits": commits, + "diff_ranges": diff_ranges, + "target_files": [ + { + **target_file.to_dict(), + "baseline_path": ( + f"repository-files/{target_file.baseline_commit}/" + f"{target_file.repository_folder}/{target_file.path}" + if target_file.baseline_commit + else None + ), + "head_path": ( + f"repository-files/{target_file.head_commit}/" + f"{target_file.repository_folder}/{target_file.path}" + if target_file.head_commit + else None + ), + "diff_path": ( + f"repository-files/diffs/{target_file.baseline_commit}..{target_file.head_commit}/" + f"{target_file.repository_folder}/{target_file.path}.patch" + if target_file.baseline_commit + and target_file.head_commit + and target_file.patch + else None + ), + } + for target_file in target_files + ], + } + (job_root / "sources-index.json").write_text( + json.dumps( + { + "version": 1, + "knowledge_graph_id": knowledge_graph_id, + "layout": layout, + "sources": sources, + "materialization": materialization.to_dict(), + }, + indent=2, + ) + + "\n", + encoding="utf-8", + ) diff --git 
a/src/api/extraction/infrastructure/prepared_job_package_reader.py b/src/api/extraction/infrastructure/prepared_job_package_reader.py index 489c431ab..7519e811d 100644 --- a/src/api/extraction/infrastructure/prepared_job_package_reader.py +++ b/src/api/extraction/infrastructure/prepared_job_package_reader.py @@ -25,6 +25,10 @@ def __init__( self._session = session self._job_package_work_dir = job_package_work_dir + @property + def session(self) -> AsyncSession: + return self._session + async def list_latest_for_knowledge_graph( self, *, knowledge_graph_id: str ) -> tuple[PreparedJobPackageSource, ...]: diff --git a/src/api/infrastructure/extraction_workload/maintenance_baseline_fetcher.py b/src/api/infrastructure/extraction_workload/maintenance_baseline_fetcher.py new file mode 100644 index 000000000..a6c7601ae --- /dev/null +++ b/src/api/infrastructure/extraction_workload/maintenance_baseline_fetcher.py @@ -0,0 +1,148 @@ +"""Fetch GitHub file content at a historical commit for maintenance jobs.""" + +from __future__ import annotations + +import base64 +import json +from urllib.parse import quote, urlparse + +import httpx +from sqlalchemy import text +from sqlalchemy.ext.asyncio import AsyncSession + +from infrastructure.settings import get_management_settings + + +class MaintenanceBaselineContentFetcher: + """Load baseline repository file bytes from GitHub at a specific ref.""" + + def __init__( + self, + *, + session: AsyncSession, + tenant_id: str, + http_client: httpx.AsyncClient | None = None, + ) -> None: + self._session = session + self._tenant_id = tenant_id + self._http_client = http_client + self._github_context_cache: dict[str, tuple[str, str, dict[str, str]]] = {} + + async def fetch_file( + self, + *, + data_source_id: str, + path: str, + ref: str, + ) -> bytes | None: + owner, repo, headers = await self._github_context_for_source(data_source_id) + encoded_path = quote(path, safe="") + url = 
f"https://api.github.com/repos/{owner}/{repo}/contents/{encoded_path}?ref={quote(ref, safe='')}" + client = self._http_client or httpx.AsyncClient(timeout=30.0) + try: + response = await client.get(url, headers=headers) + if response.status_code == 404: + return None + response.raise_for_status() + payload = response.json() + finally: + if self._http_client is None: + await client.aclose() + + if isinstance(payload, list): + return None + content = payload.get("content") + encoding = payload.get("encoding") + if not isinstance(content, str) or encoding != "base64": + return None + normalized = content.replace("\n", "") + try: + return base64.b64decode(normalized, validate=False) + except (ValueError, TypeError): + return None + + async def _github_context_for_source( + self, + data_source_id: str, + ) -> tuple[str, str, dict[str, str]]: + cached = self._github_context_cache.get(data_source_id) + if cached is not None: + return cached + + result = await self._session.execute( + text( + """ + SELECT adapter_type, connection_config, credentials_path + FROM data_sources + WHERE id = :data_source_id + """ + ), + {"data_source_id": data_source_id}, + ) + row = result.mappings().first() + if row is None: + raise ValueError(f"Data source not found: {data_source_id}") + if str(row["adapter_type"]) != "github": + raise ValueError(f"Baseline fetch supports GitHub sources only: {data_source_id}") + + connection_config = row["connection_config"] + if isinstance(connection_config, str): + connection_config = json.loads(connection_config) + owner, repo = _parse_github_connection_config(dict(connection_config or {})) + headers = await self._build_github_headers(row.get("credentials_path")) + context = (owner, repo, headers) + self._github_context_cache[data_source_id] = context + return context + + async def _build_github_headers(self, credentials_path: str | None) -> dict[str, str]: + headers = { + "Accept": "application/vnd.github+json", + "X-GitHub-Api-Version": "2022-11-28", + 
} + if not credentials_path: + return headers + + from management.infrastructure.repositories.fernet_secret_store import FernetSecretStore + + mgmt_settings = get_management_settings() + encryption_keys = [ + key.strip() + for key in mgmt_settings.encryption_key.get_secret_value().split(",") + if key.strip() + ] + if not encryption_keys: + return headers + + credential_reader = FernetSecretStore( + session=self._session, + encryption_keys=encryption_keys, + ) + try: + credentials = await credential_reader.retrieve( + path=str(credentials_path), + tenant_id=self._tenant_id, + ) + except KeyError: + credentials = {} + token = credentials.get("token") or credentials.get("access_token") + if token: + headers["Authorization"] = f"Bearer {token}" + return headers + + +def _parse_github_connection_config(config: dict[str, str]) -> tuple[str, str]: + if "repo_url" in config: + parsed = urlparse(config["repo_url"]) + path_parts = [part for part in parsed.path.split("/") if part] + if len(path_parts) < 2: + raise ValueError("repo_url must include owner and repo") + owner = path_parts[0] + repo = path_parts[1].removesuffix(".git") + return owner, repo + + if "owner" in config and "repo" in config: + return config["owner"], config["repo"] + + raise ValueError( + "connection_config must include either 'repo_url' or 'owner'+'repo' keys" + ) diff --git a/src/api/infrastructure/management/maintenance_changed_files.py b/src/api/infrastructure/management/maintenance_changed_files.py index 6a1611275..4fc1a0719 100644 --- a/src/api/infrastructure/management/maintenance_changed_files.py +++ b/src/api/infrastructure/management/maintenance_changed_files.py @@ -55,6 +55,10 @@ async def collect_changed_maintenance_files( target = matched_by_path.get(path) if target is None: continue + baseline_commit = summary.baseline_commit + head_commit = summary.tracked_head_commit + if not baseline_commit or not head_commit: + continue changed.append( ChangedMaintenanceFile( 
data_source_id=data_source.id.value, @@ -62,6 +66,8 @@ async def collect_changed_maintenance_files( path=target.path, status=str(entry.get("status", "modified")), package_id=target.package_id, + baseline_commit=baseline_commit, + head_commit=head_commit, patch=( str(entry["patch"]) if entry.get("patch") is not None diff --git a/src/api/infrastructure/management/maintenance_job_materializer.py b/src/api/infrastructure/management/maintenance_job_materializer.py index 50eee3b97..2e4cc0d0f 100644 --- a/src/api/infrastructure/management/maintenance_job_materializer.py +++ b/src/api/infrastructure/management/maintenance_job_materializer.py @@ -31,6 +31,8 @@ class ChangedMaintenanceFile: path: str status: str package_id: str + baseline_commit: str + head_commit: str patch: str | None = None @@ -55,9 +57,14 @@ def _build_maintenance_description(changed_files: Sequence[ChangedMaintenanceFil [ "", "## Scope", - "Inspect the assigned files under repository-files/, read the live graph via " - "workload-graph-read helpers, and emit JSONL mutations that keep every affected " - "entity and relationship accurate.", + "Each changed file is materialized under repository-files/ using commit-first paths:", + "- Baseline snapshot (last extraction): repository-files/{baseline_commit}/{repo}/{path}", + "- Current HEAD snapshot: repository-files/{head_commit}/{repo}/{path}", + "- Unified diff (when available): repository-files/diffs/{baseline}..{head}/{repo}/{path}.patch", + "Read job-context.json target_files for per-file commits and diff paths. Use the " + "baseline copy to understand prior graph context and the HEAD copy for the updated " + "source of truth. 
Read the live graph via workload-graph-read helpers, and emit " + "JSONL mutations that keep every affected entity and relationship accurate.", ] ) return "\n".join(lines) @@ -83,6 +90,11 @@ def materialize_maintenance_jobs( path=item.path, repository_folder=item.repository_folder, package_id=item.package_id, + baseline_commit=item.baseline_commit, + head_commit=item.head_commit, + change_status=item.status, + patch=item.patch, + data_source_id=item.data_source_id, ) for item in batch ) diff --git a/src/api/infrastructure/management/maintenance_pipeline_service.py b/src/api/infrastructure/management/maintenance_pipeline_service.py index 107522a39..9e3778bd1 100644 --- a/src/api/infrastructure/management/maintenance_pipeline_service.py +++ b/src/api/infrastructure/management/maintenance_pipeline_service.py @@ -135,7 +135,7 @@ async def _trigger_for_kg( normalized_workers = max(1, int(worker_count)) if not data_sources: - run = self._record_run( + return await self._persist_recorded_run( kg=kg, run=KnowledgeGraphMaintenanceRunRecord( run_id=run_id, @@ -146,8 +146,6 @@ async def _trigger_for_kg( worker_count=normalized_workers, ), ) - await self._session.commit() - return run changed_sources = self._changed_sources(data_sources) needs_ingest = [ @@ -155,7 +153,7 @@ async def _trigger_for_kg( ] target_ids = tuple(ds.id.value for ds in data_sources) if not changed_sources: - run = self._record_run( + return await self._persist_recorded_run( kg=kg, run=KnowledgeGraphMaintenanceRunRecord( run_id=run_id, @@ -167,8 +165,6 @@ async def _trigger_for_kg( worker_count=normalized_workers, ), ) - await self._session.commit() - return run try: sync_run_ids: tuple[str, ...] 
= () @@ -190,7 +186,7 @@ async def _trigger_for_kg( "prepared source(s)" ) outcome = KnowledgeGraphMaintenanceRunOutcome.STARTED - run = self._record_run( + run = await self._persist_recorded_run( kg=kg, run=KnowledgeGraphMaintenanceRunRecord( run_id=run_id, @@ -203,7 +199,6 @@ async def _trigger_for_kg( worker_count=normalized_workers, ), ) - await self._session.commit() if not start_extraction: return run if needs_ingest: @@ -238,7 +233,7 @@ async def _trigger_for_kg( return advanced return run except Exception as exc: - run = self._record_run( + return await self._persist_recorded_run( kg=kg, run=KnowledgeGraphMaintenanceRunRecord( run_id=run_id, @@ -250,8 +245,6 @@ async def _trigger_for_kg( worker_count=normalized_workers, ), ) - await self._session.commit() - return run async def advance_pending_pipelines(self) -> int: """Advance in-flight maintenance pipelines for all knowledge graphs.""" @@ -423,6 +416,8 @@ async def _materialize_and_start_extraction( jobs=jobs, job_set_name=MAINTENANCE_JOB_SET_NAME, ) + await self._session.commit() + orchestrator = get_extraction_run_orchestrator(session_factory=self._session_factory) await orchestrator.start( tenant_id=tenant_id, @@ -603,6 +598,18 @@ def _record_run( kg.append_maintenance_run(run) return run + async def _persist_recorded_run( + self, + *, + kg: KnowledgeGraph, + run: KnowledgeGraphMaintenanceRunRecord, + ) -> KnowledgeGraphMaintenanceRunRecord: + """Append a maintenance run record and flush it to PostgreSQL.""" + self._record_run(kg=kg, run=run) + await self._kg_repo.save(kg) + await self._session.commit() + return run + def _replace_latest_run( self, *, diff --git a/src/api/tests/unit/extraction/infrastructure/test_maintenance_job_prompt.py b/src/api/tests/unit/extraction/infrastructure/test_maintenance_job_prompt.py new file mode 100644 index 000000000..31d3e8960 --- /dev/null +++ b/src/api/tests/unit/extraction/infrastructure/test_maintenance_job_prompt.py @@ -0,0 +1,44 @@ +"""Unit tests for 
maintenance extraction job prompts.""" + +from extraction.domain.extraction_job import ( + ExtractionJobRecord, + ExtractionJobStatus, + ExtractionTargetFile, +) +from extraction.infrastructure.maintenance_job_prompt import build_maintenance_job_prompt + + +def test_build_maintenance_job_prompt_documents_commit_layout() -> None: + job = ExtractionJobRecord( + id="job-row", + knowledge_graph_id="kg-1", + job_id="maintenance_batch_0001_abcd1234", + job_set_name="maintenance", + strategy="by_files", + status=ExtractionJobStatus.PENDING, + order_index=0, + description="Update graph for upstream changes.", + target_files=( + ExtractionTargetFile( + path="src/foo.go", + repository_folder="hyperfleet-api", + package_id="pkg-1", + baseline_commit="defc3afd", + head_commit="0b64088c", + change_status="modified", + patch="@@ diff @@", + ), + ), + ) + + prompt = build_maintenance_job_prompt(job=job) + + assert "repository-files/defc3afd/hyperfleet-api/src/foo.go" in prompt + assert "repository-files/0b64088c/hyperfleet-api/src/foo.go" in prompt + assert ( + "repository-files/diffs/defc3afd..0b64088c/hyperfleet-api/src/foo.go.patch" + in prompt + ) + assert "maintenance_commit_snapshots" not in prompt + assert "sources-index.json layout.target_files" in prompt + assert "Compare the baseline snapshot" in prompt diff --git a/src/api/tests/unit/extraction/infrastructure/test_maintenance_repository_files.py b/src/api/tests/unit/extraction/infrastructure/test_maintenance_repository_files.py new file mode 100644 index 000000000..4c4b57fc6 --- /dev/null +++ b/src/api/tests/unit/extraction/infrastructure/test_maintenance_repository_files.py @@ -0,0 +1,183 @@ +"""Unit tests for maintenance repository-files materialization.""" + +from __future__ import annotations + +from pathlib import Path + +import pytest + +from extraction.domain.extraction_job import ExtractionTargetFile +from extraction.domain.prepared_job_package_source import PreparedJobPackageSource +from 
extraction.infrastructure.maintenance_repository_files import ( + materialize_maintenance_target_files, + write_maintenance_sources_index, +) +from extraction.infrastructure.extraction_job_repository_files import ( + RepositoryFilesMaterializationResult, +) +from shared_kernel.job_package.builder import JobPackageBuilder +from shared_kernel.job_package.value_objects import ( + AdapterCheckpoint, + ChangeOperation, + ChangesetEntry, + ContentRef, + JobPackageId, + SyncMode, +) + + +def _source(*, package_id: str, folder: str = "hyperfleet-api") -> PreparedJobPackageSource: + return PreparedJobPackageSource( + package_id=package_id, + data_source_id="ds-1", + data_source_name="hyperfleet-api", + repository_folder=folder, + ) + + +def _build_package(work_dir: Path, package_id: str, path: str, content: bytes) -> None: + builder = JobPackageBuilder( + data_source_id="ds-1", + knowledge_graph_id="kg-1", + sync_mode=SyncMode.FULL_REFRESH, + package_id=JobPackageId(value=package_id), + ) + ref = builder.add_content(content) + builder.add_changeset_entry( + ChangesetEntry( + operation=ChangeOperation.ADD, + id="file-1", + type="io.kartograph.change.file", + path=path, + content_ref=ref, + content_type="text/plain", + metadata={}, + ) + ) + builder.set_checkpoint(AdapterCheckpoint(schema_version="1.0.0", data={"commit_sha": "0b64088c"})) + builder.build(work_dir) + + +def _target_file( + *, + path: str = "src/foo.go", + status: str = "modified", + patch: str | None = "@@ -1 +1 @@\n-old\n+new", +) -> ExtractionTargetFile: + return ExtractionTargetFile( + path=path, + repository_folder="hyperfleet-api", + package_id="01JTESTPACK0000000000000000", + baseline_commit="defc3afd", + head_commit="0b64088c", + change_status=status, + patch=patch, + data_source_id="ds-1", + ) + + +@pytest.mark.asyncio +async def test_materialize_maintenance_target_files_writes_commit_scoped_paths( + tmp_path: Path, +) -> None: + package_id = "01JTESTPACK0000000000000000" + _build_package(tmp_path, 
package_id, "src/foo.go", b"package foo\n") + repo_dir = tmp_path / "repository-files" + + async def fetch_baseline(_data_source_id: str, path: str, ref: str) -> bytes | None: + assert path == "src/foo.go" + assert ref == "defc3afd" + return b"package foo_old\n" + + result = await materialize_maintenance_target_files( + repository_files_dir=repo_dir, + job_package_work_dir=tmp_path, + target_files=(_target_file(),), + packages_by_id={package_id: _source(package_id=package_id)}, + fetch_baseline_content=fetch_baseline, + ) + + head_path = repo_dir / "0b64088c" / "hyperfleet-api" / "src/foo.go" + baseline_path = repo_dir / "defc3afd" / "hyperfleet-api" / "src/foo.go" + diff_path = repo_dir / "diffs" / "defc3afd..0b64088c" / "hyperfleet-api" / "src/foo.go.patch" + + assert result.files_written == 3 + assert head_path.read_text(encoding="utf-8") == "package foo\n" + assert baseline_path.read_text(encoding="utf-8") == "package foo_old\n" + assert "@@ -1 +1 @@" in diff_path.read_text(encoding="utf-8") + + +@pytest.mark.asyncio +async def test_materialize_maintenance_target_files_added_skips_baseline_fetch( + tmp_path: Path, +) -> None: + package_id = "01JTESTPACK0000000000000000" + _build_package(tmp_path, package_id, "src/new.go", b"package new\n") + repo_dir = tmp_path / "repository-files" + fetch_called = False + + async def fetch_baseline(_data_source_id: str, _path: str, _ref: str) -> bytes | None: + nonlocal fetch_called + fetch_called = True + return b"unexpected" + + await materialize_maintenance_target_files( + repository_files_dir=repo_dir, + job_package_work_dir=tmp_path, + target_files=( + _target_file(path="src/new.go", status="added", patch=None), + ), + packages_by_id={package_id: _source(package_id=package_id)}, + fetch_baseline_content=fetch_baseline, + ) + + assert fetch_called is False + assert (repo_dir / "0b64088c" / "hyperfleet-api" / "src/new.go").is_file() + + +@pytest.mark.asyncio +async def 
test_materialize_maintenance_target_files_removed_writes_baseline_only( + tmp_path: Path, +) -> None: + repo_dir = tmp_path / "repository-files" + + async def fetch_baseline(_data_source_id: str, path: str, ref: str) -> bytes | None: + assert path == "src/removed.go" + assert ref == "defc3afd" + return b"package removed\n" + + await materialize_maintenance_target_files( + repository_files_dir=repo_dir, + job_package_work_dir=tmp_path, + target_files=( + _target_file(path="src/removed.go", status="removed", patch="deleted"), + ), + packages_by_id={ + "01JTESTPACK0000000000000000": _source( + package_id="01JTESTPACK0000000000000000" + ) + }, + fetch_baseline_content=fetch_baseline, + ) + + assert not (repo_dir / "0b64088c" / "hyperfleet-api" / "src/removed.go").exists() + assert ( + repo_dir / "defc3afd" / "hyperfleet-api" / "src/removed.go" + ).read_text(encoding="utf-8") == "package removed\n" + + +def test_write_maintenance_sources_index_documents_layout(tmp_path: Path) -> None: + target = _target_file() + write_maintenance_sources_index( + job_root=tmp_path, + knowledge_graph_id="kg-1", + job_packages=(_source(package_id=target.package_id),), + target_files=(target,), + materialization=RepositoryFilesMaterializationResult(files_written=3), + ) + + payload = (tmp_path / "sources-index.json").read_text(encoding="utf-8") + assert "maintenance_commit_snapshots" in payload + assert "repository-files/defc3afd/hyperfleet-api/src/foo.go" in payload + assert "repository-files/0b64088c/hyperfleet-api/src/foo.go" in payload + assert "repository-files/diffs/defc3afd..0b64088c/hyperfleet-api/src/foo.go.patch" in payload diff --git a/src/api/tests/unit/infrastructure/management/test_maintenance_job_materializer.py b/src/api/tests/unit/infrastructure/management/test_maintenance_job_materializer.py index 4061ce672..fc1900630 100644 --- a/src/api/tests/unit/infrastructure/management/test_maintenance_job_materializer.py +++ 
b/src/api/tests/unit/infrastructure/management/test_maintenance_job_materializer.py @@ -9,13 +9,22 @@ ) -def _changed(path: str, *, folder: str = "repo-a", status: str = "modified") -> ChangedMaintenanceFile: +def _changed( + path: str, + *, + folder: str = "repo-a", + status: str = "modified", + baseline: str = "defc3afd", + head: str = "0b64088c", +) -> ChangedMaintenanceFile: return ChangedMaintenanceFile( data_source_id="ds-a", repository_folder=folder, path=path, status=status, package_id="pkg-a", + baseline_commit=baseline, + head_commit=head, patch=f"diff for {path}", ) @@ -38,6 +47,9 @@ def test_materialize_maintenance_jobs_batches_across_sources() -> None: assert all(job.strategy == "by_files" for job in jobs) assert sum(len(job.target_files) for job in jobs) == 15 assert "diff for a.txt" in jobs[0].description + assert jobs[0].target_files[0].baseline_commit == "defc3afd" + assert jobs[0].target_files[0].head_commit == "0b64088c" + assert jobs[0].target_files[0].change_status == "modified" def test_materialize_maintenance_jobs_returns_empty_for_no_changes() -> None: diff --git a/src/api/tests/unit/management/application/test_maintenance_pipeline_service.py b/src/api/tests/unit/management/application/test_maintenance_pipeline_service.py index d4c7448e9..edb5549e7 100644 --- a/src/api/tests/unit/management/application/test_maintenance_pipeline_service.py +++ b/src/api/tests/unit/management/application/test_maintenance_pipeline_service.py @@ -308,6 +308,138 @@ async def test_trigger_waits_for_ingest_before_materializing( materialize.assert_awaited_once() +@pytest.mark.asyncio +async def test_trigger_persists_run_history_before_materializing( + mock_session, session_factory, kg_repo, ds_repo, sync_run_repo, authz +): + kg = _make_kg() + kg_repo.seed(kg) + ds = _make_ds(ds_id="ds-prepared", kg_id=kg.id.value, tenant_id=kg.tenant_id) + ds.clone_head_commit = ds.tracked_branch_head_commit + ds.last_prepared_commit = ds.tracked_branch_head_commit + 
ds_repo.seed(ds) + await _grant_kg_manage(authz, kg.id.value, "user-1") + + svc = _service( + mock_session=mock_session, + session_factory=session_factory, + kg_repo=kg_repo, + ds_repo=ds_repo, + sync_run_repo=sync_run_repo, + authz=authz, + ) + expected = KnowledgeGraphMaintenanceRunRecord( + run_id="run-materialized", + triggered_at=datetime.now(UTC), + outcome=KnowledgeGraphMaintenanceRunOutcome.EXTRACTION_STARTED, + message="Materialized 3 maintenance job(s) and started extraction workers", + jobs_materialized=3, + ) + + with patch.object( + svc, + "_materialize_and_start_extraction", + AsyncMock(return_value=expected), + ) as materialize: + run = await svc.trigger( + user_id="user-1", + kg_id=kg.id.value, + start_extraction=True, + ) + + stored = await kg_repo.get_by_id(kg.id) + assert stored is not None + assert len(stored.maintenance_run_history) == 1 + assert stored.maintenance_run_history[-1].outcome in { + KnowledgeGraphMaintenanceRunOutcome.STARTED, + KnowledgeGraphMaintenanceRunOutcome.EXTRACTION_STARTED, + } + materialize.assert_awaited_once() + assert run.outcome == KnowledgeGraphMaintenanceRunOutcome.EXTRACTION_STARTED + + +@pytest.mark.asyncio +async def test_materialize_commits_jobs_before_starting_orchestrator( + mock_session, session_factory, kg_repo, ds_repo, sync_run_repo, authz +): + kg = _make_kg() + now = datetime.now(UTC) + kg.maintenance_run_history = ( + KnowledgeGraphMaintenanceRunRecord( + run_id="run-1", + triggered_at=now, + outcome=KnowledgeGraphMaintenanceRunOutcome.STARTED, + target_data_source_ids=("ds-prepared",), + files_per_job=2, + worker_count=4, + ), + ) + kg_repo.seed(kg) + ds = _make_ds(ds_id="ds-prepared", kg_id=kg.id.value, tenant_id=kg.tenant_id) + ds.clone_head_commit = ds.tracked_branch_head_commit + ds.last_prepared_commit = ds.tracked_branch_head_commit + ds_repo.seed(ds) + await _grant_kg_manage(authz, kg.id.value, "user-1") + + job_repo = MagicMock() + job_repo.sync_maintenance_pending_jobs = 
AsyncMock(return_value=2) + svc = MaintenancePipelineService( + session=mock_session, + session_factory=session_factory, + knowledge_graph_repository=kg_repo, + data_source_repository=ds_repo, + sync_run_repository=sync_run_repo, + extraction_job_repository=job_repo, + authorization=authz, + tenant_id=kg.tenant_id, + diff_summary_service_factory=lambda _tenant: MagicMock(), + ) + + call_order: list[str] = [] + + async def track_commit() -> None: + call_order.append("commit") + + job_repo.sync_maintenance_pending_jobs = AsyncMock( + side_effect=lambda **_kwargs: call_order.append("sync") or 2, + ) + mock_session.commit = AsyncMock(side_effect=track_commit) + + orchestrator = MagicMock() + + async def track_start(**_kwargs) -> None: + call_order.append("start") + + orchestrator.start = AsyncMock(side_effect=track_start) + + with ( + patch( + "infrastructure.management.maintenance_pipeline_service.collect_changed_maintenance_files", + AsyncMock(return_value=[MagicMock()]), + ), + patch( + "infrastructure.management.maintenance_pipeline_service.materialize_maintenance_jobs", + return_value=[MagicMock()], + ), + patch( + "infrastructure.management.maintenance_pipeline_service.get_extraction_run_orchestrator", + return_value=orchestrator, + ), + patch( + "infrastructure.management.maintenance_pipeline_service.SqlPreparedJobPackageReader", + ) as reader_cls, + ): + reader_cls.return_value.list_latest_for_knowledge_graph = AsyncMock(return_value=()) + await svc._materialize_and_start_extraction( + kg_id=kg.id.value, + tenant_id=kg.tenant_id, + ) + + assert call_order.index("sync") < call_order.index("commit") + assert call_order.index("commit") < call_order.index("start") + orchestrator.start.assert_awaited_once() + + @pytest.mark.asyncio async def test_advance_marks_ingest_failed_when_sync_fails( mock_session, session_factory, kg_repo, ds_repo, sync_run_repo, authz From 39dd04608211a50ccce6f8ede776401d2bda5043 Mon Sep 17 00:00:00 2001 From: aredenba-rh 
<aredenba@redhat.com> Date: Sat, 20 Jun 2026 00:24:51 -0400 Subject: [PATCH 150/153] fix(maintenance): improve GitHub auth errors for diff collection Load data-source credentials by source tenant, fail fast when tokens are missing, and translate GitHub 401/403 into actionable maintenance errors with clearer pipeline phase messages. Co-authored-by: Cursor <cursoragent@cursor.com> --- .../maintenance_baseline_fetcher.py | 33 +++---- .../maintenance_pipeline_service.py | 5 +- .../git_commit_reference_service.py | 42 +++++---- .../git_diff_summary_service.py | 42 +++++---- .../infrastructure/github_source_auth.py | 90 +++++++++++++++++++ .../test_git_commit_reference_service.py | 13 +-- .../test_git_diff_summary_service.py | 90 ++++++++++++++++++- 7 files changed, 256 insertions(+), 59 deletions(-) create mode 100644 src/api/management/infrastructure/github_source_auth.py diff --git a/src/api/infrastructure/extraction_workload/maintenance_baseline_fetcher.py b/src/api/infrastructure/extraction_workload/maintenance_baseline_fetcher.py index a6c7601ae..ba8f5f7c9 100644 --- a/src/api/infrastructure/extraction_workload/maintenance_baseline_fetcher.py +++ b/src/api/infrastructure/extraction_workload/maintenance_baseline_fetcher.py @@ -72,7 +72,7 @@ async def _github_context_for_source( result = await self._session.execute( text( """ - SELECT adapter_type, connection_config, credentials_path + SELECT adapter_type, connection_config, credentials_path, tenant_id FROM data_sources WHERE id = :data_source_id """ @@ -89,18 +89,24 @@ async def _github_context_for_source( if isinstance(connection_config, str): connection_config = json.loads(connection_config) owner, repo = _parse_github_connection_config(dict(connection_config or {})) - headers = await self._build_github_headers(row.get("credentials_path")) + headers = await self._build_github_headers( + credentials_path=row.get("credentials_path"), + tenant_id=str(row["tenant_id"] or self._tenant_id), + ) context = (owner, repo, 
headers) self._github_context_cache[data_source_id] = context return context - async def _build_github_headers(self, credentials_path: str | None) -> dict[str, str]: - headers = { - "Accept": "application/vnd.github+json", - "X-GitHub-Api-Version": "2022-11-28", - } - if not credentials_path: - return headers + async def _build_github_headers( + self, + *, + credentials_path: str | None, + tenant_id: str, + ) -> dict[str, str]: + from management.infrastructure.github_source_auth import github_api_headers + + if not credentials_path or not tenant_id.strip(): + return github_api_headers({}) from management.infrastructure.repositories.fernet_secret_store import FernetSecretStore @@ -111,7 +117,7 @@ async def _build_github_headers(self, credentials_path: str | None) -> dict[str, if key.strip() ] if not encryption_keys: - return headers + return github_api_headers({}) credential_reader = FernetSecretStore( session=self._session, @@ -120,14 +126,11 @@ async def _build_github_headers(self, credentials_path: str | None) -> dict[str, try: credentials = await credential_reader.retrieve( path=str(credentials_path), - tenant_id=self._tenant_id, + tenant_id=tenant_id, ) except KeyError: credentials = {} - token = credentials.get("token") or credentials.get("access_token") - if token: - headers["Authorization"] = f"Bearer {token}" - return headers + return github_api_headers(credentials) def _parse_github_connection_config(config: dict[str, str]) -> tuple[str, str]: diff --git a/src/api/infrastructure/management/maintenance_pipeline_service.py b/src/api/infrastructure/management/maintenance_pipeline_service.py index 9e3778bd1..f51c73885 100644 --- a/src/api/infrastructure/management/maintenance_pipeline_service.py +++ b/src/api/infrastructure/management/maintenance_pipeline_service.py @@ -167,8 +167,10 @@ async def _trigger_for_kg( ) try: + phase = "start maintenance pipeline" sync_run_ids: tuple[str, ...] 
= () if needs_ingest: + phase = "launch maintenance ingest" sync_run_ids = await self._launch_ingest_only_syncs( changed_sources=needs_ingest, requested_by=requested_by, @@ -225,6 +227,7 @@ async def _trigger_for_kg( outcome=KnowledgeGraphMaintenanceRunOutcome.LAUNCH_FAILED, message="Maintenance ingest finished in an unexpected state", ) + phase = "collect changed files and materialize maintenance jobs" advanced = await self._materialize_and_start_extraction( kg_id=kg_id, tenant_id=kg.tenant_id, @@ -239,7 +242,7 @@ async def _trigger_for_kg( run_id=run_id, triggered_at=now, outcome=KnowledgeGraphMaintenanceRunOutcome.LAUNCH_FAILED, - message=f"Failed to launch maintenance ingest: {exc}", + message=f"Failed to {phase}: {exc}", target_data_source_ids=tuple(ds.id.value for ds in changed_sources), files_per_job=normalized_files_per_job, worker_count=normalized_workers, diff --git a/src/api/management/infrastructure/git_commit_reference_service.py b/src/api/management/infrastructure/git_commit_reference_service.py index b2ddcab8b..3b5292710 100644 --- a/src/api/management/infrastructure/git_commit_reference_service.py +++ b/src/api/management/infrastructure/git_commit_reference_service.py @@ -7,6 +7,12 @@ import httpx from management.domain.aggregates import DataSource +from management.infrastructure.github_source_auth import ( + github_api_headers, + load_github_credentials, + raise_for_github_http_error, + require_github_token, +) from shared_kernel.credential_reader import ICredentialReader from shared_kernel.datasource_types import DataSourceAdapterType @@ -57,23 +63,17 @@ async def resolve_tracked_head_commit(self, data_source: DataSource) -> str | No data_source.connection_config ) - credentials: dict[str, str] = {} - if data_source.credentials_path: - try: - credentials = await self._credential_reader.retrieve( - path=data_source.credentials_path, - tenant_id=self._tenant_id, - ) - except KeyError: - credentials = {} - - headers = { - "Accept": 
"application/vnd.github+json", - "X-GitHub-Api-Version": "2022-11-28", - } - token = credentials.get("token") or credentials.get("access_token") - if token: - headers["Authorization"] = f"Bearer {token}" + credentials = await load_github_credentials( + credential_reader=self._credential_reader, + data_source=data_source, + ) + require_github_token( + data_source=data_source, + credentials=credentials, + owner=owner, + repo=repo, + ) + headers = github_api_headers(credentials) url = f"https://api.github.com/repos/{owner}/{repo}/branches/{branch}" client = self._http_client or httpx.AsyncClient(timeout=20.0) @@ -81,6 +81,14 @@ async def resolve_tracked_head_commit(self, data_source: DataSource) -> str | No response = await client.get(url, headers=headers) response.raise_for_status() payload = response.json() + except httpx.HTTPStatusError as exc: + raise_for_github_http_error( + exc=exc, + data_source=data_source, + owner=owner, + repo=repo, + operation="branch lookup", + ) finally: if self._http_client is None: await client.aclose() diff --git a/src/api/management/infrastructure/git_diff_summary_service.py b/src/api/management/infrastructure/git_diff_summary_service.py index 9868ad688..6fa3a3994 100644 --- a/src/api/management/infrastructure/git_diff_summary_service.py +++ b/src/api/management/infrastructure/git_diff_summary_service.py @@ -8,6 +8,12 @@ import httpx from management.domain.aggregates import DataSource +from management.infrastructure.github_source_auth import ( + github_api_headers, + load_github_credentials, + raise_for_github_http_error, + require_github_token, +) from shared_kernel.credential_reader import ICredentialReader from shared_kernel.datasource_types import DataSourceAdapterType @@ -86,23 +92,17 @@ async def build_summary( ) owner, repo = self._parse_github_connection_config(data_source.connection_config) - credentials: dict[str, str] = {} - if data_source.credentials_path: - try: - credentials = await self._credential_reader.retrieve( - 
path=data_source.credentials_path, - tenant_id=self._tenant_id, - ) - except KeyError: - credentials = {} - - headers = { - "Accept": "application/vnd.github+json", - "X-GitHub-Api-Version": "2022-11-28", - } - token = credentials.get("token") or credentials.get("access_token") - if token: - headers["Authorization"] = f"Bearer {token}" + credentials = await load_github_credentials( + credential_reader=self._credential_reader, + data_source=data_source, + ) + require_github_token( + data_source=data_source, + credentials=credentials, + owner=owner, + repo=repo, + ) + headers = github_api_headers(credentials) url = f"https://api.github.com/repos/{owner}/{repo}/compare/{baseline}...{tracked}" client = self._http_client or httpx.AsyncClient(timeout=30.0) @@ -110,6 +110,14 @@ async def build_summary( response = await client.get(url, headers=headers) response.raise_for_status() payload = response.json() + except httpx.HTTPStatusError as exc: + raise_for_github_http_error( + exc=exc, + data_source=data_source, + owner=owner, + repo=repo, + operation="compare", + ) finally: if self._http_client is None: await client.aclose() diff --git a/src/api/management/infrastructure/github_source_auth.py b/src/api/management/infrastructure/github_source_auth.py new file mode 100644 index 000000000..98f1df12a --- /dev/null +++ b/src/api/management/infrastructure/github_source_auth.py @@ -0,0 +1,90 @@ +"""Shared GitHub credential and request header helpers for Management services.""" + +from __future__ import annotations + +import httpx + +from management.domain.aggregates import DataSource +from shared_kernel.credential_reader import ICredentialReader + +_GITHUB_USER_AGENT = "Kartograph-GitHub/1.0" + + +async def load_github_credentials( + *, + credential_reader: ICredentialReader, + data_source: DataSource, +) -> dict[str, str]: + """Load decrypted GitHub credentials scoped to the data source tenant.""" + if not data_source.credentials_path: + return {} + try: + return await 
credential_reader.retrieve( + path=data_source.credentials_path, + tenant_id=data_source.tenant_id, + ) + except KeyError as exc: + raise ValueError( + f"GitHub credentials not found for data source {data_source.name!r}. " + "Re-save the repository access token on the data source and try again." + ) from exc + + +def github_api_headers(credentials: dict[str, str]) -> dict[str, str]: + """Build standard GitHub REST API headers.""" + headers = { + "Accept": "application/vnd.github+json", + "X-GitHub-Api-Version": "2022-11-28", + "User-Agent": _GITHUB_USER_AGENT, + } + token = credentials.get("token") or credentials.get("access_token") + if token: + headers["Authorization"] = f"Bearer {token}" + return headers + + +def require_github_token( + *, + data_source: DataSource, + credentials: dict[str, str], + owner: str, + repo: str, +) -> None: + """Fail fast when a private GitHub call would run without credentials.""" + if credentials.get("token") or credentials.get("access_token"): + return + if data_source.credentials_path: + raise ValueError( + f"GitHub credentials for {data_source.name!r} ({owner}/{repo}) are empty. " + "Update the data source access token and try again." + ) + raise ValueError( + f"GitHub data source {data_source.name!r} ({owner}/{repo}) has no access token " + "configured. Add credentials before running maintenance." + ) + + +def raise_for_github_http_error( + *, + exc: httpx.HTTPStatusError, + data_source: DataSource, + owner: str, + repo: str, + operation: str, +) -> None: + """Translate GitHub HTTP failures into actionable maintenance errors.""" + status = exc.response.status_code + if status in {401, 403}: + raise ValueError( + f"GitHub {operation} failed for {data_source.name!r} ({owner}/{repo}): " + "access was denied. Update the data source access token and ensure it has " + "read access to this repository." 
+ ) from exc + if status == 404: + raise ValueError( + f"GitHub {operation} failed for {data_source.name!r} ({owner}/{repo}): " + "repository or commit range was not found." + ) from exc + raise ValueError( + f"GitHub {operation} failed for {data_source.name!r} ({owner}/{repo}): {exc}" + ) from exc diff --git a/src/api/tests/unit/management/infrastructure/test_git_commit_reference_service.py b/src/api/tests/unit/management/infrastructure/test_git_commit_reference_service.py index 91a0cd85e..f5a1bcd13 100644 --- a/src/api/tests/unit/management/infrastructure/test_git_commit_reference_service.py +++ b/src/api/tests/unit/management/infrastructure/test_git_commit_reference_service.py @@ -92,15 +92,16 @@ def handler(request: httpx.Request) -> httpx.Response: return httpx.Response(status_code=200, json={"commit": {"sha": "head987"}}) client = httpx.AsyncClient(transport=httpx.MockTransport(handler)) - service = GitCommitReferenceService( - credential_reader=_FakeCredentialReader(), - tenant_id="tenant-001", - http_client=client, - ) ds = _make_data_source( connection_config={ "repo_url": "https://github.com/openshift-hyperfleet/kartograph/tree/feature/test" - } + }, + credentials_path="datasource/ds-1/credentials", + ) + service = GitCommitReferenceService( + credential_reader=_FakeCredentialReader({"access_token": "secret-token"}), + tenant_id="tenant-001", + http_client=client, ) tracked = await service.resolve_tracked_head_commit(ds) diff --git a/src/api/tests/unit/management/infrastructure/test_git_diff_summary_service.py b/src/api/tests/unit/management/infrastructure/test_git_diff_summary_service.py index 3e871fd3d..db61ce3ce 100644 --- a/src/api/tests/unit/management/infrastructure/test_git_diff_summary_service.py +++ b/src/api/tests/unit/management/infrastructure/test_git_diff_summary_service.py @@ -14,10 +14,18 @@ class _FakeCredentialReader: - def __init__(self, credentials: dict[str, str] | None = None) -> None: + def __init__( + self, + credentials: dict[str, 
str] | None = None, + *, + missing: bool = False, + ) -> None: self._credentials = credentials or {} + self._missing = missing async def retrieve(self, path: str, tenant_id: str) -> dict[str, str]: + if self._missing: + raise KeyError(path) return dict(self._credentials) @@ -59,6 +67,66 @@ async def test_returns_empty_summary_when_commits_missing(): assert result.changed_files == () +@pytest.mark.asyncio +async def test_raises_when_github_credentials_missing(): + service = GitDiffSummaryService( + credential_reader=_FakeCredentialReader(missing=True), + tenant_id="tenant-001", + ) + ds = _make_data_source() + ds = DataSource( + id=ds.id, + knowledge_graph_id=ds.knowledge_graph_id, + tenant_id=ds.tenant_id, + name=ds.name, + adapter_type=ds.adapter_type, + connection_config=ds.connection_config, + credentials_path="datasource/test/credentials", + schedule=ds.schedule, + last_sync_at=ds.last_sync_at, + created_at=ds.created_at, + updated_at=ds.updated_at, + last_extraction_baseline_commit=ds.last_extraction_baseline_commit, + tracked_branch_head_commit=ds.tracked_branch_head_commit, + ) + + with pytest.raises(ValueError, match="credentials not found"): + await service.build_summary(data_source=ds, max_files=50) + + +@pytest.mark.asyncio +async def test_raises_when_github_rejects_credentials(): + def handler(request: httpx.Request) -> httpx.Response: + return httpx.Response(status_code=401, json={"message": "Bad credentials"}) + + client = httpx.AsyncClient(transport=httpx.MockTransport(handler)) + service = GitDiffSummaryService( + credential_reader=_FakeCredentialReader({"access_token": "ghp_invalid"}), + tenant_id="tenant-001", + http_client=client, + ) + ds = _make_data_source() + ds = DataSource( + id=ds.id, + knowledge_graph_id=ds.knowledge_graph_id, + tenant_id=ds.tenant_id, + name=ds.name, + adapter_type=ds.adapter_type, + connection_config=ds.connection_config, + credentials_path="datasource/test/credentials", + schedule=ds.schedule, + 
last_sync_at=ds.last_sync_at, + created_at=ds.created_at, + updated_at=ds.updated_at, + last_extraction_baseline_commit=ds.last_extraction_baseline_commit, + tracked_branch_head_commit=ds.tracked_branch_head_commit, + ) + + with pytest.raises(ValueError, match="access was denied"): + await service.build_summary(data_source=ds, max_files=50) + await client.aclose() + + @pytest.mark.asyncio async def test_truncates_changed_files_when_max_exceeded(): """Changed-file list should truncate safely for large diffs.""" @@ -77,13 +145,29 @@ def handler(request: httpx.Request) -> httpx.Response: ) client = httpx.AsyncClient(transport=httpx.MockTransport(handler)) + ds = _make_data_source() + ds = DataSource( + id=ds.id, + knowledge_graph_id=ds.knowledge_graph_id, + tenant_id=ds.tenant_id, + name=ds.name, + adapter_type=ds.adapter_type, + connection_config=ds.connection_config, + credentials_path="datasource/test/credentials", + schedule=ds.schedule, + last_sync_at=ds.last_sync_at, + created_at=ds.created_at, + updated_at=ds.updated_at, + last_extraction_baseline_commit=ds.last_extraction_baseline_commit, + tracked_branch_head_commit=ds.tracked_branch_head_commit, + ) service = GitDiffSummaryService( - credential_reader=_FakeCredentialReader(), + credential_reader=_FakeCredentialReader({"access_token": "ghp_test"}), tenant_id="tenant-001", http_client=client, ) - result = await service.build_summary(data_source=_make_data_source(), max_files=2) + result = await service.build_summary(data_source=ds, max_files=2) await client.aclose() assert result.total_changed_files == 3 From 23829ed02c4ec5f36b06023764a70d9d1c6b818f Mon Sep 17 00:00:00 2001 From: aredenba-rh <aredenba@redhat.com> Date: Sat, 20 Jun 2026 01:36:35 -0400 Subject: [PATCH 151/153] feat(maintenance): split run controls, regenerate jobs, and extend timeouts Add start-ready and regenerate-jobs endpoints so operators can resume workers and refresh pending queues without re-running ingest, mirror those actions in 
Maintain UI, remove redundant local prepare button, and raise extraction job timeout default to 1800s. Co-authored-by: Cursor <cursoragent@cursor.com> --- .../workload_runtime_settings.py | 2 +- .../maintenance_pipeline_service.py | 157 ++++++++++-- .../services/knowledge_graph_service.py | 32 +++ .../management/ports/maintenance_pipeline.py | 16 ++ .../presentation/knowledge_graphs/models.py | 38 +++ .../presentation/knowledge_graphs/routes.py | 103 ++++++++ .../test_maintenance_pipeline_service.py | 231 ++++++++++++++++++ .../test_knowledge_graphs_routes.py | 27 ++ .../GraphMaintenanceWorkspace.vue | 179 +++++++++----- .../knowledge-graph-manage-workspace.test.ts | 7 +- 10 files changed, 704 insertions(+), 88 deletions(-) diff --git a/src/api/extraction/infrastructure/workload_runtime_settings.py b/src/api/extraction/infrastructure/workload_runtime_settings.py index 4c5af1fb7..6a2743309 100644 --- a/src/api/extraction/infrastructure/workload_runtime_settings.py +++ b/src/api/extraction/infrastructure/workload_runtime_settings.py @@ -41,7 +41,7 @@ class ExtractionWorkloadRuntimeSettings(BaseSettings): "Jobs use --network host, so docker service names like api:8000 will not resolve." ), ) - agentic_ci_timeout_seconds: int = Field(default=1200, ge=60, le=7200) + agentic_ci_timeout_seconds: int = Field(default=1800, ge=60, le=7200) extraction_job_work_dir: str = Field(default="/tmp/kartograph/extraction_jobs") sticky_command: tuple[str, ...] = Field( default=(), diff --git a/src/api/infrastructure/management/maintenance_pipeline_service.py b/src/api/infrastructure/management/maintenance_pipeline_service.py index f51c73885..385c0fed5 100644 --- a/src/api/infrastructure/management/maintenance_pipeline_service.py +++ b/src/api/infrastructure/management/maintenance_pipeline_service.py @@ -23,6 +23,11 @@ MAINTENANCE_JOB_SET_NAME, materialize_maintenance_jobs, ) + +_START_READY_NO_JOBS_MESSAGE = ( + "No maintenance jobs are ready to run. 
Queue maintenance jobs from changed " + "sources first." +) from management.domain.aggregates import DataSource, KnowledgeGraph from management.domain.entities.data_source_sync_run import DataSourceSyncRun from management.domain.value_objects import ( @@ -118,6 +123,106 @@ async def trigger( start_extraction=start_extraction, ) + async def start_ready_maintenance_jobs( + self, + *, + user_id: str, + kg_id: str, + worker_count: int = 8, + ) -> dict[str, int | str | bool]: + """Start or resume workers for already-queued pending maintenance jobs.""" + kg = await self._require_manage_kg(user_id=user_id, kg_id=kg_id) + normalized_workers = max(1, int(worker_count)) + counts = await self._job_repo.count_by_job_set(knowledge_graph_id=kg_id) + maintenance_counts = counts.get(MAINTENANCE_JOB_SET_NAME, {}) + pending_jobs = int(maintenance_counts.get("pending", 0)) + in_progress_jobs = int(maintenance_counts.get("in_progress", 0)) + + if pending_jobs <= 0 and in_progress_jobs <= 0: + raise ValueError(_START_READY_NO_JOBS_MESSAGE) + + orchestrator = get_extraction_run_orchestrator(session_factory=self._session_factory) + await orchestrator.start( + tenant_id=kg.tenant_id, + knowledge_graph_id=kg_id, + worker_count=normalized_workers, + ) + await self._session.commit() + + if pending_jobs > 0: + message = ( + f"Started {normalized_workers} worker(s) for " + f"{pending_jobs} ready maintenance job(s)" + ) + else: + message = ( + f"Resumed {normalized_workers} worker(s) while " + f"{in_progress_jobs} maintenance job(s) are in progress" + ) + return { + "success": True, + "message": message, + "pending_jobs": pending_jobs, + "in_progress_jobs": in_progress_jobs, + "worker_count": normalized_workers, + } + + async def regenerate_maintenance_jobs( + self, + *, + user_id: str, + kg_id: str, + files_per_job: int = 2, + ) -> dict[str, int | str | bool]: + """Replace pending maintenance jobs from the current baseline-to-head diff.""" + kg = await self._require_manage_kg(user_id=user_id, 
kg_id=kg_id) + normalized_files_per_job = max(1, int(files_per_job)) + data_sources = await self._ds_repo.find_by_knowledge_graph(kg_id) + changed_sources = self._changed_sources(data_sources) + if not changed_sources: + return { + "success": True, + "generated_jobs": 0, + "message": "No source commit delta detected across connected data sources", + } + + needs_ingest = [ + ds for ds in changed_sources if self._source_needs_maintenance_ingest(ds) + ] + if needs_ingest: + raise ValueError( + f"{len(needs_ingest)} changed source(s) still need ingest prepare. " + "Queue maintenance jobs to refresh JobPackages first." + ) + + jobs, changed_files = await self._build_maintenance_jobs( + kg_id=kg_id, + tenant_id=kg.tenant_id, + changed_sources=changed_sources, + files_per_job=normalized_files_per_job, + ) + if not jobs: + return { + "success": True, + "generated_jobs": 0, + "message": "No changed files were mapped to prepared JobPackages", + } + + await self._job_repo.sync_maintenance_pending_jobs( + knowledge_graph_id=kg_id, + jobs=jobs, + job_set_name=MAINTENANCE_JOB_SET_NAME, + ) + await self._session.commit() + return { + "success": True, + "generated_jobs": len(jobs), + "message": ( + f"Regenerated {len(jobs)} pending maintenance job(s) from " + f"{len(changed_files)} changed file(s)" + ), + } + async def _trigger_for_kg( self, *, @@ -380,25 +485,11 @@ async def _materialize_and_start_extraction( for ds in data_sources if ds.id.value in set(latest.target_data_source_ids) ] - runtime_settings = get_extraction_workload_runtime_settings() - prepared_reader = SqlPreparedJobPackageReader( - session=self._session, - job_package_work_dir=Path(runtime_settings.job_package_work_dir), - ) - job_packages = await prepared_reader.list_latest_for_knowledge_graph( - knowledge_graph_id=kg_id, - ) - diff_service = self._diff_summary_service_factory(tenant_id) - changed_files = await collect_changed_maintenance_files( - diff_summary_service=diff_service, - 
data_sources=changed_sources, - job_package_work_dir=Path(runtime_settings.job_package_work_dir), - job_packages=job_packages, - ) files_per_job = latest.files_per_job or 2 - jobs = materialize_maintenance_jobs( - knowledge_graph_id=kg_id, - changed_files=changed_files, + jobs, changed_files = await self._build_maintenance_jobs( + kg_id=kg_id, + tenant_id=tenant_id, + changed_sources=changed_sources, files_per_job=files_per_job, ) if not jobs: @@ -532,6 +623,36 @@ async def _require_manage_kg(self, *, user_id: str, kg_id: str) -> KnowledgeGrap raise KnowledgeGraphNotFoundError(f"Knowledge graph {kg_id} not found") return kg + async def _build_maintenance_jobs( + self, + *, + kg_id: str, + tenant_id: str, + changed_sources: list[DataSource], + files_per_job: int, + ) -> tuple[list, list]: + runtime_settings = get_extraction_workload_runtime_settings() + prepared_reader = SqlPreparedJobPackageReader( + session=self._session, + job_package_work_dir=Path(runtime_settings.job_package_work_dir), + ) + job_packages = await prepared_reader.list_latest_for_knowledge_graph( + knowledge_graph_id=kg_id, + ) + diff_service = self._diff_summary_service_factory(tenant_id) + changed_files = await collect_changed_maintenance_files( + diff_summary_service=diff_service, + data_sources=changed_sources, + job_package_work_dir=Path(runtime_settings.job_package_work_dir), + job_packages=job_packages, + ) + jobs = materialize_maintenance_jobs( + knowledge_graph_id=kg_id, + changed_files=changed_files, + files_per_job=max(1, int(files_per_job)), + ) + return jobs, changed_files + @staticmethod def _changed_sources(data_sources: list[DataSource]) -> list[DataSource]: return [ diff --git a/src/api/management/application/services/knowledge_graph_service.py b/src/api/management/application/services/knowledge_graph_service.py index 8d874d0e3..59c7d7302 100644 --- a/src/api/management/application/services/knowledge_graph_service.py +++ 
b/src/api/management/application/services/knowledge_graph_service.py @@ -238,6 +238,38 @@ async def trigger_maintenance_run( start_extraction=start_extraction, ) + async def start_ready_maintenance_jobs( + self, + *, + user_id: str, + kg_id: str, + worker_count: int = 8, + ) -> dict[str, int | str | bool]: + """Start workers for pending maintenance jobs without re-queueing work.""" + if self._maintenance_pipeline is None: + raise ValueError("Maintenance pipeline is not configured") + return await self._maintenance_pipeline.start_ready_maintenance_jobs( + user_id=user_id, + kg_id=kg_id, + worker_count=worker_count, + ) + + async def regenerate_maintenance_jobs( + self, + *, + user_id: str, + kg_id: str, + files_per_job: int = 2, + ) -> dict[str, int | str | bool]: + """Replace pending maintenance jobs from the current changed-file diff.""" + if self._maintenance_pipeline is None: + raise ValueError("Maintenance pipeline is not configured") + return await self._maintenance_pipeline.regenerate_maintenance_jobs( + user_id=user_id, + kg_id=kg_id, + files_per_job=files_per_job, + ) + async def _check_permission( self, user_id: str, diff --git a/src/api/management/ports/maintenance_pipeline.py b/src/api/management/ports/maintenance_pipeline.py index 5756cbc29..4d9c7bc9f 100644 --- a/src/api/management/ports/maintenance_pipeline.py +++ b/src/api/management/ports/maintenance_pipeline.py @@ -19,3 +19,19 @@ async def trigger( worker_count: int = 8, start_extraction: bool = True, ) -> KnowledgeGraphMaintenanceRunRecord: ... + + async def start_ready_maintenance_jobs( + self, + *, + user_id: str, + kg_id: str, + worker_count: int = 8, + ) -> dict[str, int | str | bool]: ... + + async def regenerate_maintenance_jobs( + self, + *, + user_id: str, + kg_id: str, + files_per_job: int = 2, + ) -> dict[str, int | str | bool]: ... 
diff --git a/src/api/management/presentation/knowledge_graphs/models.py b/src/api/management/presentation/knowledge_graphs/models.py index e1ff89a77..62ccfc72d 100644 --- a/src/api/management/presentation/knowledge_graphs/models.py +++ b/src/api/management/presentation/knowledge_graphs/models.py @@ -205,6 +205,44 @@ class MaintenanceScheduleUpsertRequest(BaseModel): ) +class MaintenanceStartReadyRequest(BaseModel): + """Request body for starting workers on queued maintenance jobs.""" + + worker_count: int = Field( + default=8, + ge=1, + description="Parallel OpenShell workers for ready maintenance jobs", + ) + + +class MaintenanceStartReadyResponse(BaseModel): + """Response after starting workers for ready maintenance jobs.""" + + success: bool + message: str + pending_jobs: int + in_progress_jobs: int + worker_count: int + + +class MaintenanceRegenerateJobsRequest(BaseModel): + """Request body for regenerating pending maintenance jobs.""" + + files_per_job: int = Field( + default=2, + ge=1, + description="Number of changed files batched into each maintenance job", + ) + + +class MaintenanceRegenerateJobsResponse(BaseModel): + """Response after regenerating pending maintenance jobs.""" + + success: bool + message: str + generated_jobs: int + + class MaintenanceRunTriggerRequest(BaseModel): """Request body for manual KG maintenance orchestration.""" diff --git a/src/api/management/presentation/knowledge_graphs/routes.py b/src/api/management/presentation/knowledge_graphs/routes.py index 871f1d45e..0d10c879f 100644 --- a/src/api/management/presentation/knowledge_graphs/routes.py +++ b/src/api/management/presentation/knowledge_graphs/routes.py @@ -26,6 +26,10 @@ MaintenanceRunListResponse, MaintenanceRunResponse, MaintenanceRunTriggerRequest, + MaintenanceRegenerateJobsRequest, + MaintenanceRegenerateJobsResponse, + MaintenanceStartReadyRequest, + MaintenanceStartReadyResponse, MaintenanceScheduleResponse, MaintenanceScheduleUpsertRequest, OntologyConfigRequest, @@ 
-203,6 +207,105 @@ async def trigger_knowledge_graph_maintenance_run( ) +@router.post( + "/knowledge-graphs/{kg_id}/maintenance-runs/start-ready", + response_model=MaintenanceStartReadyResponse, + status_code=status.HTTP_200_OK, + summary="Start workers for queued maintenance jobs", +) +async def start_ready_maintenance_jobs( + kg_id: str, + request: MaintenanceStartReadyRequest, + current_user: Annotated[CurrentUser, Depends(get_current_user)], + service: Annotated[KnowledgeGraphService, Depends(get_knowledge_graph_service)], +) -> MaintenanceStartReadyResponse: + """Start or resume extraction workers for pending maintenance jobs only.""" + try: + result = await service.start_ready_maintenance_jobs( + user_id=current_user.user_id.value, + kg_id=kg_id, + worker_count=request.worker_count, + ) + return MaintenanceStartReadyResponse( + success=bool(result.get("success")), + message=str(result.get("message") or ""), + pending_jobs=int(result.get("pending_jobs") or 0), + in_progress_jobs=int(result.get("in_progress_jobs") or 0), + worker_count=int(result.get("worker_count") or request.worker_count), + ) + except UnauthorizedError: + raise HTTPException( + status_code=status.HTTP_403_FORBIDDEN, + detail="You do not have permission to perform this action", + ) + except KnowledgeGraphNotFoundError as e: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail=str(e), + ) + except ValueError as e: + raise HTTPException( + status_code=status.HTTP_409_CONFLICT, + detail=str(e), + ) + except Exception: + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail="Failed to start ready maintenance jobs", + ) + + +@router.post( + "/knowledge-graphs/{kg_id}/maintenance-runs/regenerate-jobs", + response_model=MaintenanceRegenerateJobsResponse, + status_code=status.HTTP_200_OK, + summary="Regenerate pending maintenance jobs from current diffs", +) +async def regenerate_maintenance_jobs( + kg_id: str, + request: 
MaintenanceRegenerateJobsRequest, + current_user: Annotated[CurrentUser, Depends(get_current_user)], + service: Annotated[KnowledgeGraphService, Depends(get_knowledge_graph_service)], +) -> MaintenanceRegenerateJobsResponse: + """Replace pending maintenance jobs using the current baseline-to-head diff.""" + try: + result = await service.regenerate_maintenance_jobs( + user_id=current_user.user_id.value, + kg_id=kg_id, + files_per_job=request.files_per_job, + ) + return MaintenanceRegenerateJobsResponse( + success=bool(result.get("success")), + message=str(result.get("message") or ""), + generated_jobs=int(result.get("generated_jobs") or 0), + ) + except UnauthorizedError: + raise HTTPException( + status_code=status.HTTP_403_FORBIDDEN, + detail="You do not have permission to perform this action", + ) + except KnowledgeGraphNotFoundError as e: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail=str(e), + ) + except ValueError as e: + raise HTTPException( + status_code=status.HTTP_409_CONFLICT, + detail=str(e), + ) + except RuntimeError as e: + raise HTTPException( + status_code=status.HTTP_409_CONFLICT, + detail=str(e), + ) + except Exception: + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail="Failed to regenerate maintenance jobs", + ) + + @router.get( "/knowledge-graphs", status_code=status.HTTP_200_OK, diff --git a/src/api/tests/unit/management/application/test_maintenance_pipeline_service.py b/src/api/tests/unit/management/application/test_maintenance_pipeline_service.py index edb5549e7..d0426cc03 100644 --- a/src/api/tests/unit/management/application/test_maintenance_pipeline_service.py +++ b/src/api/tests/unit/management/application/test_maintenance_pipeline_service.py @@ -539,3 +539,234 @@ async def test_check_scheduled_triggers_due_knowledge_graph( assert triggered == 1 trigger_scheduled.assert_awaited_once() fake_repo.save.assert_awaited_once() + + +@pytest.mark.asyncio +async def 
test_start_ready_maintenance_jobs_requires_queued_jobs( + mock_session, session_factory, kg_repo, ds_repo, sync_run_repo, authz +): + kg = _make_kg() + kg_repo.seed(kg) + await _grant_kg_manage(authz, kg.id.value, "user-1") + + job_repo = MagicMock() + job_repo.count_by_job_set = AsyncMock(return_value={}) + svc = MaintenancePipelineService( + session=mock_session, + session_factory=session_factory, + knowledge_graph_repository=kg_repo, + data_source_repository=ds_repo, + sync_run_repository=sync_run_repo, + extraction_job_repository=job_repo, + authorization=authz, + tenant_id=kg.tenant_id, + diff_summary_service_factory=lambda _tenant: MagicMock(), + ) + + with pytest.raises(ValueError, match="No maintenance jobs are ready"): + await svc.start_ready_maintenance_jobs( + user_id="user-1", + kg_id=kg.id.value, + worker_count=4, + ) + + +@pytest.mark.asyncio +async def test_start_ready_maintenance_jobs_starts_workers_for_pending_jobs( + mock_session, session_factory, kg_repo, ds_repo, sync_run_repo, authz +): + kg = _make_kg() + kg_repo.seed(kg) + await _grant_kg_manage(authz, kg.id.value, "user-1") + + job_repo = MagicMock() + job_repo.count_by_job_set = AsyncMock( + return_value={"maintenance": {"pending": 3, "in_progress": 0}} + ) + svc = MaintenancePipelineService( + session=mock_session, + session_factory=session_factory, + knowledge_graph_repository=kg_repo, + data_source_repository=ds_repo, + sync_run_repository=sync_run_repo, + extraction_job_repository=job_repo, + authorization=authz, + tenant_id=kg.tenant_id, + diff_summary_service_factory=lambda _tenant: MagicMock(), + ) + orchestrator = MagicMock() + orchestrator.start = AsyncMock() + + with patch( + "infrastructure.management.maintenance_pipeline_service.get_extraction_run_orchestrator", + return_value=orchestrator, + ): + result = await svc.start_ready_maintenance_jobs( + user_id="user-1", + kg_id=kg.id.value, + worker_count=4, + ) + + orchestrator.start.assert_awaited_once_with( + 
tenant_id=kg.tenant_id, + knowledge_graph_id=kg.id.value, + worker_count=4, + ) + assert result["pending_jobs"] == 3 + assert "Started 4 worker(s)" in str(result["message"]) + mock_session.commit.assert_awaited() + + +@pytest.mark.asyncio +async def test_start_ready_maintenance_jobs_resumes_when_jobs_in_progress( + mock_session, session_factory, kg_repo, ds_repo, sync_run_repo, authz +): + kg = _make_kg() + kg_repo.seed(kg) + await _grant_kg_manage(authz, kg.id.value, "user-1") + + job_repo = MagicMock() + job_repo.count_by_job_set = AsyncMock( + return_value={"maintenance": {"pending": 0, "in_progress": 2}} + ) + svc = MaintenancePipelineService( + session=mock_session, + session_factory=session_factory, + knowledge_graph_repository=kg_repo, + data_source_repository=ds_repo, + sync_run_repository=sync_run_repo, + extraction_job_repository=job_repo, + authorization=authz, + tenant_id=kg.tenant_id, + diff_summary_service_factory=lambda _tenant: MagicMock(), + ) + orchestrator = MagicMock() + orchestrator.start = AsyncMock() + + with patch( + "infrastructure.management.maintenance_pipeline_service.get_extraction_run_orchestrator", + return_value=orchestrator, + ): + result = await svc.start_ready_maintenance_jobs( + user_id="user-1", + kg_id=kg.id.value, + worker_count=6, + ) + + orchestrator.start.assert_awaited_once() + assert result["in_progress_jobs"] == 2 + assert "Resumed 6 worker(s)" in str(result["message"]) + + +@pytest.mark.asyncio +async def test_regenerate_maintenance_jobs_replaces_pending_from_current_diff( + mock_session, session_factory, kg_repo, ds_repo, sync_run_repo, authz +): + kg = _make_kg() + kg_repo.seed(kg) + ds = _make_ds(ds_id="ds-1", kg_id=kg.id.value, tenant_id=kg.tenant_id) + ds.clone_head_commit = ds.tracked_branch_head_commit + ds.last_prepared_commit = ds.tracked_branch_head_commit + ds_repo.seed(ds) + await _grant_kg_manage(authz, kg.id.value, "user-1") + + job_repo = MagicMock() + job_repo.sync_maintenance_pending_jobs = 
AsyncMock(return_value=3) + svc = MaintenancePipelineService( + session=mock_session, + session_factory=session_factory, + knowledge_graph_repository=kg_repo, + data_source_repository=ds_repo, + sync_run_repository=sync_run_repo, + extraction_job_repository=job_repo, + authorization=authz, + tenant_id=kg.tenant_id, + diff_summary_service_factory=lambda _tenant: MagicMock(), + ) + + with ( + patch.object( + svc, + "_build_maintenance_jobs", + AsyncMock(return_value=([MagicMock(), MagicMock(), MagicMock()], [MagicMock()])), + ), + ): + result = await svc.regenerate_maintenance_jobs( + user_id="user-1", + kg_id=kg.id.value, + files_per_job=2, + ) + + job_repo.sync_maintenance_pending_jobs.assert_awaited_once() + mock_session.commit.assert_awaited() + assert result["generated_jobs"] == 3 + assert "Regenerated 3 pending maintenance job(s)" in str(result["message"]) + + +@pytest.mark.asyncio +async def test_regenerate_maintenance_jobs_requires_prepared_sources( + mock_session, session_factory, kg_repo, ds_repo, sync_run_repo, authz +): + kg = _make_kg() + kg_repo.seed(kg) + ds = _make_ds(ds_id="ds-1", kg_id=kg.id.value, tenant_id=kg.tenant_id) + ds_repo.seed(ds) + await _grant_kg_manage(authz, kg.id.value, "user-1") + + svc = _service( + mock_session=mock_session, + session_factory=session_factory, + kg_repo=kg_repo, + ds_repo=ds_repo, + sync_run_repo=sync_run_repo, + authz=authz, + tenant_id=kg.tenant_id, + ) + + with pytest.raises(ValueError, match="ingest prepare"): + await svc.regenerate_maintenance_jobs( + user_id="user-1", + kg_id=kg.id.value, + files_per_job=2, + ) + + +@pytest.mark.asyncio +async def test_regenerate_maintenance_jobs_blocks_while_jobs_running( + mock_session, session_factory, kg_repo, ds_repo, sync_run_repo, authz +): + kg = _make_kg() + kg_repo.seed(kg) + ds = _make_ds(ds_id="ds-1", kg_id=kg.id.value, tenant_id=kg.tenant_id) + ds.clone_head_commit = ds.tracked_branch_head_commit + ds.last_prepared_commit = ds.tracked_branch_head_commit + 
ds_repo.seed(ds) + await _grant_kg_manage(authz, kg.id.value, "user-1") + + job_repo = MagicMock() + job_repo.sync_maintenance_pending_jobs = AsyncMock( + side_effect=RuntimeError("Cannot refresh maintenance jobs while 1 job(s) are running") + ) + svc = MaintenancePipelineService( + session=mock_session, + session_factory=session_factory, + knowledge_graph_repository=kg_repo, + data_source_repository=ds_repo, + sync_run_repository=sync_run_repo, + extraction_job_repository=job_repo, + authorization=authz, + tenant_id=kg.tenant_id, + diff_summary_service_factory=lambda _tenant: MagicMock(), + ) + + with patch.object( + svc, + "_build_maintenance_jobs", + AsyncMock(return_value=([MagicMock()], [MagicMock()])), + ): + with pytest.raises(RuntimeError, match="running"): + await svc.regenerate_maintenance_jobs( + user_id="user-1", + kg_id=kg.id.value, + files_per_job=2, + ) diff --git a/src/api/tests/unit/management/presentation/test_knowledge_graphs_routes.py b/src/api/tests/unit/management/presentation/test_knowledge_graphs_routes.py index 050e2b24e..f22af30db 100644 --- a/src/api/tests/unit/management/presentation/test_knowledge_graphs_routes.py +++ b/src/api/tests/unit/management/presentation/test_knowledge_graphs_routes.py @@ -495,6 +495,33 @@ def test_trigger_maintenance_run_returns_201( start_extraction=True, ) + def test_regenerate_maintenance_jobs_returns_200( + self, + test_client: TestClient, + mock_kg_service: AsyncMock, + sample_knowledge_graph: KnowledgeGraph, + mock_current_user: CurrentUser, + ) -> None: + mock_kg_service.regenerate_maintenance_jobs.return_value = { + "success": True, + "generated_jobs": 5, + "message": "Regenerated 5 pending maintenance job(s) from 10 changed file(s)", + } + + response = test_client.post( + f"/management/knowledge-graphs/{sample_knowledge_graph.id.value}/maintenance-runs/regenerate-jobs", + json={"files_per_job": 2}, + ) + + assert response.status_code == status.HTTP_200_OK + payload = response.json() + assert 
payload["generated_jobs"] == 5 + mock_kg_service.regenerate_maintenance_jobs.assert_called_once_with( + user_id=mock_current_user.user_id.value, + kg_id=sample_knowledge_graph.id.value, + files_per_job=2, + ) + class TestWorkspaceCommandsRoutes: """Tests for workspace validate/transition command endpoints.""" diff --git a/src/dev-ui/app/components/graph-management/GraphMaintenanceWorkspace.vue b/src/dev-ui/app/components/graph-management/GraphMaintenanceWorkspace.vue index 3e8dc8128..dc32d9010 100644 --- a/src/dev-ui/app/components/graph-management/GraphMaintenanceWorkspace.vue +++ b/src/dev-ui/app/components/graph-management/GraphMaintenanceWorkspace.vue @@ -158,6 +158,7 @@ const resettingCompleted = ref(false) const resettingFailed = ref(false) const resettingAll = ref(false) const archivingCompleted = ref(false) +const regeneratingJobs = ref(false) const optimisticLiveUntilMs = ref<number | null>(null) const nowMs = ref(Date.now()) const lastStatusRefreshMs = ref<number | null>(null) @@ -178,7 +179,6 @@ const workers = ref(8) const filesPerJob = ref(2) const runControlsInitialized = ref(false) const checkingCommits = ref(false) -const updatingLocalCommits = ref(false) const runningMaintenance = ref(false) let refreshInterval: ReturnType<typeof setInterval> | null = null @@ -238,6 +238,12 @@ const readyJobsCount = computed(() => { const readyJobsAreEstimated = computed( () => pendingJobsCount.value === 0 && readyJobsCount.value > 0, ) +const canRunReadyMaintenanceJobs = computed( + () => pendingJobsCount.value > 0 || (inProgressJobsCount.value > 0 && !extractionRunLive.value), +) +const canQueueMaintenanceJobs = computed( + () => maintenanceReadySources.value.length > 0 && inProgressJobsCount.value === 0, +) const remainingJobsCount = computed(() => { const queued = pendingJobsCount.value + inProgressJobsCount.value if (queued > 0) return queued @@ -281,7 +287,7 @@ const recentJobsEmptyMessage = computed(() => { return 'Starting maintenance workers. 
Job events will appear as jobs are claimed and completed.' } if (recentJobEvents.value.filter((event) => event.jobSet === MAINTENANCE_JOB_SET).length === 0) { - return 'No maintenance job events yet. Run maintenance to materialize by-file jobs and start workers.' + return 'No maintenance job events yet. Queue maintenance jobs from changed sources, then run ready jobs to start workers.' } const filterLabel = RECENT_JOB_STATUS_FILTERS.find( (option) => option.value === recentJobStatusFilter.value, @@ -305,6 +311,23 @@ function resolveApiError(e: unknown): string { return err.message || 'Request failed' } +function resolveRegenerateFailureMessage(description: string): { title: string; hint?: string } { + const lower = description.toLowerCase() + if (lower.includes('in progress') || lower.includes('still running') || lower.includes('running')) { + return { + title: 'Cannot regenerate maintenance jobs yet', + hint: 'Wait for running jobs to finish, cancel them, or reset running jobs — then regenerate again.', + } + } + if (lower.includes('ingest prepare')) { + return { + title: 'Sources need ingest prepare first', + hint: 'Queue maintenance jobs to refresh JobPackages, then regenerate pending jobs.', + } + } + return { title: 'Regenerate failed', hint: description } +} + function formatWhen(value: string | null | undefined): string { if (!value) return '—' const date = new Date(value) @@ -526,6 +549,32 @@ async function archiveCompletedJobs() { } } +async function regenerateMaintenanceJobs() { + regeneratingJobs.value = true + try { + const res = await apiFetch<{ generated_jobs?: number; message?: string }>( + `/management/knowledge-graphs/${encodeURIComponent(props.kgId)}/maintenance-runs/regenerate-jobs`, + { + method: 'POST', + body: { files_per_job: normalizedFilesPerJob.value }, + }, + ) + toast.success('Maintenance jobs synced', { + description: res.message || `Synced ${res.generated_jobs ?? 
0} pending job(s).`, + }) + await refreshAll({ background: true }) + } catch (e: unknown) { + const description = resolveApiError(e) + const failure = resolveRegenerateFailureMessage(description) + toast.error(failure.title, { + description: failure.hint || description, + duration: 10000, + }) + } finally { + regeneratingJobs.value = false + } +} + async function refreshAll(options?: { background?: boolean }) { const background = options?.background ?? false if (background) refreshing.value = true @@ -572,47 +621,6 @@ async function checkForNewCommits() { } } -async function getLatestCommitLocally() { - const queue = sourcesNeedingPrepare.value - if (queue.length === 0) { - toast.message('Already up to date locally', { - description: 'No sources need ingestion prepare. Run check for new commits first if unsure.', - }) - return - } - updatingLocalCommits.value = true - try { - const results = await Promise.allSettled( - queue.map((ds) => - apiFetch(`/management/data-sources/${ds.id}/sync`, { - method: 'POST', - body: { mode: 'ingest_only' }, - }), - ), - ) - const failures = results.filter((result) => result.status === 'rejected') - await loadDataSources() - if (failures.length === queue.length) { - toast.error('Failed to update local commits', { - description: resolveApiError(failures[0]?.status === 'rejected' ? failures[0].reason : null), - }) - return - } - if (failures.length > 0) { - toast.warning( - `Started ${queue.length - failures.length} of ${queue.length} preparations`, - { description: 'Some sources could not be queued.' }, - ) - return - } - toast.success(`Preparing ${queue.length} data source${queue.length === 1 ? 
'' : 's'}`) - } catch (e: unknown) { - toast.error('Failed to get latest commit locally', { description: resolveApiError(e) }) - } finally { - updatingLocalCommits.value = false - } -} - async function saveSchedule() { const cron = dailyTimeToCron(scheduleTime.value) if (!cron) { @@ -645,6 +653,43 @@ async function saveSchedule() { } } +async function queueMaintenanceJobs() { + optimisticLiveUntilMs.value = Date.now() + 30000 + await runMaintenanceNow({ startExtraction: true }) + startFastAutoRefresh() +} + +async function runReadyMaintenanceJobs() { + runningMaintenance.value = true + const workerTotal = Math.min( + MAX_MAINTENANCE_WORKERS, + Math.max(1, Math.floor(Number(workers.value) || 1)), + ) + try { + const result = await apiFetch<{ + success: boolean + message: string + pending_jobs: number + in_progress_jobs: number + worker_count: number + }>( + `/management/knowledge-graphs/${encodeURIComponent(props.kgId)}/maintenance-runs/start-ready`, + { + method: 'POST', + body: { worker_count: workerTotal }, + }, + ) + optimisticLiveUntilMs.value = Date.now() + 30000 + toast.success('Ready maintenance jobs started', { description: result.message }) + startFastAutoRefresh() + await refreshAll({ background: true }) + } catch (e: unknown) { + toast.error('Failed to run ready maintenance jobs', { description: resolveApiError(e) }) + } finally { + runningMaintenance.value = false + } +} + async function runMaintenanceNow(options?: { startExtraction?: boolean }) { runningMaintenance.value = true const workerTotal = Math.min( @@ -684,13 +729,6 @@ async function runMaintenanceNow(options?: { startExtraction?: boolean }) { runningMaintenance.value = false } } - -async function runMaintenancePipeline() { - optimisticLiveUntilMs.value = Date.now() + 30000 - await runMaintenanceNow({ startExtraction: true }) - startFastAutoRefresh() -} - function startAutoRefresh() { if (refreshInterval) return refreshInterval = setInterval(() => { void refreshAll({ background: true }) }, 
3000) @@ -776,22 +814,13 @@ onUnmounted(() => { <Button variant="outline" size="sm" - :disabled="checkingCommits || updatingLocalCommits || dataSources.length === 0" + :disabled="checkingCommits || dataSources.length === 0" @click="checkForNewCommits" > <Loader2 v-if="checkingCommits" class="mr-2 size-4 animate-spin" /> <RefreshCw v-else class="mr-2 size-4" /> Check for new commits </Button> - <Button - variant="outline" - size="sm" - :disabled="checkingCommits || updatingLocalCommits || dataSources.length === 0" - @click="getLatestCommitLocally" - > - <Loader2 v-if="updatingLocalCommits" class="mr-2 size-4 animate-spin" /> - Get latest commit locally - </Button> </div> </div> </CardHeader> @@ -880,7 +909,7 @@ onUnmounted(() => { <p class="mt-3 text-xs text-muted-foreground"> {{ maintenanceReadySources.length }} source(s) have commits ahead of the last job baseline · {{ totalChangedFiles }} changed file(s) detected · - {{ sourcesNeedingPrepare.length }} need local prepare + {{ sourcesNeedingPrepare.length }} will ingest on queue </p> </CardContent> </Card> @@ -890,11 +919,11 @@ onUnmounted(() => { <CardHeader> <CardTitle class="flex items-center gap-2 text-base"> <Play class="size-4 text-primary" /> - Run maintenance + Maintenance jobs </CardTitle> <CardDescription> - Materialize by-file maintenance jobs from changed sources, then run parallel workers - until the queue drains. Per-job results appear in Graph Writes History. + Queue by-file jobs from changed sources, then run ready jobs to start workers. + Per-job results appear in Graph Writes History. 
</CardDescription> </CardHeader> <CardContent class="space-y-4"> @@ -945,11 +974,18 @@ onUnmounted(() => { <div class="flex flex-wrap items-end gap-2"> <Button - :disabled="runningMaintenance || maintenanceReadySources.length === 0" - @click="runMaintenancePipeline" + variant="outline" + :disabled="runningMaintenance || !canQueueMaintenanceJobs" + @click="queueMaintenanceJobs" > <Loader2 v-if="runningMaintenance" class="mr-2 size-4 animate-spin" /> - Run maintenance + Queue maintenance jobs + </Button> + <Button + :disabled="runningMaintenance || !canRunReadyMaintenanceJobs" + @click="runReadyMaintenanceJobs" + > + Run ready maintenance jobs </Button> <Button size="sm" variant="outline" :disabled="pausingExtraction" @click="pauseExtractionWorkers"> Pause @@ -1198,6 +1234,15 @@ onUnmounted(() => { <Button size="sm" variant="outline" :disabled="resettingAll" @click="resetByKind('all')"> Reset All Jobs </Button> + <Button + size="sm" + variant="outline" + :disabled="regeneratingJobs" + @click="regenerateMaintenanceJobs" + > + <Settings class="mr-1.5 size-3.5" /> + Regenerate jobs + </Button> </div> <div class="rounded-lg border bg-muted/20 p-3 text-xs"> diff --git a/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts b/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts index f84385bae..0b395640b 100644 --- a/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts +++ b/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts @@ -131,7 +131,6 @@ describe('KG-MANAGE-012b - maintain workspace commit and job controls', () => { it('labels new files section and exposes commit refresh actions', () => { expect(graphMaintenanceWorkspaceVue).toContain('New Files to Process') expect(graphMaintenanceWorkspaceVue).toContain('Check for new commits') - expect(graphMaintenanceWorkspaceVue).toContain('Get latest commit locally') expect(graphMaintenanceWorkspaceVue).toContain('last_extraction_baseline_commit') 
expect(graphMaintenanceWorkspaceVue).toContain('diff-summary') }) @@ -146,11 +145,15 @@ describe('KG-MANAGE-012b - maintain workspace commit and job controls', () => { it('explains by-file maintenance and hybrid run controls', () => { expect(graphMaintenanceWorkspaceVue).toContain('by-file') - expect(graphMaintenanceWorkspaceVue).toContain('Run maintenance') + expect(graphMaintenanceWorkspaceVue).toContain('Queue maintenance jobs') + expect(graphMaintenanceWorkspaceVue).toContain('Run ready maintenance jobs') + expect(graphMaintenanceWorkspaceVue).toContain('canRunReadyMaintenanceJobs') expect(graphMaintenanceWorkspaceVue).toContain('Schedule recurring maintenance jobs') expect(graphMaintenanceWorkspaceVue).toContain('Live maintenance activity') expect(graphMaintenanceWorkspaceVue).toContain('Job Status') expect(graphMaintenanceWorkspaceVue).toContain('Archive Completed') + expect(graphMaintenanceWorkspaceVue).toContain('Regenerate jobs') + expect(graphMaintenanceWorkspaceVue).toContain('regenerateMaintenanceJobs') expect(graphMaintenanceWorkspaceVue).not.toContain('Maintenance run history') expect(graphMaintenanceWorkspaceVue).not.toContain('Sync changed sources only') expect(graphMaintenanceWorkspaceVue).not.toContain('Run maintenance jobs now') From d861ee49147f1e358db9b640d52f41bab84f7d8a Mon Sep 17 00:00:00 2001 From: aredenba-rh <aredenba@redhat.com> Date: Sun, 21 Jun 2026 15:25:55 -0400 Subject: [PATCH 152/153] feat(maintenance): clarify baseline timing and guard destructive regenerate Add a baseline notice in New Files to Process, require confirmation before regenerating pending jobs, and skip scheduled runs while failed maintenance jobs remain until operators reset them manually. 
Co-authored-by: Cursor <cursoragent@cursor.com> --- .../maintenance_pipeline_service.py | 34 ++++++- .../test_maintenance_pipeline_service.py | 68 +++++++++++++ .../GraphMaintenanceWorkspace.vue | 98 ++++++++++++++++++- .../knowledge-graph-manage-workspace.test.ts | 7 +- 4 files changed, 199 insertions(+), 8 deletions(-) diff --git a/src/api/infrastructure/management/maintenance_pipeline_service.py b/src/api/infrastructure/management/maintenance_pipeline_service.py index 385c0fed5..697699f0f 100644 --- a/src/api/infrastructure/management/maintenance_pipeline_service.py +++ b/src/api/infrastructure/management/maintenance_pipeline_service.py @@ -401,11 +401,37 @@ async def check_scheduled_triggers(self, *, now: datetime | None = None) -> int: if schedule.next_run_at is None or schedule.next_run_at > current: continue service = self._with_session(session) - await service.trigger_scheduled( - kg_id=kg.id.value, - files_per_job=schedule.files_per_job, - worker_count=schedule.worker_count, + from extraction.infrastructure.repositories.extraction_job_repository import ( + ExtractionJobRepository, ) + + job_repo = ExtractionJobRepository(session) + maintenance_counts = ( + await job_repo.count_by_job_set(knowledge_graph_id=kg.id.value) + ).get(MAINTENANCE_JOB_SET_NAME, {}) + failed_jobs = int(maintenance_counts.get("failed", 0)) + if failed_jobs > 0: + service._record_run( + kg=kg, + run=KnowledgeGraphMaintenanceRunRecord( + run_id=str(ULID()), + triggered_at=current, + outcome=KnowledgeGraphMaintenanceRunOutcome.PREFLIGHT_FAILED, + message=( + f"Scheduled maintenance skipped: {failed_jobs} failed " + "maintenance job(s) must be reset or addressed manually " + "before another scheduled run can proceed" + ), + files_per_job=schedule.files_per_job, + worker_count=schedule.worker_count, + ), + ) + else: + await service.trigger_scheduled( + kg_id=kg.id.value, + files_per_job=schedule.files_per_job, + worker_count=schedule.worker_count, + ) kg.set_maintenance_schedule( 
KnowledgeGraphMaintenanceSchedule( enabled=schedule.enabled, diff --git a/src/api/tests/unit/management/application/test_maintenance_pipeline_service.py b/src/api/tests/unit/management/application/test_maintenance_pipeline_service.py index d0426cc03..8b4aa9678 100644 --- a/src/api/tests/unit/management/application/test_maintenance_pipeline_service.py +++ b/src/api/tests/unit/management/application/test_maintenance_pipeline_service.py @@ -517,11 +517,18 @@ async def test_check_scheduled_triggers_due_knowledge_graph( fake_repo.find_all = AsyncMock(return_value=[kg]) fake_repo.save = AsyncMock() + job_repo = MagicMock() + job_repo.count_by_job_set = AsyncMock(return_value={"maintenance": {"failed": 0}}) + with ( patch( "management.infrastructure.repositories.knowledge_graph_repository.KnowledgeGraphRepository", return_value=fake_repo, ), + patch( + "extraction.infrastructure.repositories.extraction_job_repository.ExtractionJobRepository", + return_value=job_repo, + ), patch.object( MaintenancePipelineService, "trigger_scheduled", @@ -541,6 +548,67 @@ async def test_check_scheduled_triggers_due_knowledge_graph( fake_repo.save.assert_awaited_once() +@pytest.mark.asyncio +async def test_check_scheduled_triggers_skips_when_failed_maintenance_jobs_remain( + mock_session, session_factory, kg_repo, ds_repo, sync_run_repo, authz +): + now = datetime.now(UTC) + kg = _make_kg() + kg.set_maintenance_schedule( + KnowledgeGraphMaintenanceSchedule( + enabled=True, + cron_expression="0 2 * * *", + timezone_name="UTC", + next_run_at=now - timedelta(minutes=1), + files_per_job=2, + worker_count=8, + ) + ) + kg_repo.seed(kg) + + svc = _service( + mock_session=mock_session, + session_factory=session_factory, + kg_repo=kg_repo, + ds_repo=ds_repo, + sync_run_repo=sync_run_repo, + authz=authz, + ) + + fake_repo = MagicMock() + fake_repo.find_all = AsyncMock(return_value=[kg]) + fake_repo.save = AsyncMock() + + job_repo = MagicMock() + job_repo.count_by_job_set = AsyncMock( + 
return_value={"maintenance": {"failed": 2, "pending": 0, "in_progress": 0}} + ) + + with ( + patch( + "management.infrastructure.repositories.knowledge_graph_repository.KnowledgeGraphRepository", + return_value=fake_repo, + ), + patch( + "extraction.infrastructure.repositories.extraction_job_repository.ExtractionJobRepository", + return_value=job_repo, + ), + patch.object( + MaintenancePipelineService, + "trigger_scheduled", + AsyncMock(), + ) as trigger_scheduled, + ): + triggered = await svc.check_scheduled_triggers(now=now) + + assert triggered == 1 + trigger_scheduled.assert_not_awaited() + fake_repo.save.assert_awaited_once() + assert len(kg.maintenance_run_history) == 1 + assert kg.maintenance_run_history[-1].outcome == KnowledgeGraphMaintenanceRunOutcome.PREFLIGHT_FAILED + assert "failed maintenance job" in (kg.maintenance_run_history[-1].message or "").lower() + + @pytest.mark.asyncio async def test_start_ready_maintenance_jobs_requires_queued_jobs( mock_session, session_factory, kg_repo, ds_repo, sync_run_repo, authz diff --git a/src/dev-ui/app/components/graph-management/GraphMaintenanceWorkspace.vue b/src/dev-ui/app/components/graph-management/GraphMaintenanceWorkspace.vue index dc32d9010..9d8c93f4f 100644 --- a/src/dev-ui/app/components/graph-management/GraphMaintenanceWorkspace.vue +++ b/src/dev-ui/app/components/graph-management/GraphMaintenanceWorkspace.vue @@ -4,6 +4,7 @@ import { toast } from 'vue-sonner' import { Archive, Calendar, + CircleAlert, ClipboardList, Eye, GitBranch, @@ -19,6 +20,17 @@ import { Button } from '@/components/ui/button' import { Badge } from '@/components/ui/badge' import { Input } from '@/components/ui/input' import { Separator } from '@/components/ui/separator' +import { Alert, AlertDescription } from '@/components/ui/alert' +import { + AlertDialog, + AlertDialogAction, + AlertDialogCancel, + AlertDialogContent, + AlertDialogDescription, + AlertDialogFooter, + AlertDialogHeader, + AlertDialogTitle, +} from 
'@/components/ui/alert-dialog' import { isMaintenanceReady } from '@/utils/kgManageWorkspace' import { commitStatusClass, @@ -159,6 +171,7 @@ const resettingFailed = ref(false) const resettingAll = ref(false) const archivingCompleted = ref(false) const regeneratingJobs = ref(false) +const regenerateConfirmOpen = ref(false) const optimisticLiveUntilMs = ref<number | null>(null) const nowMs = ref(Date.now()) const lastStatusRefreshMs = ref<number | null>(null) @@ -255,6 +268,29 @@ const remainingJobsCount = computed(() => { const activeQueueJobsTotal = computed( () => pendingJobsCount.value + inProgressJobsCount.value + failedJobsCount.value + completedJobsCount.value, ) +const showExtractionBaselineNotice = computed( + () => maintenanceReadySources.value.length > 0 || activeQueueJobsTotal.value > 0, +) +const extractionBaselineNoticeDetail = computed(() => { + const queued = pendingJobsCount.value + inProgressJobsCount.value + failedJobsCount.value + if (queued > 0) { + return ( + `${queued} job${queued === 1 ? '' : 's'} still need to finish ` + + '(ready, running, or failed). Resolve failures or reset them, then run ready jobs. ' + + 'Job Status tracks the queue; Regenerate jobs refreshes pending work from the table below.' + ) + } + if (completedJobsCount.value > 0 && maintenanceReadySources.value.length > 0) { + return ( + `${completedJobsCount.value} job${completedJobsCount.value === 1 ? '' : 's'} completed but the baseline ` + + 'has not advanced yet — wait for reconciliation after the queue drains.' + ) + } + return ( + 'After you queue maintenance, that column stays fixed until every job in the run succeeds. ' + + 'Check for new commits only updates branch HEAD; it does not move the extraction baseline.' 
+ ) +}) const extractionRunLive = computed(() => { if (optimisticLiveUntilMs.value && nowMs.value < optimisticLiveUntilMs.value) return true return Boolean(extractionRunState.value?.live) @@ -825,6 +861,19 @@ onUnmounted(() => { </div> </CardHeader> <CardContent> + <Alert + v-if="showExtractionBaselineNotice" + variant="warning" + class="mb-4" + > + <CircleAlert class="size-4" /> + <AlertDescription> + <span class="font-medium">Commit during last extraction</span> + does not update until every maintenance job in the current run completes successfully + with no failed jobs remaining. + {{ extractionBaselineNoticeDetail }} + </AlertDescription> + </Alert> <div v-if="dataSources.length === 0" class="rounded-md border border-dashed px-4 py-8 text-center text-sm text-muted-foreground" @@ -1119,7 +1168,9 @@ onUnmounted(() => { Schedule recurring maintenance jobs </CardTitle> <CardDescription> - Daily schedule to sync changed sources and run maintenance extraction automatically. + Each scheduled run checks for new commits, rebuilds the pending maintenance job set, + and starts workers on ready jobs. Failed jobs in Job Status block scheduled runs + until you reset or fix them manually. </CardDescription> </CardHeader> <CardContent class="space-y-4"> @@ -1152,6 +1203,10 @@ onUnmounted(() => { </div> <div class="rounded-lg border bg-muted/20 p-3 text-xs text-muted-foreground"> + <p v-if="failedJobsCount > 0" class="mb-2 font-medium text-destructive"> + Scheduled maintenance is blocked while {{ failedJobsCount }} failed job(s) remain. + Reset failed jobs in Job Status before the next run can proceed. 
+ </p> <p v-if="schedule?.next_run_at"> Next scheduled run: {{ formatWhen(schedule.next_run_at) }} </p> @@ -1236,9 +1291,9 @@ onUnmounted(() => { </Button> <Button size="sm" - variant="outline" + variant="destructive" :disabled="regeneratingJobs" - @click="regenerateMaintenanceJobs" + @click="regenerateConfirmOpen = true" > <Settings class="mr-1.5 size-3.5" /> Regenerate jobs @@ -1268,6 +1323,43 @@ onUnmounted(() => { :kg-id="kgId" :job-id="watchJobId" /> + + <AlertDialog v-model:open="regenerateConfirmOpen"> + <AlertDialogContent> + <AlertDialogHeader> + <AlertDialogTitle>Regenerate maintenance jobs?</AlertDialogTitle> + <AlertDialogDescription class="space-y-2 text-left"> + <p> + This replaces the entire <span class="font-medium text-foreground">Ready</span> + queue with a freshly materialized maintenance job set from the current diffs. + </p> + <p> + All pending jobs are wiped. Running jobs must finish or be reset first — regenerate + is blocked while workers are active. + </p> + <p> + If some jobs already completed, regenerating can create + <span class="font-medium text-destructive">duplicate work</span> + on the same files until you archive completed jobs and the extraction baseline advances. + </p> + <p class="font-medium text-destructive"> + Only continue when you intend to discard the current pending queue and start over. 
+ </p> + </AlertDialogDescription> + </AlertDialogHeader> + <AlertDialogFooter> + <AlertDialogCancel>Cancel</AlertDialogCancel> + <AlertDialogAction + class="bg-destructive text-white hover:bg-destructive/90" + :disabled="regeneratingJobs" + @click="regenerateMaintenanceJobs" + > + <Loader2 v-if="regeneratingJobs" class="mr-2 size-4 animate-spin" /> + Regenerate jobs + </AlertDialogAction> + </AlertDialogFooter> + </AlertDialogContent> + </AlertDialog> </template> </div> </template> diff --git a/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts b/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts index 0b395640b..dc0669cb0 100644 --- a/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts +++ b/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts @@ -131,7 +131,8 @@ describe('KG-MANAGE-012b - maintain workspace commit and job controls', () => { it('labels new files section and exposes commit refresh actions', () => { expect(graphMaintenanceWorkspaceVue).toContain('New Files to Process') expect(graphMaintenanceWorkspaceVue).toContain('Check for new commits') - expect(graphMaintenanceWorkspaceVue).toContain('last_extraction_baseline_commit') + expect(graphMaintenanceWorkspaceVue).toContain('showExtractionBaselineNotice') + expect(graphMaintenanceWorkspaceVue).toContain('Commit during last extraction') expect(graphMaintenanceWorkspaceVue).toContain('diff-summary') }) @@ -154,6 +155,10 @@ describe('KG-MANAGE-012b - maintain workspace commit and job controls', () => { expect(graphMaintenanceWorkspaceVue).toContain('Archive Completed') expect(graphMaintenanceWorkspaceVue).toContain('Regenerate jobs') expect(graphMaintenanceWorkspaceVue).toContain('regenerateMaintenanceJobs') + expect(graphMaintenanceWorkspaceVue).toContain('regenerateConfirmOpen') + expect(graphMaintenanceWorkspaceVue).toContain('variant="destructive"') + expect(graphMaintenanceWorkspaceVue).toContain('Regenerate maintenance jobs?') + 
expect(graphMaintenanceWorkspaceVue).toContain('Scheduled maintenance is blocked') expect(graphMaintenanceWorkspaceVue).not.toContain('Maintenance run history') expect(graphMaintenanceWorkspaceVue).not.toContain('Sync changed sources only') expect(graphMaintenanceWorkspaceVue).not.toContain('Run maintenance jobs now') From d5c2fdd3123c7b4b368d5c7784802fb87f87e7fa Mon Sep 17 00:00:00 2001 From: aredenba-rh <aredenba@redhat.com> Date: Sun, 21 Jun 2026 15:51:40 -0400 Subject: [PATCH 153/153] refactor(maintenance): simplify Maintain commit view and lazy ref refresh Show only baseline-vs-HEAD on the Maintain table, refresh branch tips on manual check and scheduled runs, and leave ingest prepare to queue/regenerate workflows. Co-authored-by: Cursor <cursoragent@cursor.com> --- .../maintenance_pipeline_dependencies.py | 15 +++++ .../maintenance_pipeline_service.py | 30 ++++++++++ .../test_maintenance_pipeline_service.py | 48 ++++++++++++++++ .../GraphMaintenanceWorkspace.vue | 57 ++++++------------- .../knowledge-graph-manage-workspace.test.ts | 6 +- 5 files changed, 113 insertions(+), 43 deletions(-) diff --git a/src/api/infrastructure/management/maintenance_pipeline_dependencies.py b/src/api/infrastructure/management/maintenance_pipeline_dependencies.py index d30bab955..2b3ed3d4b 100644 --- a/src/api/infrastructure/management/maintenance_pipeline_dependencies.py +++ b/src/api/infrastructure/management/maintenance_pipeline_dependencies.py @@ -19,6 +19,7 @@ ) from infrastructure.outbox.repository import OutboxRepository from infrastructure.settings import get_management_settings, get_spicedb_settings +from management.infrastructure.git_commit_reference_service import GitCommitReferenceService from management.infrastructure.git_diff_summary_service import GitDiffSummaryService from management.infrastructure.repositories import ( DataSourceRepository, @@ -42,6 +43,18 @@ def factory(tenant_id: str) -> GitDiffSummaryService: return factory +def 
_commit_reference_service_factory( + secret_store: FernetSecretStore, +) -> Callable[[str], GitCommitReferenceService]: + def factory(tenant_id: str) -> GitCommitReferenceService: + return GitCommitReferenceService( + credential_reader=secret_store, + tenant_id=tenant_id, + ) + + return factory + + def build_maintenance_pipeline_for_background( *, session_factory: Any, @@ -71,6 +84,7 @@ def build_maintenance_pipeline_for_background( authorization=authz, tenant_id="", diff_summary_service_factory=_diff_summary_service_factory(secret_store), + commit_reference_service_factory=_commit_reference_service_factory(secret_store), ) @@ -96,4 +110,5 @@ def get_maintenance_pipeline_service( authorization=authz, tenant_id=current_user.tenant_id.value, diff_summary_service_factory=_diff_summary_service_factory(secret_store), + commit_reference_service_factory=_commit_reference_service_factory(secret_store), ) diff --git a/src/api/infrastructure/management/maintenance_pipeline_service.py b/src/api/infrastructure/management/maintenance_pipeline_service.py index 697699f0f..3b4ebacc5 100644 --- a/src/api/infrastructure/management/maintenance_pipeline_service.py +++ b/src/api/infrastructure/management/maintenance_pipeline_service.py @@ -36,6 +36,7 @@ KnowledgeGraphMaintenanceRunRecord, KnowledgeGraphMaintenanceSchedule, ) +from management.infrastructure.git_commit_reference_service import GitCommitReferenceService from management.infrastructure.git_diff_summary_service import GitDiffSummaryService from management.ports.exceptions import UnauthorizedError from shared_kernel.authorization.protocols import AuthorizationProvider @@ -74,6 +75,9 @@ def __init__( authorization: AuthorizationProvider, tenant_id: str, diff_summary_service_factory: Callable[[str], GitDiffSummaryService], + commit_reference_service_factory: ( + Callable[[str], GitCommitReferenceService] | None + ) = None, ) -> None: self._session = session self._session_factory = session_factory @@ -84,6 +88,7 @@ def __init__( 
self._authz = authorization self._tenant_id = tenant_id self._diff_summary_service_factory = diff_summary_service_factory + self._commit_reference_service_factory = commit_reference_service_factory async def trigger_scheduled( self, @@ -96,6 +101,7 @@ async def trigger_scheduled( kg = await self._kg_repo.get_by_id(KnowledgeGraphId(value=kg_id)) if kg is None: raise ValueError(f"Knowledge graph {kg_id} not found") + await self._refresh_tracked_branch_heads(kg_id=kg_id, tenant_id=kg.tenant_id) return await self._trigger_for_kg( kg=kg, requested_by="maintenance-scheduler", @@ -628,8 +634,32 @@ def _with_session(self, session: AsyncSession) -> MaintenancePipelineService: authorization=self._authz, tenant_id=self._tenant_id, diff_summary_service_factory=self._diff_summary_service_factory, + commit_reference_service_factory=self._commit_reference_service_factory, ) + async def _refresh_tracked_branch_heads( + self, + *, + kg_id: str, + tenant_id: str, + ) -> int: + """Refresh stored branch tips from GitHub before a scheduled maintenance run.""" + if self._commit_reference_service_factory is None: + return 0 + ref_service = self._commit_reference_service_factory(tenant_id) + data_sources = await self._ds_repo.find_by_knowledge_graph(kg_id) + updated = 0 + for data_source in data_sources: + tracked_head = await ref_service.resolve_tracked_head_commit(data_source) + if tracked_head is None: + continue + if data_source.tracked_branch_head_commit == tracked_head: + continue + data_source.tracked_branch_head_commit = tracked_head + await self._ds_repo.save(data_source) + updated += 1 + return updated + async def _require_manage_kg(self, *, user_id: str, kg_id: str) -> KnowledgeGraph: resource = format_resource(ResourceType.KNOWLEDGE_GRAPH, kg_id) subject = format_subject(ResourceType.USER, user_id) diff --git a/src/api/tests/unit/management/application/test_maintenance_pipeline_service.py b/src/api/tests/unit/management/application/test_maintenance_pipeline_service.py index 
8b4aa9678..6635885a3 100644 --- a/src/api/tests/unit/management/application/test_maintenance_pipeline_service.py +++ b/src/api/tests/unit/management/application/test_maintenance_pipeline_service.py @@ -609,6 +609,54 @@ async def test_check_scheduled_triggers_skips_when_failed_maintenance_jobs_remai assert "failed maintenance job" in (kg.maintenance_run_history[-1].message or "").lower() +@pytest.mark.asyncio +async def test_trigger_scheduled_refreshes_tracked_branch_heads( + mock_session, session_factory, kg_repo, ds_repo, sync_run_repo, authz +): + kg = _make_kg() + kg_repo.seed(kg) + ds = _make_ds(ds_id="ds-1", kg_id=kg.id.value, tenant_id=kg.tenant_id, baseline="abc", head="old-head") + ds_repo.seed(ds) + + ref_service = MagicMock() + ref_service.resolve_tracked_head_commit = AsyncMock(return_value="new-head") + svc = MaintenancePipelineService( + session=mock_session, + session_factory=session_factory, + knowledge_graph_repository=kg_repo, + data_source_repository=ds_repo, + sync_run_repository=sync_run_repo, + extraction_job_repository=MagicMock(), + authorization=authz, + tenant_id=kg.tenant_id, + diff_summary_service_factory=lambda _tenant: MagicMock(), + commit_reference_service_factory=lambda _tenant: ref_service, + ) + + with patch.object( + svc, + "_trigger_for_kg", + AsyncMock( + return_value=KnowledgeGraphMaintenanceRunRecord( + run_id="run-scheduled", + triggered_at=datetime.now(UTC), + outcome=KnowledgeGraphMaintenanceRunOutcome.NO_CHANGES, + ) + ), + ) as trigger_for_kg: + await svc.trigger_scheduled( + kg_id=kg.id.value, + files_per_job=2, + worker_count=4, + ) + + ref_service.resolve_tracked_head_commit.assert_awaited_once() + trigger_for_kg.assert_awaited_once() + saved = await ds_repo.get_by_id(DataSourceId(value="ds-1")) + assert saved is not None + assert saved.tracked_branch_head_commit == "new-head" + + @pytest.mark.asyncio async def test_start_ready_maintenance_jobs_requires_queued_jobs( mock_session, session_factory, kg_repo, ds_repo, 
sync_run_repo, authz diff --git a/src/dev-ui/app/components/graph-management/GraphMaintenanceWorkspace.vue b/src/dev-ui/app/components/graph-management/GraphMaintenanceWorkspace.vue index 9d8c93f4f..efe86c298 100644 --- a/src/dev-ui/app/components/graph-management/GraphMaintenanceWorkspace.vue +++ b/src/dev-ui/app/components/graph-management/GraphMaintenanceWorkspace.vue @@ -34,14 +34,9 @@ import { import { isMaintenanceReady } from '@/utils/kgManageWorkspace' import { commitStatusClass, - formatFilesOnDisk, - hasUnpulledCommits, - needsIngestionPrepare, - resolveIngestedHeadCommit, resolveRepoUrl, resolveTrackedBranch, shortCommitHash, - unpulledCommitStatusLabel, } from '@/utils/kgDataSourcesCommits' import { cronToDailyTime, @@ -205,10 +200,6 @@ const maintenanceReadySources = computed(() => dataSources.value.filter((ds) => isMaintenanceReady(ds)), ) -const sourcesNeedingPrepare = computed(() => - dataSources.value.filter((ds) => needsIngestionPrepare(ds)), -) - const totalChangedFiles = computed(() => dataSources.value.reduce((sum, ds) => sum + (ds.diff_summary?.total_changed_files || 0), 0), ) @@ -288,7 +279,7 @@ const extractionBaselineNoticeDetail = computed(() => { } return ( 'After you queue maintenance, that column stays fixed until every job in the run succeeds. ' - + 'Check for new commits only updates branch HEAD; it does not move the extraction baseline.' + + 'Use Check for new commits to refresh branch tips from GitHub before queueing or regenerating.' 
) }) const extractionRunLive = computed(() => { @@ -641,13 +632,15 @@ async function checkForNewCommits() { ), ) await loadDataSources() - const unpulled = dataSources.value.filter((ds) => hasUnpulledCommits(ds)) - if (unpulled.length === 0) { - toast.success('Up to date with remote branches') + const ready = maintenanceReadySources.value.length + if (ready === 0) { + toast.success('Branch tips refreshed', { + description: 'No sources have commits ahead of the last extraction baseline.', + }) } else { toast.success( - `${unpulled.length} source${unpulled.length === 1 ? '' : 's'} have unpulled commits`, - { description: 'Compare baseline vs branch head in the table below.' }, + `${ready} source${ready === 1 ? '' : 's'} have new commits vs baseline`, + { description: 'Queue or regenerate maintenance jobs when ready to process them.' }, ) } } catch (e: unknown) { @@ -843,7 +836,8 @@ onUnmounted(() => { New Files to Process </CardTitle> <CardDescription class="mt-1"> - Compare the last job baseline to the remote branch tip and review changed files since extraction. + Compare commit during last extraction to branch HEAD. Check for new commits refreshes + tips from GitHub; queue and regenerate use these stored refs and prepare sources as needed. </CardDescription> </div> <div class="flex flex-wrap gap-2"> @@ -881,13 +875,12 @@ onUnmounted(() => { Connect a data source before scheduling maintenance. 
</div> <div v-else class="overflow-x-auto rounded-md border"> - <table class="w-full min-w-[1100px] text-sm"> + <table class="w-full min-w-[900px] text-sm"> <thead> <tr class="border-b bg-muted/50 text-left"> <th class="px-3 py-2 font-medium">Source</th> <th class="px-3 py-2 font-medium">Branch</th> <th class="px-3 py-2 font-medium">Branch HEAD</th> - <th class="px-3 py-2 text-right font-medium">Files on disk</th> <th class="px-3 py-2 font-medium">Commit during last extraction</th> <th class="px-3 py-2 text-right font-medium">Changed files</th> <th class="px-3 py-2 font-medium">Status</th> @@ -898,7 +891,7 @@ onUnmounted(() => { v-for="ds in dataSources" :key="ds.id" class="border-b border-border/60 align-top last:border-0" - :class="isMaintenanceReady(ds) || hasUnpulledCommits(ds) ? 'bg-amber-50/40 dark:bg-amber-950/10' : ''" + :class="isMaintenanceReady(ds) ? 'bg-amber-50/40 dark:bg-amber-950/10' : ''" > <td class="px-3 py-2"> <p class="font-medium">{{ ds.name }}</p> @@ -909,22 +902,11 @@ onUnmounted(() => { <td class="px-3 py-2 font-mono text-xs">{{ resolveTrackedBranch(ds.connection_config) }}</td> <td class="px-3 py-2 font-mono text-xs"> <span - :class="commitStatusClass(ds.tracked_branch_head_commit, resolveIngestedHeadCommit(ds))" + :class="commitStatusClass(ds.tracked_branch_head_commit, ds.last_extraction_baseline_commit)" :title="ds.tracked_branch_head_commit || ''" > {{ shortCommitHash(ds.tracked_branch_head_commit) }} </span> - <p class="mt-0.5 text-[10px] text-muted-foreground"> - {{ - unpulledCommitStatusLabel( - ds.newest_unpulled_commit, - ds.tracked_branch_head_commit, - ) - }} - </p> - </td> - <td class="px-3 py-2 text-right tabular-nums text-muted-foreground"> - {{ formatFilesOnDisk(ds) }} </td> <td class="px-3 py-2 font-mono text-xs"> <span @@ -939,16 +921,10 @@ onUnmounted(() => { </td> <td class="px-3 py-2"> <Badge - :variant="isMaintenanceReady(ds) ? 'default' : hasUnpulledCommits(ds) ? 
'outline' : 'secondary'" + :variant="isMaintenanceReady(ds) ? 'default' : 'secondary'" class="text-xs" > - {{ - isMaintenanceReady(ds) - ? 'New files vs baseline' - : hasUnpulledCommits(ds) - ? 'Unpulled commits' - : 'Up to date' - }} + {{ isMaintenanceReady(ds) ? 'New commits vs baseline' : 'Up to date' }} </Badge> </td> </tr> @@ -957,8 +933,7 @@ onUnmounted(() => { </div> <p class="mt-3 text-xs text-muted-foreground"> {{ maintenanceReadySources.length }} source(s) have commits ahead of the last job baseline · - {{ totalChangedFiles }} changed file(s) detected · - {{ sourcesNeedingPrepare.length }} will ingest on queue + {{ totalChangedFiles }} changed file(s) detected </p> </CardContent> </Card> diff --git a/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts b/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts index dc0669cb0..1d8e12594 100644 --- a/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts +++ b/src/dev-ui/app/tests/knowledge-graph-manage-workspace.test.ts @@ -131,8 +131,10 @@ describe('KG-MANAGE-012b - maintain workspace commit and job controls', () => { it('labels new files section and exposes commit refresh actions', () => { expect(graphMaintenanceWorkspaceVue).toContain('New Files to Process') expect(graphMaintenanceWorkspaceVue).toContain('Check for new commits') - expect(graphMaintenanceWorkspaceVue).toContain('showExtractionBaselineNotice') - expect(graphMaintenanceWorkspaceVue).toContain('Commit during last extraction') + expect(graphMaintenanceWorkspaceVue).toContain('New commits vs baseline') + expect(graphMaintenanceWorkspaceVue).not.toContain('not ingested yet') + expect(graphMaintenanceWorkspaceVue).not.toContain('Unpulled commits') + expect(graphMaintenanceWorkspaceVue).not.toContain('will ingest on queue') expect(graphMaintenanceWorkspaceVue).toContain('diff-summary') })