From 8c533596d774e38cbb80fcb329d3958bd8286aa2 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Wed, 17 Jun 2026 02:13:04 -0600 Subject: [PATCH 1/4] fix: register CODEGRAPH_ENGINE and CODEGRAPH_FAST_SKIP_DIAG in config DEFAULTS docs check acknowledged --- src/domain/graph/builder/pipeline.ts | 3 ++- src/infrastructure/config.ts | 16 ++++++++++++++-- src/types.ts | 15 +++++++++++++++ 3 files changed, 31 insertions(+), 3 deletions(-) diff --git a/src/domain/graph/builder/pipeline.ts b/src/domain/graph/builder/pipeline.ts index a948e001..07a3c836 100644 --- a/src/domain/graph/builder/pipeline.ts +++ b/src/domain/graph/builder/pipeline.ts @@ -411,7 +411,8 @@ export async function buildGraph( // engineName, scope, etc.) we log the reason so the bench gate run // produces observable output even if `detectNoChanges` is never // entered. - const fastSkipDiag = process.env.CODEGRAPH_FAST_SKIP_DIAG === '1'; + // Reads from config (which applies CODEGRAPH_FAST_SKIP_DIAG via applyEnvOverrides). + const fastSkipDiag = ctx.config.build.fastSkipDiag; if (fastSkipDiag) { const reasons: string[] = []; if (!ctx.nativeAvailable) reasons.push('nativeAvailable=false'); diff --git a/src/infrastructure/config.ts b/src/infrastructure/config.ts index 189d68be..c838494c 100644 --- a/src/infrastructure/config.ts +++ b/src/infrastructure/config.ts @@ -29,6 +29,8 @@ export const DEFAULTS = { driftThreshold: 0.2, smallFilesThreshold: 5, typescriptResolver: true, + engine: 'auto' as 'auto' | 'native' | 'wasm', + fastSkipDiag: false, }, query: { defaultDepth: 3, @@ -658,8 +660,8 @@ export function loadConfigWithProvenance( const raw = JSON.parse(fs.readFileSync(filePath, 'utf-8')) as Record; for (const k of Object.keys(raw)) provenance[k] = 'project'; break; - } catch { - // ignore + } catch (err) { + debug(`loadConfigWithProvenance: failed to parse ${filePath}: ${toErrorMessage(err)}`); } } } @@ -686,6 +688,16 @@ export function applyEnvOverrides(config: CodegraphConfig): CodegraphConfig { 
process.env[envKey as keyof NodeJS.ProcessEnv]; } } + // Engine selection: CODEGRAPH_ENGINE env always wins over config-file value. + if (process.env.CODEGRAPH_ENGINE !== undefined) { + const val = process.env.CODEGRAPH_ENGINE as 'auto' | 'native' | 'wasm'; + (config.build as Record).engine = val; + } + // Fast-skip diagnostic flag. + if (process.env.CODEGRAPH_FAST_SKIP_DIAG !== undefined) { + (config.build as Record).fastSkipDiag = + process.env.CODEGRAPH_FAST_SKIP_DIAG === '1'; + } return config; } diff --git a/src/types.ts b/src/types.ts index c7731372..50bee5b8 100644 --- a/src/types.ts +++ b/src/types.ts @@ -1327,6 +1327,21 @@ export interface CodegraphConfig { * Default: false. */ typescriptResolver: boolean; + /** + * Engine selection override. Equivalent to the `CODEGRAPH_ENGINE` env var and + * the `--engine` CLI flag. Values: `'auto'` (default), `'native'`, `'wasm'`. + * When set in config, takes lower priority than the CLI flag but higher than + * the default. Routed through `applyEnvOverrides` so `CODEGRAPH_ENGINE=wasm` + * always wins over a config-file value. + */ + engine: 'auto' | 'native' | 'wasm'; + /** + * Enable diagnostic logging for the native fast-skip pre-flight check. + * Equivalent to `CODEGRAPH_FAST_SKIP_DIAG=1`. When true, logs why the + * fast-skip gate was skipped (e.g. forceFullRebuild, engineName mismatch). + * Default: false. 
+ */ + fastSkipDiag: boolean; }; query: { From 95d50ab177a014837ad004850bfb1f7b05e9a654 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Wed, 17 Jun 2026 02:27:01 -0600 Subject: [PATCH 2/4] refactor: move JS type-resolution confidence threshold to config DEFAULTS --- src/extractors/javascript.ts | 11 +++++++++-- src/infrastructure/config.ts | 9 ++++++++- src/types.ts | 7 +++++++ tests/unit/config.test.ts | 1 + 4 files changed, 25 insertions(+), 3 deletions(-) diff --git a/src/extractors/javascript.ts b/src/extractors/javascript.ts index 4fef18da..e5760b8c 100644 --- a/src/extractors/javascript.ts +++ b/src/extractors/javascript.ts @@ -96,6 +96,13 @@ const BUILTIN_GLOBALS: Set = new Set([ const MAX_PROPAGATION_DEPTH = 3; /** Confidence penalty applied per propagation hop (1.0 → 0.9 → 0.8 → 0.7). */ export const PROPAGATION_HOP_PENALTY = 0.1; +/** + * Confidence score for a return type inferred from `return new Constructor()` with no + * explicit TypeScript annotation. Registered as `analysis.typeInferenceConfidence` in + * `src/infrastructure/config.ts` DEFAULTS — kept in sync manually until config is + * threaded through to `extractSymbols`. + */ +const INFERRED_RETURN_TYPE_CONFIDENCE = 0.85; /** * Extract symbols from a JS/TS parsed AST. 
@@ -1592,8 +1599,8 @@ function storeReturnType( const inferred = findReturnNewExprType(body); if (inferred) { const existing = returnTypeMap.get(fnName); - if (!existing || 0.85 > existing.confidence) - returnTypeMap.set(fnName, { type: inferred, confidence: 0.85 }); + if (!existing || INFERRED_RETURN_TYPE_CONFIDENCE > existing.confidence) + returnTypeMap.set(fnName, { type: inferred, confidence: INFERRED_RETURN_TYPE_CONFIDENCE }); } } } diff --git a/src/infrastructure/config.ts b/src/infrastructure/config.ts index c838494c..ef932e47 100644 --- a/src/infrastructure/config.ts +++ b/src/infrastructure/config.ts @@ -89,9 +89,16 @@ export const DEFAULTS = { // TODO(Phase 8.3): wire these into the points-to solver and type-propagation path // once config is threaded through to extractSymbols / buildPointsToMap. Currently // controlled by hardcoded constants in src/extractors/javascript.ts - // (MAX_PROPAGATION_DEPTH, PROPAGATION_HOP_PENALTY) and in + // (MAX_PROPAGATION_DEPTH, PROPAGATION_HOP_PENALTY, INFERRED_RETURN_TYPE_CONFIDENCE) and in // src/domain/graph/resolver/points-to.ts (MAX_SOLVER_ITERATIONS). typePropagationDepth: 3, + /** + * Confidence score assigned to a return type inferred from `return new Constructor()` + * when no explicit TypeScript annotation is present. + * Mirrors `INFERRED_RETURN_TYPE_CONFIDENCE` in `src/extractors/javascript.ts`. + * @reserved — not yet wired; see TODO above. + */ + typeInferenceConfidence: 0.85, /** * Maximum fixed-point iterations for the Phase 8.3 points-to solver. * @reserved — currently not wired to either the WASM solver diff --git a/src/types.ts b/src/types.ts index 50bee5b8..f4389888 100644 --- a/src/types.ts +++ b/src/types.ts @@ -1420,6 +1420,13 @@ export interface CodegraphConfig { * constant of 50. See TODO in `src/infrastructure/config.ts`. */ pointsToMaxIterations: number; + /** + * Confidence score for a return type inferred from `return new Constructor()` + * with no explicit TypeScript annotation. 
+ * Mirrors `INFERRED_RETURN_TYPE_CONFIDENCE` in `src/extractors/javascript.ts`. + * @reserved — not yet wired; see TODO in `src/infrastructure/config.ts`. + */ + typeInferenceConfidence: number; }; community: { diff --git a/tests/unit/config.test.ts b/tests/unit/config.test.ts index 4e9052c1..0aadfd31 100644 --- a/tests/unit/config.test.ts +++ b/tests/unit/config.test.ts @@ -97,6 +97,7 @@ describe('DEFAULTS', () => { briefMediumRiskCallers: 3, typePropagationDepth: 3, pointsToMaxIterations: 50, + typeInferenceConfidence: 0.85, }); }); From 24b1008a4451bb77c08c57f736b3a8e7c101be70 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Wed, 17 Jun 2026 04:15:16 -0600 Subject: [PATCH 3/4] refactor: split process_file and match_js_type_map (Rust) -- docs check acknowledged --- .../graph/builder/stages/build_edges.rs | 378 +++++++++++------- .../src/extractors/javascript.rs | 307 +++++++------- 2 files changed, 386 insertions(+), 299 deletions(-) diff --git a/crates/codegraph-core/src/domain/graph/builder/stages/build_edges.rs b/crates/codegraph-core/src/domain/graph/builder/stages/build_edges.rs index a2580aa1..4ce0c1e5 100644 --- a/crates/codegraph-core/src/domain/graph/builder/stages/build_edges.rs +++ b/crates/codegraph-core/src/domain/graph/builder/stages/build_edges.rs @@ -443,23 +443,24 @@ pub fn build_call_edges( edges } -/// Process a single file: build per-file maps and emit call/receiver/hierarchy edges. -fn process_file<'a>( - ctx: &EdgeContext<'a>, - file_input: &'a FileEdgeInput, - all_nodes: &'a [NodeInfo], - edges: &mut Vec, -) { - let rel_path = &file_input.file; - let file_node_id = file_input.file_node_id; - - let imported_names: HashMap<&str, &str> = file_input - .imported_names.iter() - .map(|im| (im.name.as_str(), im.file.as_str())) - .collect(); +/// Per-file lookup structures built once and shared by the call/receiver/hierarchy +/// edge emission loops. Encapsulates what was formerly the setup block of `process_file`. 
+struct FileContext<'a> { + rel_path: &'a str, + file_node_id: u32, + imported_names: HashMap<&'a str, &'a str>, + type_map: HashMap<&'a str, (&'a str, f64)>, + defs_with_ids: Vec>, + pts_map: Option>>, + /// lhs names from the *raw* fnRefBindings only (thisCall conversions are + /// scoped keys and never flat-matched). Used for case-(c) pts gate. + fn_ref_binding_lhs: HashSet<&'a str>, +} - // Build type map keeping the highest-confidence entry per name - // (first-wins on tie), matching the JS setTypeMapEntry behaviour. +/// Build the per-file type map from the input's type_map entries. +/// Keeps the highest-confidence entry per name (first-wins on tie), matching +/// the JS `setTypeMapEntry` behaviour. +fn build_type_map<'a>(file_input: &'a FileEdgeInput) -> HashMap<&'a str, (&'a str, f64)> { let mut type_map: HashMap<&str, (&str, f64)> = HashMap::new(); for tm in &file_input.type_map { let entry = type_map.entry(tm.name.as_str()); @@ -474,16 +475,21 @@ fn process_file<'a>( } } } + type_map +} - let file_nodes: Vec<&NodeInfo> = all_nodes.iter().filter(|n| n.file == *rel_path).collect(); - let defs_with_ids: Vec = file_input.definitions.iter().map(|d| { - let node_id = file_nodes.iter() - .find(|n| n.name == d.name && n.kind == d.kind && n.line == d.line) - .map(|n| n.id); - DefWithId { name: &d.name, kind: &d.kind, line: d.line, end_line: d.end_line.unwrap_or(u32::MAX), node_id } - }).collect(); - // Phase 8.3: build pts map for alias resolution — mirrors buildPointsToMapForFile. - // Only callable (function/method) defs are seeded as concrete targets. +/// Build the points-to map for a file. +/// +/// Constructs the `PtsBindings` from `file_input`, merges `this_call_bindings` +/// into scoped `fn::this → ctx` fnRefBindings, builds `def_names` and +/// `definition_params`, then delegates to `build_points_to_map`. +/// Returns `None` when the file has no pts inputs (fast path). 
+/// +/// Mirrors `buildPointsToMapForFile` in `src/domain/graph/resolver/points-to.ts`. +fn build_pts_map_for_file( + file_input: &FileEdgeInput, + imported_names: &HashMap<&str, &str>, +) -> Option>> { let raw_fn_ref: &[FnRefBinding] = file_input.fn_ref_bindings.as_deref().unwrap_or(&[]); let this_calls: &[ThisCallBinding] = file_input.this_call_bindings.as_deref().unwrap_or(&[]); let bindings = PtsBindings { @@ -505,48 +511,206 @@ fn process_file<'a>( || !bindings.object_rest_param_bindings.is_empty() || !bindings.object_prop_bindings.is_empty() || !this_calls.is_empty(); + if !has_pts_inputs { + return None; + } + + let def_names: HashSet<&str> = file_input.definitions.iter() + .filter(|d| d.kind == "function" || d.kind == "method") + .map(|d| d.name.as_str()) + .collect(); + // First-wins on duplicate names — mirrors buildDefinitionParamsMap. + let mut definition_params: HashMap<&str, Vec<&str>> = HashMap::new(); + for d in &file_input.definitions { + if d.kind != "function" && d.kind != "method" { continue; } + let Some(params) = d.params.as_ref().filter(|p| !p.is_empty()) else { continue }; + definition_params.entry(d.name.as_str()) + .or_insert_with(|| params.iter().map(|s| s.as_str()).collect()); + } + // Convert thisCallBindings into scoped fnRefBindings (`fn::this → ctx`) so // `this()` calls inside `fn` resolve via the scoped key `fn::this`. - let all_fn_ref_bindings: Vec; - let pts_map: Option>> = if has_pts_inputs { - let def_names: HashSet<&str> = file_input.definitions.iter() - .filter(|d| d.kind == "function" || d.kind == "method") - .map(|d| d.name.as_str()) - .collect(); - // First-wins on duplicate names — mirrors buildDefinitionParamsMap. 
- let mut definition_params: HashMap<&str, Vec<&str>> = HashMap::new(); - for d in &file_input.definitions { - if d.kind != "function" && d.kind != "method" { continue; } - let Some(params) = d.params.as_ref().filter(|p| !p.is_empty()) else { continue }; - definition_params.entry(d.name.as_str()) - .or_insert_with(|| params.iter().map(|s| s.as_str()).collect()); - } - let bindings = if this_calls.is_empty() { - bindings - } else { - let mut merged = raw_fn_ref.to_vec(); - merged.extend(this_calls.iter().map(|b| FnRefBinding { - lhs: format!("{}::this", b.callee), - rhs: b.this_arg.clone(), - rhs_receiver: None, - })); - all_fn_ref_bindings = merged; - PtsBindings { fn_ref_bindings: &all_fn_ref_bindings, ..bindings } - }; - Some(build_points_to_map(&bindings, &def_names, &imported_names, &definition_params)) + // The merged vec must outlive the PtsBindings borrow — stored here. + let merged_fn_ref: Vec; + let final_bindings = if this_calls.is_empty() { + bindings } else { - None + let mut merged = raw_fn_ref.to_vec(); + merged.extend(this_calls.iter().map(|b| FnRefBinding { + lhs: format!("{}::this", b.callee), + rhs: b.this_arg.clone(), + rhs_receiver: None, + })); + merged_fn_ref = merged; + PtsBindings { fn_ref_bindings: &merged_fn_ref, ..bindings } }; + + Some(build_points_to_map(&final_bindings, &def_names, imported_names, &definition_params)) +} + +/// Build all per-file lookup structures needed for edge emission. 
+fn build_file_context<'a>( + file_input: &'a FileEdgeInput, + all_nodes: &'a [NodeInfo], +) -> FileContext<'a> { + let rel_path = file_input.file.as_str(); + let imported_names: HashMap<&str, &str> = file_input + .imported_names.iter() + .map(|im| (im.name.as_str(), im.file.as_str())) + .collect(); + let type_map = build_type_map(file_input); + let file_nodes: Vec<&NodeInfo> = all_nodes.iter().filter(|n| n.file == rel_path).collect(); + let defs_with_ids: Vec = file_input.definitions.iter().map(|d| { + let node_id = file_nodes.iter() + .find(|n| n.name == d.name && n.kind == d.kind && n.line == d.line) + .map(|n| n.id); + DefWithId { + name: &d.name, + kind: &d.kind, + line: d.line, + end_line: d.end_line.unwrap_or(u32::MAX), + node_id, + } + }).collect(); + let pts_map = build_pts_map_for_file(file_input, &imported_names); + let raw_fn_ref: &[FnRefBinding] = file_input.fn_ref_bindings.as_deref().unwrap_or(&[]); // Case (c) flat-key gate set: lhs names from the *raw* fnRefBindings only // (thisCall conversions are scoped keys and never flat-matched). let fn_ref_binding_lhs: HashSet<&str> = raw_fn_ref.iter().map(|b| b.lhs.as_str()).collect(); + FileContext { + rel_path, + file_node_id: file_input.file_node_id, + imported_names, + type_map, + defs_with_ids, + pts_map, + fn_ref_binding_lhs, + } +} + +/// Resolve and emit pts-alias edges for a no-receiver unresolved call. +/// +/// Implements the four-case gate from buildFileCallEdges (build-edges.ts): +/// (a) dynamic alias calls — flat `call.name` lookup; +/// (b) parameter / this-rebinding / for-of variable calls — scoped key +/// `caller::name`, with the `::name` sentinel for top-level for-of loops; +/// (c) module-level alias bindings (`const f = handler`, `f = fn.bind(ctx)`) +/// — flat key, gated on fnRefBindingLhs so self-seeded local definitions never fire. +/// Confidence is penalised by one hop to reflect the indirection. 
+fn emit_no_receiver_pts_edges<'a>( + ctx: &EdgeContext<'a>, + fc: &FileContext<'a>, + call: &CallInfo, + caller_id: u32, + caller_name: &'a str, + is_dynamic: u32, + seen_edges: &HashSet, + pts_edge_map: &mut HashMap, + edges: &mut Vec, +) { + let pts = match fc.pts_map.as_ref() { Some(p) => p, None => return }; + let is_dyn_call = call.dynamic.unwrap_or(false); + let scoped_key = if caller_name.is_empty() { None } else { + Some(format!("{}::{}", caller_name, call.name)) + .filter(|k| pts.contains_key(k.as_str())) + }; + let module_key = if caller_name.is_empty() { + Some(format!("::{}", call.name)) + .filter(|k| pts.contains_key(k.as_str())) + } else { + None + }; + let flat_ok = !is_dyn_call + && fc.fn_ref_binding_lhs.contains(call.name.as_str()) + && pts.contains_key(call.name.as_str()); + let lookup_name: Option = if is_dyn_call { + Some(call.name.clone()) + } else if let Some(k) = scoped_key { + Some(k) + } else if let Some(k) = module_key { + Some(k) + } else if flat_ok { + Some(call.name.clone()) + } else { + None + }; + if let Some(lookup_name) = lookup_name { + emit_pts_alias_edges( + ctx, + &PtsAliasCtx { + pts, + lookup_name: &lookup_name, + call_line: call.line, + caller_id, + caller_name, + is_dynamic, + rel_path: fc.rel_path, + imported_names: &fc.imported_names, + type_map: &fc.type_map, + }, + seen_edges, + pts_edge_map, + edges, + ); + } +} + +/// Resolve and emit pts-alias edges for a receiver call via object-rest bindings. +/// +/// Phase 8.3f: `rest.prop()` resolves when pts["rest.prop"] was seeded by the +/// rest-dispatch chain. Builtin receivers are already skipped at the call-loop top. 
+fn emit_receiver_pts_edges<'a>( + ctx: &EdgeContext<'a>, + fc: &FileContext<'a>, + call: &CallInfo, + caller_id: u32, + caller_name: &'a str, + is_dynamic: u32, + seen_edges: &HashSet, + pts_edge_map: &mut HashMap, + edges: &mut Vec, +) { + let (receiver, pts) = match (call.receiver.as_deref(), fc.pts_map.as_ref()) { + (Some(r), Some(p)) => (r, p), + _ => return, + }; + if receiver == "this" || receiver == "self" || receiver == "super" { return; } + let receiver_key = format!("{}.{}", receiver, call.name); + if !pts.contains_key(receiver_key.as_str()) { return; } + emit_pts_alias_edges( + ctx, + &PtsAliasCtx { + pts, + lookup_name: &receiver_key, + call_line: call.line, + caller_id, + caller_name, + is_dynamic, + rel_path: fc.rel_path, + imported_names: &fc.imported_names, + type_map: &fc.type_map, + }, + seen_edges, + pts_edge_map, + edges, + ); +} + +/// Process a single file: build per-file lookup context and emit call/receiver/hierarchy edges. +fn process_file<'a>( + ctx: &EdgeContext<'a>, + file_input: &'a FileEdgeInput, + all_nodes: &'a [NodeInfo], + edges: &mut Vec, +) { + let fc = build_file_context(file_input, all_nodes); - let mut seen_edges: HashSet = HashSet::new(); // Phase 8.3: tracks pts-resolved edges separately from seen_edges so that a // subsequent direct call to the same caller→target pair can upgrade confidence // in-place rather than being silently dropped by the dedup guard. // Mirrors `ptsEdgeRows` in `src/domain/graph/builder/stages/build-edges.ts`. // Key: edge_key (same as seen_edges). Value: index into `edges` vec. 
+ let mut seen_edges: HashSet = HashSet::new(); let mut pts_edge_map: HashMap = HashMap::new(); for call in &file_input.calls { @@ -554,110 +718,26 @@ fn process_file<'a>( if ctx.builtin_set.contains(receiver.as_str()) { continue; } } - let (caller_id, caller_name) = find_enclosing_caller(&defs_with_ids, call.line, file_node_id); + let (caller_id, caller_name) = find_enclosing_caller(&fc.defs_with_ids, call.line, fc.file_node_id); let is_dynamic = if call.dynamic.unwrap_or(false) { 1u32 } else { 0u32 }; - let imported_from = imported_names.get(call.name.as_str()).copied(); - - let mut targets = resolve_call_targets(ctx, call, rel_path, imported_from, &type_map, caller_name); - sort_targets_by_confidence(&mut targets, rel_path, imported_from); - emit_call_edges(&targets, caller_id, is_dynamic, rel_path, imported_from, &mut seen_edges, &mut pts_edge_map, edges); - - // Phase 8.3 / 8.3c / 8.3e: points-to fallback for unresolved calls. - // Mirrors the four-case gate in buildFileCallEdges (build-edges.ts): - // (a) dynamic alias calls — flat `call.name` lookup; - // (b) parameter / this-rebinding / for-of variable calls — scoped key - // `caller::name`, with the `::name` sentinel for - // top-level for-of loops; - // (c) module-level alias bindings (`const f = handler`, `f = fn.bind(ctx)`) - // — flat key, gated on fnRefBindingLhs so self-seeded local - // definitions never fire. - // Confidence is penalised by one hop to reflect the indirection. - // - // Pts edges go into pts_edge_map (not seen_edges) so a later direct call - // to the same target can upgrade confidence in-place — mirroring ptsEdgeRows. 
+ let imported_from = fc.imported_names.get(call.name.as_str()).copied(); + + let mut targets = resolve_call_targets(ctx, call, fc.rel_path, imported_from, &fc.type_map, caller_name); + sort_targets_by_confidence(&mut targets, fc.rel_path, imported_from); + emit_call_edges(&targets, caller_id, is_dynamic, fc.rel_path, imported_from, &mut seen_edges, &mut pts_edge_map, edges); + if targets.is_empty() && call.receiver.is_none() { - if let Some(ref pts) = pts_map { - let is_dyn_call = call.dynamic.unwrap_or(false); - let scoped_key = if caller_name.is_empty() { None } else { - Some(format!("{}::{}", caller_name, call.name)) - .filter(|k| pts.contains_key(k.as_str())) - }; - let module_key = if caller_name.is_empty() { - Some(format!("::{}", call.name)) - .filter(|k| pts.contains_key(k.as_str())) - } else { - None - }; - let flat_ok = !is_dyn_call - && fn_ref_binding_lhs.contains(call.name.as_str()) - && pts.contains_key(call.name.as_str()); - let lookup_name: Option = if is_dyn_call { - Some(call.name.clone()) - } else if let Some(k) = scoped_key { - Some(k) - } else if let Some(k) = module_key { - Some(k) - } else if flat_ok { - Some(call.name.clone()) - } else { - None - }; - if let Some(lookup_name) = lookup_name { - emit_pts_alias_edges( - ctx, - &PtsAliasCtx { - pts, - lookup_name: &lookup_name, - call_line: call.line, - caller_id, - caller_name, - is_dynamic, - rel_path, - imported_names: &imported_names, - type_map: &type_map, - }, - &seen_edges, - &mut pts_edge_map, - edges, - ); - } - } + emit_no_receiver_pts_edges(ctx, &fc, call, caller_id, caller_name, is_dynamic, &seen_edges, &mut pts_edge_map, edges); } - // Phase 8.3f: pts fallback for receiver calls via object-rest bindings. - // `rest.prop()` resolves when pts["rest.prop"] was seeded by the - // rest-dispatch chain. Builtin receivers were skipped at loop top. 
if targets.is_empty() { - if let (Some(receiver), Some(pts)) = (call.receiver.as_deref(), pts_map.as_ref()) { - if receiver != "this" && receiver != "self" && receiver != "super" { - let receiver_key = format!("{}.{}", receiver, call.name); - if pts.contains_key(receiver_key.as_str()) { - emit_pts_alias_edges( - ctx, - &PtsAliasCtx { - pts, - lookup_name: &receiver_key, - call_line: call.line, - caller_id, - caller_name, - is_dynamic, - rel_path, - imported_names: &imported_names, - type_map: &type_map, - }, - &seen_edges, - &mut pts_edge_map, - edges, - ); - } - } - } + emit_receiver_pts_edges(ctx, &fc, call, caller_id, caller_name, is_dynamic, &seen_edges, &mut pts_edge_map, edges); } - emit_receiver_edge(ctx, call, caller_id, rel_path, &type_map, &imported_names, &mut seen_edges, edges); + emit_receiver_edge(ctx, call, caller_id, fc.rel_path, &fc.type_map, &fc.imported_names, &mut seen_edges, edges); } - emit_hierarchy_edges(ctx, file_input, rel_path, edges); + emit_hierarchy_edges(ctx, file_input, fc.rel_path, edges); } /// Callable definition kinds — only function/method bodies act as enclosing diff --git a/crates/codegraph-core/src/extractors/javascript.rs b/crates/codegraph-core/src/extractors/javascript.rs index 7b3097a6..30d13dc6 100644 --- a/crates/codegraph-core/src/extractors/javascript.rs +++ b/crates/codegraph-core/src/extractors/javascript.rs @@ -113,161 +113,168 @@ fn enclosing_type_map_class<'a>(node: &Node<'a>, source: &'a [u8]) -> Option<&'a fn match_js_type_map(node: &Node, source: &[u8], symbols: &mut FileSymbols, _depth: usize) { match node.kind() { - "variable_declarator" => { - if let Some(name_n) = node.child_by_field_name("name") { - if name_n.kind() == "identifier" { - let var_name = node_text(&name_n, source); - // Type annotation: confidence 0.9 - if let Some(type_anno) = find_child(node, "type_annotation") { - if let Some(type_name) = extract_simple_type_name(&type_anno, source) { - push_type_map_entry(symbols, var_name.to_string(), 
type_name.to_string()); - } - } - // Constructor: confidence 1.0 (overrides annotation in edge builder) - if let Some(value_n) = node.child_by_field_name("value") { - if value_n.kind() == "new_expression" { - if let Some(type_name) = extract_new_expr_type_name(&value_n, source) { - symbols.type_map.push(TypeMapEntry { - name: var_name.to_string(), - type_name: type_name.to_string(), - confidence: 1.0, - }); - } - } - // Phase 8.3e: Object.create({ key: fn }) → composite pts key per property - if value_n.kind() == "call_expression" { - seed_object_create_entries(var_name, &value_n, source, symbols); - } - // Phase 8.3f parity: seed composite typeMap keys for ALL object-literal - // declarations (`const`, `let`, `var`) when at non-function scope. - // Mirrors WASM handleVarDeclaratorTypeMap (no isConst guard there). - // For `const`, extract_object_literal_functions already seeds these entries; - // dedup_type_map collapses any duplicates at equal confidence. - if value_n.kind() == "object" && find_parent_of_types(node, &[ - "function_declaration", "arrow_function", "function_expression", - "method_definition", "generator_function_declaration", "generator_function", - ]).is_none() { - seed_objlit_type_map_entries(var_name, &value_n, source, symbols); - } - } - } - } - } + "variable_declarator" => handle_var_declarator_type_map(node, source, symbols), // Phase 8.3e: Object.defineProperty / defineProperties → composite pts key - "call_expression" => { - seed_define_property_entries(node, source, symbols); + "call_expression" => seed_define_property_entries(node, source, symbols), + "required_parameter" | "optional_parameter" => handle_param_type_map(node, source, symbols), + // Phase 8.3d: property-write pts tracking. + // Mirrors handlePropWriteTypeMap in src/extractors/javascript.ts. + "assignment_expression" => handle_assignment_type_map(node, source, symbols), + // TypeScript class field declarations. 
+ // Mirrors handleFieldDefTypeMap in src/extractors/javascript.ts. + "public_field_definition" | "field_definition" => handle_field_def_type_map(node, source, symbols), + _ => {} + } +} + +/// Handle `variable_declarator` nodes in the type-map walk. +/// +/// Seeds type-map entries from: +/// - type annotations (`confidence = 0.9`) +/// - constructor calls (`confidence = 1.0`) +/// - Object.create({ key: fn }) composite pts keys (Phase 8.3e) +/// - object-literal declarations at non-function scope (Phase 8.3f parity) +fn handle_var_declarator_type_map(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let Some(name_n) = node.child_by_field_name("name") else { return }; + if name_n.kind() != "identifier" { return; } + let var_name = node_text(&name_n, source); + // Type annotation: confidence 0.9 + if let Some(type_anno) = find_child(node, "type_annotation") { + if let Some(type_name) = extract_simple_type_name(&type_anno, source) { + push_type_map_entry(symbols, var_name.to_string(), type_name.to_string()); } - "required_parameter" | "optional_parameter" => { - let name_node = node.child_by_field_name("pattern") - .or_else(|| node.child_by_field_name("left")) - .or_else(|| node.child(0)); - if let Some(name_node) = name_node { - if name_node.kind() == "identifier" { - if let Some(type_anno) = find_child(node, "type_annotation") { - if let Some(type_name) = extract_simple_type_name(&type_anno, source) { - push_type_map_entry( - symbols, - node_text(&name_node, source).to_string(), - type_name.to_string(), - ); - } - } - } - } + } + let Some(value_n) = node.child_by_field_name("value") else { return }; + // Constructor: confidence 1.0 (overrides annotation in edge builder) + if value_n.kind() == "new_expression" { + if let Some(type_name) = extract_new_expr_type_name(&value_n, source) { + symbols.type_map.push(TypeMapEntry { + name: var_name.to_string(), + type_name: type_name.to_string(), + confidence: 1.0, + }); } - // Phase 8.3d: property-write pts 
tracking — `obj.prop = fn` seeds composite key. - // Also seeds `this.prop = new Ctor()` constructor-assigned property types, - // keyed as `ClassName.prop` (class-scoped) so two classes with identically-named - // properties don't overwrite each other's typeMap entry (issue #1323). - // Mirrors handlePropWriteTypeMap in src/extractors/javascript.ts. - "assignment_expression" => { - let lhs = node.child_by_field_name("left"); - let rhs = node.child_by_field_name("right"); - if let (Some(lhs), Some(rhs)) = (lhs, rhs) { - if lhs.kind() == "member_expression" { - let obj = lhs.child_by_field_name("object"); - let prop = lhs.child_by_field_name("property"); - if let (Some(obj), Some(prop)) = (obj, prop) { - // Guard: only static property access, not computed subscripts. - let prop_kind = prop.kind(); - if prop_kind == "property_identifier" || prop_kind == "identifier" { - if obj.kind() == "this" && rhs.kind() == "new_expression" { - if let Some(ctor_type) = extract_new_expr_type_name(&rhs, source) { - let key = match enclosing_type_map_class(node, source) { - Some(class_name) => { - format!("{}.{}", class_name, node_text(&prop, source)) - } - None => format!("this.{}", node_text(&prop, source)), - }; - symbols.type_map.push(TypeMapEntry { - name: key, - type_name: ctor_type.to_string(), - confidence: 1.0, - }); - } - } else if obj.kind() == "identifier" && rhs.kind() == "identifier" { - let obj_name = node_text(&obj, source); - if !is_js_builtin_global(obj_name) { - let key = format!("{}.{}", obj_name, node_text(&prop, source)); - let rhs_name = node_text(&rhs, source).to_string(); - symbols.type_map.push(TypeMapEntry { - name: key, - type_name: rhs_name, - confidence: 0.85, - }); - } - } - } - } - } - } + } + // Phase 8.3e: Object.create({ key: fn }) → composite pts key per property + if value_n.kind() == "call_expression" { + seed_object_create_entries(var_name, &value_n, source, symbols); + } + // Phase 8.3f parity: seed composite typeMap keys for ALL object-literal 
+ // declarations (`const`, `let`, `var`) when at non-function scope. + // Mirrors WASM handleVarDeclaratorTypeMap (no isConst guard there). + // For `const`, extract_object_literal_functions already seeds these entries; + // dedup_type_map collapses any duplicates at equal confidence. + if value_n.kind() == "object" && find_parent_of_types(node, &[ + "function_declaration", "arrow_function", "function_expression", + "method_definition", "generator_function_declaration", "generator_function", + ]).is_none() { + seed_objlit_type_map_entries(var_name, &value_n, source, symbols); + } +} + +/// Handle `required_parameter` / `optional_parameter` nodes in the type-map walk. +/// +/// Seeds a type-map entry when the parameter carries a TypeScript type annotation. +fn handle_param_type_map(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let name_node = node.child_by_field_name("pattern") + .or_else(|| node.child_by_field_name("left")) + .or_else(|| node.child(0)); + let Some(name_node) = name_node else { return }; + if name_node.kind() != "identifier" { return }; + let Some(type_anno) = find_child(node, "type_annotation") else { return }; + if let Some(type_name) = extract_simple_type_name(&type_anno, source) { + push_type_map_entry( + symbols, + node_text(&name_node, source).to_string(), + type_name.to_string(), + ); + } +} + +/// Handle `assignment_expression` nodes in the type-map walk. +/// +/// Seeds two kinds of entries: +/// - `this.prop = new Ctor()` → class-scoped key `ClassName.prop` (confidence 1.0) +/// - `obj.prop = identifier` → composite key `obj.prop` (confidence 0.85) +/// +/// Mirrors `handlePropWriteTypeMap` in `src/extractors/javascript.ts`. 
+fn handle_assignment_type_map(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let lhs = node.child_by_field_name("left"); + let rhs = node.child_by_field_name("right"); + let (Some(lhs), Some(rhs)) = (lhs, rhs) else { return }; + if lhs.kind() != "member_expression" { return; } + let obj = lhs.child_by_field_name("object"); + let prop = lhs.child_by_field_name("property"); + let (Some(obj), Some(prop)) = (obj, prop) else { return }; + // Guard: only static property access, not computed subscripts. + let prop_kind = prop.kind(); + if prop_kind != "property_identifier" && prop_kind != "identifier" { return; } + if obj.kind() == "this" && rhs.kind() == "new_expression" { + if let Some(ctor_type) = extract_new_expr_type_name(&rhs, source) { + let key = match enclosing_type_map_class(node, source) { + Some(class_name) => format!("{}.{}", class_name, node_text(&prop, source)), + None => format!("this.{}", node_text(&prop, source)), + }; + symbols.type_map.push(TypeMapEntry { + name: key, + type_name: ctor_type.to_string(), + confidence: 1.0, + }); } - // TypeScript class field declarations: `private repo: Repository` - // Seeds a class-scoped key `ClassName.field` (confidence 0.9) as the primary - // entry so that two classes with identically-named fields don't overwrite each - // other's typeMap entry (issue #1458). The resolver's `CallerClass.X` fallback - // looks up exactly this key. - // Bare `field` and `this.field` keys are kept at lower confidence (0.6) as - // fallbacks for single-class files where the resolver may lack callerClass context. - // Mirrors handleFieldDefTypeMap in src/extractors/javascript.ts. 
- "public_field_definition" | "field_definition" => { - let name_node = node.child_by_field_name("name") - .or_else(|| node.child_by_field_name("property")) - .or_else(|| find_child(node, "property_identifier")); - if let Some(name_node) = name_node { - let kind = name_node.kind(); - if kind == "property_identifier" || kind == "identifier" - || kind == "private_property_identifier" - { - let field_name = node_text(&name_node, source).to_string(); - if let Some(type_anno) = find_child(node, "type_annotation") { - if let Some(type_name) = extract_simple_type_name(&type_anno, source) { - match enclosing_type_map_class(node, source) { - Some(class_name) => { - // Primary: class-scoped key prevents cross-class collision. - set_type_map_entry( - symbols, - format!("{}.{}", class_name, field_name), - type_name.to_string(), - 0.9, - ); - // Fallback bare keys at lower confidence. - set_type_map_entry(symbols, field_name.clone(), type_name.to_string(), 0.6); - set_type_map_entry(symbols, format!("this.{}", field_name), type_name.to_string(), 0.6); - } - None => { - // No enclosing class declaration (e.g. class expression) - // — use bare keys only at full confidence. - set_type_map_entry(symbols, field_name.clone(), type_name.to_string(), 0.9); - set_type_map_entry(symbols, format!("this.{}", field_name), type_name.to_string(), 0.9); - } - } - } - } - } - } + } else if obj.kind() == "identifier" && rhs.kind() == "identifier" { + let obj_name = node_text(&obj, source); + if !is_js_builtin_global(obj_name) { + let key = format!("{}.{}", obj_name, node_text(&prop, source)); + let rhs_name = node_text(&rhs, source).to_string(); + symbols.type_map.push(TypeMapEntry { + name: key, + type_name: rhs_name, + confidence: 0.85, + }); + } + } +} + +/// Handle `public_field_definition` / `field_definition` nodes in the type-map walk. 
+/// +/// Seeds a class-scoped key `ClassName.field` (confidence 0.9) as the primary entry +/// so that two classes with identically-named fields don't overwrite each other's +/// typeMap entry (issue #1458). The resolver's `CallerClass.X` fallback looks up +/// exactly this key. Bare `field` and `this.field` keys are kept at lower confidence +/// (0.6) as fallbacks for single-class files where the resolver may lack callerClass. +/// +/// Mirrors `handleFieldDefTypeMap` in `src/extractors/javascript.ts`. +fn handle_field_def_type_map(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let name_node = node.child_by_field_name("name") + .or_else(|| node.child_by_field_name("property")) + .or_else(|| find_child(node, "property_identifier")); + let Some(name_node) = name_node else { return }; + let kind = name_node.kind(); + if kind != "property_identifier" && kind != "identifier" && kind != "private_property_identifier" { + return; + } + let field_name = node_text(&name_node, source).to_string(); + let Some(type_anno) = find_child(node, "type_annotation") else { return }; + let Some(type_name) = extract_simple_type_name(&type_anno, source) else { return }; + match enclosing_type_map_class(node, source) { + Some(class_name) => { + // Primary: class-scoped key prevents cross-class collision. + set_type_map_entry( + symbols, + format!("{}.{}", class_name, field_name), + type_name.to_string(), + 0.9, + ); + // Fallback bare keys at lower confidence. + set_type_map_entry(symbols, field_name.clone(), type_name.to_string(), 0.6); + set_type_map_entry(symbols, format!("this.{}", field_name), type_name.to_string(), 0.6); + } + None => { + // No enclosing class declaration (e.g. class expression) + // — use bare keys only at full confidence. 
+ set_type_map_entry(symbols, field_name.clone(), type_name.to_string(), 0.9); + set_type_map_entry(symbols, format!("this.{}", field_name), type_name.to_string(), 0.9); } - _ => {} } } From 408b1bba1ffa18cbf56d56b1e0051f4570349344 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Wed, 17 Jun 2026 04:24:01 -0600 Subject: [PATCH 4/4] refactor: split Rust pipeline.rs write_dataflow and insert_nodes.rs do_insert_nodes --- .../src/domain/graph/builder/pipeline.rs | 283 +++++++++++------- .../graph/builder/stages/insert_nodes.rs | 223 ++++++++------ 2 files changed, 300 insertions(+), 206 deletions(-) diff --git a/crates/codegraph-core/src/domain/graph/builder/pipeline.rs b/crates/codegraph-core/src/domain/graph/builder/pipeline.rs index 70840d94..9cde7e29 100644 --- a/crates/codegraph-core/src/domain/graph/builder/pipeline.rs +++ b/crates/codegraph-core/src/domain/graph/builder/pipeline.rs @@ -1307,7 +1307,38 @@ fn propagate_return_types_across_files( ) { use crate::domain::graph::builder::stages::build_edges::PROPAGATION_HOP_PENALTY; - // rel_path → (fn_name → (type_name, confidence)) + let (return_type_index, global_return_types) = build_return_type_index(file_symbols); + if return_type_index.is_empty() { + return; + } + + for (rel_path, symbols) in file_symbols.iter_mut() { + if symbols.call_assignments.is_empty() { + continue; + } + inject_return_types_for_file( + rel_path, + symbols, + import_ctx, + &return_type_index, + &global_return_types, + PROPAGATION_HOP_PENALTY, + ); + } +} + +/// Build per-file and global return-type indexes from `return_type_map` entries. +/// +/// Returns: +/// - `return_type_index`: `rel_path → (fn_name → (type_name, confidence))` +/// - `global_return_types`: flat map for qualified `Type.method` lookups; higher +/// confidence wins, tie-break is deterministic (paths visited in sorted order). 
+fn build_return_type_index( + file_symbols: &HashMap<String, FileSymbols>, +) -> ( + HashMap<String, HashMap<String, (String, f64)>>, + HashMap<String, (String, f64)>, +) { let mut return_type_index: HashMap<String, HashMap<String, (String, f64)>> = HashMap::new(); for (rel_path, symbols) in file_symbols.iter() { if symbols.return_type_map.is_empty() { @@ -1318,13 +1349,7 @@ fn propagate_return_types_across_files( per_file.insert(e.name.clone(), (e.type_name.clone(), e.confidence)); } } - if return_type_index.is_empty() { - return; - } - // Flat map for qualified `Type.method` lookups. Higher confidence wins; - // ties keep the first writer. Files are visited in sorted order so the - // tie-break is deterministic (HashMap iteration order is not). let mut global_return_types: HashMap<String, (String, f64)> = HashMap::new(); let mut sorted_paths: Vec<&String> = return_type_index.keys().collect(); sorted_paths.sort(); @@ -1340,57 +1365,64 @@ fn propagate_return_types_across_files( } } - for (rel_path, symbols) in file_symbols.iter_mut() { - if symbols.call_assignments.is_empty() { + (return_type_index, global_return_types) +} + +/// Inject cross-file return types into a single file's `type_map`. +/// +/// For each call-assignment in the file (`const x = callee()`), looks up the +/// callee's return type in `return_type_index` (imported callee) or +/// `global_return_types` (qualified `Receiver.method` callee) and pushes a +/// `TypeMapEntry` so downstream call-edge resolution can follow `x.method()`. +/// Already-resolved locals (`type_map` already has `var_name`) are skipped. +fn inject_return_types_for_file( + rel_path: &str, + symbols: &mut FileSymbols, + import_ctx: &ImportEdgeContext, + return_type_index: &HashMap<String, HashMap<String, (String, f64)>>, + global_return_types: &HashMap<String, (String, f64)>, + hop_penalty: f64, +) { + let abs_file = Path::new(&import_ctx.root_dir).join(rel_path); + let abs_str = abs_file.to_str().unwrap_or(""); + let imported_names = collect_imported_names_for_file(abs_str, symbols, import_ctx); + // Later entries overwrite earlier ones on duplicate names — same as the + // HashMap collect in build_call_edges.
+ let imported_map: HashMap<String, String> = + imported_names.into_iter().map(|e| (e.name, e.file)).collect(); + + let mut injections: Vec<TypeMapEntry> = Vec::new(); + let mut injected: HashSet<String> = HashSet::new(); + for ca in &symbols.call_assignments { + // Already resolved locally (JS: `typeMap.has(varName)`); first + // successful injection wins for repeated assignments to one name. + if injected.contains(&ca.var_name) || symbols.type_map.iter().any(|t| t.name == ca.var_name) + { continue; } - let abs_file = Path::new(&import_ctx.root_dir).join(rel_path.as_str()); - let abs_str = abs_file.to_str().unwrap_or(""); - let imported_names = collect_imported_names_for_file(abs_str, symbols, import_ctx); - // Later entries overwrite earlier ones on duplicate names — same as the - // HashMap collect in build_call_edges. - let imported_map: HashMap<String, String> = imported_names - .into_iter() - .map(|e| (e.name, e.file)) - .collect(); - - let mut injections: Vec<TypeMapEntry> = Vec::new(); - let mut injected: HashSet<String> = HashSet::new(); - for ca in &symbols.call_assignments { - // Already resolved locally (JS: `typeMap.has(varName)`); first - // successful injection wins for repeated assignments to one name.
- if injected.contains(&ca.var_name) - || symbols.type_map.iter().any(|t| t.name == ca.var_name) - { - continue; + let found = match &ca.receiver_type_name { + Some(receiver) => { + global_return_types.get(&format!("{receiver}.{}", ca.callee_name)) } + None => imported_map.get(&ca.callee_name).and_then(|from| { + return_type_index.get(from).and_then(|m| m.get(&ca.callee_name)) + }), + }; - let found = match &ca.receiver_type_name { - Some(receiver) => { - global_return_types.get(&format!("{receiver}.{}", ca.callee_name)) - } - None => imported_map.get(&ca.callee_name).and_then(|from| { - return_type_index - .get(from) - .and_then(|m| m.get(&ca.callee_name)) - }), - }; - - if let Some((type_name, confidence)) = found { - let propagated = confidence - PROPAGATION_HOP_PENALTY; - if propagated > 0.0 { - injections.push(TypeMapEntry { - name: ca.var_name.clone(), - type_name: type_name.clone(), - confidence: propagated, - }); - injected.insert(ca.var_name.clone()); - } + if let Some((type_name, confidence)) = found { + let propagated = confidence - hop_penalty; + if propagated > 0.0 { + injections.push(TypeMapEntry { + name: ca.var_name.clone(), + type_name: type_name.clone(), + confidence: propagated, + }); + injected.insert(ca.var_name.clone()); } } - symbols.type_map.extend(injections); } + symbols.type_map.extend(injections); } /// Insert the edges produced by the native edge builder into the edges table. 
@@ -1825,76 +1857,101 @@ fn write_dataflow( Some(d) => d, None => continue, }; + write_dataflow_arg_flows(&mut insert_stmt, &mut local_stmt, &mut global_stmt, data, file); + write_dataflow_assignments(&mut insert_stmt, &mut local_stmt, &mut global_stmt, data, file); + write_dataflow_mutations(&mut insert_stmt, &mut local_stmt, &mut global_stmt, data, file); + } - // argFlows → flows_to edges - for flow in &data.arg_flows { - let caller = match &flow.caller_func { - Some(name) => name.as_str(), - None => continue, - }; - let src = resolve_dataflow_node(&mut local_stmt, &mut global_stmt, caller, file); - let tgt = resolve_dataflow_node(&mut local_stmt, &mut global_stmt, &flow.callee_name, file); - if let (Some(src), Some(tgt)) = (src, tgt) { - let _ = insert_stmt.execute(rusqlite::params![ - src, - tgt, - "flows_to", - flow.arg_index, - &flow.expression, - flow.line, - flow.confidence, - ]); - } - } + drop(insert_stmt); + drop(local_stmt); + drop(global_stmt); + tx.commit().is_ok() +} - // assignments → returns edges - for assignment in &data.assignments { - let consumer = match &assignment.caller_func { - Some(name) => name.as_str(), - None => continue, - }; - let producer = resolve_dataflow_node(&mut local_stmt, &mut global_stmt, &assignment.source_call_name, file); - let consumer_id = resolve_dataflow_node(&mut local_stmt, &mut global_stmt, consumer, file); - if let (Some(producer), Some(consumer_id)) = (producer, consumer_id) { - let _ = insert_stmt.execute(rusqlite::params![ - producer, - consumer_id, - "returns", - Option::::None, - &assignment.expression, - assignment.line, - 1.0_f64, - ]); - } +/// Emit `flows_to` edges for each argFlow entry: caller → callee via argument passing. 
+fn write_dataflow_arg_flows( + insert_stmt: &mut rusqlite::Statement, + local_stmt: &mut rusqlite::Statement, + global_stmt: &mut rusqlite::Statement, + data: &crate::types::DataflowResult, + file: &str, +) { + for flow in &data.arg_flows { + let caller = match &flow.caller_func { + Some(name) => name.as_str(), + None => continue, + }; + let src = resolve_dataflow_node(local_stmt, global_stmt, caller, file); + let tgt = resolve_dataflow_node(local_stmt, global_stmt, &flow.callee_name, file); + if let (Some(src), Some(tgt)) = (src, tgt) { + let _ = insert_stmt.execute(rusqlite::params![ + src, tgt, "flows_to", flow.arg_index, &flow.expression, flow.line, flow.confidence, + ]); } + } +} - // mutations → mutates edges (only for param bindings) - for mutation in &data.mutations { - if mutation.binding_type.as_deref() != Some("param") { - continue; - } - let func = match &mutation.func_name { - Some(name) => name.as_str(), - None => continue, - }; - if let Some(node_id) = resolve_dataflow_node(&mut local_stmt, &mut global_stmt, func, file) { - let _ = insert_stmt.execute(rusqlite::params![ - node_id, - node_id, - "mutates", - Option::::None, - &mutation.mutating_expr, - mutation.line, - 1.0_f64, - ]); - } +/// Emit `returns` edges for each assignment entry: producer → consumer via +/// return-value assignment (`const x = callee()`). 
+fn write_dataflow_assignments( + insert_stmt: &mut rusqlite::Statement, + local_stmt: &mut rusqlite::Statement, + global_stmt: &mut rusqlite::Statement, + data: &crate::types::DataflowResult, + file: &str, +) { + for assignment in &data.assignments { + let consumer = match &assignment.caller_func { + Some(name) => name.as_str(), + None => continue, + }; + let producer = + resolve_dataflow_node(local_stmt, global_stmt, &assignment.source_call_name, file); + let consumer_id = resolve_dataflow_node(local_stmt, global_stmt, consumer, file); + if let (Some(producer), Some(consumer_id)) = (producer, consumer_id) { + let _ = insert_stmt.execute(rusqlite::params![ + producer, + consumer_id, + "returns", + Option::::None, + &assignment.expression, + assignment.line, + 1.0_f64, + ]); } } +} - drop(insert_stmt); - drop(local_stmt); - drop(global_stmt); - tx.commit().is_ok() +/// Emit `mutates` edges for param-binding mutation entries. Only fires for +/// mutations where `binding_type == "param"` — other mutation kinds are +/// informational and not persisted as dataflow edges. +fn write_dataflow_mutations( + insert_stmt: &mut rusqlite::Statement, + local_stmt: &mut rusqlite::Statement, + global_stmt: &mut rusqlite::Statement, + data: &crate::types::DataflowResult, + file: &str, +) { + for mutation in &data.mutations { + if mutation.binding_type.as_deref() != Some("param") { + continue; + } + let func = match &mutation.func_name { + Some(name) => name.as_str(), + None => continue, + }; + if let Some(node_id) = resolve_dataflow_node(local_stmt, global_stmt, func, file) { + let _ = insert_stmt.execute(rusqlite::params![ + node_id, + node_id, + "mutates", + Option::::None, + &mutation.mutating_expr, + mutation.line, + 1.0_f64, + ]); + } + } } /// Resolve a function name to a node ID, trying same-file first then global. 
diff --git a/crates/codegraph-core/src/domain/graph/builder/stages/insert_nodes.rs b/crates/codegraph-core/src/domain/graph/builder/stages/insert_nodes.rs index cffb3a90..69b4519a 100644 --- a/crates/codegraph-core/src/domain/graph/builder/stages/insert_nodes.rs +++ b/crates/codegraph-core/src/domain/graph/builder/stages/insert_nodes.rs @@ -101,82 +101,102 @@ pub(crate) fn do_insert_nodes( removed_files: &[String], ) -> rusqlite::Result<()> { let tx = conn.unchecked_transaction()?; + insert_file_nodes(&tx, batches)?; + let (contains_edges, param_of_edges) = insert_symbol_nodes(&tx, batches)?; + upsert_node_batch(&tx, &contains_edges, &param_of_edges)?; + upsert_file_hashes(&tx, file_hashes, removed_files)?; + tx.commit() +} - // ── Phase 1: Insert file nodes + definitions + export nodes ────── - { - let mut stmt = tx.prepare_cached( - "INSERT OR IGNORE INTO nodes \ - (name, kind, file, line, end_line, parent_id, qualified_name, scope, visibility) \ - VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9)", - )?; - - for batch in batches { - // File node +/// Phase 1 + 1b: insert file nodes, definition nodes, export nodes, and mark +/// exported nodes. Each batch writes one file-kind node, then all its +/// definitions and exports (OR IGNORE handles duplicates), then flips the +/// `exported` flag via UPDATE.
+fn insert_file_nodes( + tx: &rusqlite::Transaction, + batches: &[InsertNodesBatch], +) -> rusqlite::Result<()> { + let mut stmt = tx.prepare_cached( + "INSERT OR IGNORE INTO nodes \ + (name, kind, file, line, end_line, parent_id, qualified_name, scope, visibility) \ + VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9)", + )?; + + for batch in batches { + // File node + stmt.execute(params![ + &batch.file, + "file", + &batch.file, + 0, + None::, + None::, + None::<&str>, + None::<&str>, + None::<&str> + ])?; + + // Definitions + for def in &batch.definitions { + let scope: Option<&str> = def.name.rfind('.').map(|i| &def.name[..i]); + // .as_deref() converts Option → Option<&str> so rusqlite + // serialises None as SQL NULL unambiguously (#709). + let vis = def.visibility.as_deref(); stmt.execute(params![ + &def.name, + &def.kind, &batch.file, - "file", + def.line, + def.end_line, + None::, + &def.name, + scope, + vis + ])?; + } + + // Exports (may duplicate definitions — OR IGNORE handles it) + for exp in &batch.exports { + stmt.execute(params![ + &exp.name, + &exp.kind, &batch.file, - 0, + exp.line, None::, None::, - None::<&str>, + &exp.name, None::<&str>, None::<&str> ])?; - - // Definitions - for def in &batch.definitions { - let scope: Option<&str> = def.name.rfind('.').map(|i| &def.name[..i]); - // .as_deref() converts Option → Option<&str> so rusqlite - // serialises None as SQL NULL unambiguously (#709). 
- let vis = def.visibility.as_deref(); - stmt.execute(params![ - &def.name, - &def.kind, - &batch.file, - def.line, - def.end_line, - None::, - &def.name, - scope, - vis - ])?; - } - - // Exports (may duplicate definitions — OR IGNORE handles it) - for exp in &batch.exports { - stmt.execute(params![ - &exp.name, - &exp.kind, - &batch.file, - exp.line, - None::, - None::, - &exp.name, - None::<&str>, - None::<&str> - ])?; - } } } - // ── Phase 1b: Mark exported nodes ──────────────────────────────── - { - let mut stmt = tx.prepare_cached( - "UPDATE nodes SET exported = 1 \ - WHERE name = ?1 AND kind = ?2 AND file = ?3 AND line = ?4", - )?; - for batch in batches { - for exp in &batch.exports { - stmt.execute(params![&exp.name, &exp.kind, &batch.file, exp.line])?; - } + // Mark exported nodes + let mut mark_stmt = tx.prepare_cached( + "UPDATE nodes SET exported = 1 \ + WHERE name = ?1 AND kind = ?2 AND file = ?3 AND line = ?4", + )?; + for batch in batches { + for exp in &batch.exports { + mark_stmt.execute(params![&exp.name, &exp.kind, &batch.file, exp.line])?; } } - // ── Phase 2: Query node IDs, insert children, collect file→def edges + Ok(()) +} + +/// Phase 2 + 3: query freshly inserted node IDs, insert child nodes (parameters, +/// nested functions, etc.), then re-fetch IDs to collect all containment and +/// parameter_of edges. Returns `(contains_edges, param_of_edges)` for bulk +/// insertion by [`upsert_node_batch`]. 
+fn insert_symbol_nodes( + tx: &rusqlite::Transaction, + batches: &[InsertNodesBatch], +) -> rusqlite::Result<(Vec<(i64, i64)>, Vec<(i64, i64)>)> { let mut contains_edges: Vec<(i64, i64)> = Vec::new(); let mut param_of_edges: Vec<(i64, i64)> = Vec::new(); + // Phase 2: query existing node IDs, insert children, collect file→def edges { let mut id_stmt = tx.prepare_cached("SELECT id, name, kind, line FROM nodes WHERE file = ?1")?; @@ -223,7 +243,7 @@ pub(crate) fn do_insert_nodes( } } - // ── Phase 3: Re-fetch IDs (including children), add def→child edges + // Phase 3: re-fetch IDs (now including children), add def→child edges { let mut id_stmt = tx.prepare_cached("SELECT id, name, kind, line FROM nodes WHERE file = ?1")?; @@ -254,50 +274,67 @@ pub(crate) fn do_insert_nodes( } } - // ── Insert all edges ───────────────────────────────────────────── - { - let mut stmt = tx.prepare_cached( - "INSERT OR IGNORE INTO edges (source_id, target_id, kind, confidence, dynamic) \ - VALUES (?1, ?2, ?3, ?4, ?5)", - )?; - for &(src, tgt) in &contains_edges { - stmt.execute(params![src, tgt, "contains", 1.0, 0])?; - } - for &(src, tgt) in &param_of_edges { - stmt.execute(params![src, tgt, "parameter_of", 1.0, 0])?; - } + Ok((contains_edges, param_of_edges)) +} + +/// Bulk-insert all containment and parameter_of edges collected by +/// [`insert_symbol_nodes`]. Single prepared statement, single pass.
+fn upsert_node_batch( + tx: &rusqlite::Transaction, + contains_edges: &[(i64, i64)], + param_of_edges: &[(i64, i64)], +) -> rusqlite::Result<()> { + let mut stmt = tx.prepare_cached( + "INSERT OR IGNORE INTO edges (source_id, target_id, kind, confidence, dynamic) \ + VALUES (?1, ?2, ?3, ?4, ?5)", + )?; + for &(src, tgt) in contains_edges { + stmt.execute(params![src, tgt, "contains", 1.0, 0])?; + } + for &(src, tgt) in param_of_edges { + stmt.execute(params![src, tgt, "parameter_of", 1.0, 0])?; } + Ok(()) +} - // ── Phase 4: File hashes ───────────────────────────────────────── +/// Phase 4: upsert file hashes and remove hashes for deleted files. No-ops +/// gracefully when the `file_hashes` table has not been created yet (e.g. +/// during the initial schema migration). +fn upsert_file_hashes( + tx: &rusqlite::Transaction, + file_hashes: &[FileHashEntry], + removed_files: &[String], +) -> rusqlite::Result<()> { let has_file_hashes = tx .prepare("SELECT 1 FROM sqlite_master WHERE type='table' AND name='file_hashes'") .and_then(|mut s| s.query_row([], |_| Ok(true))) .unwrap_or(false); - if has_file_hashes { - { - let mut upsert = tx.prepare_cached( - "INSERT OR REPLACE INTO file_hashes (file, hash, mtime, size) \ - VALUES (?1, ?2, ?3, ?4)", - )?; - for entry in file_hashes { - upsert.execute(params![ - &entry.file, - &entry.hash, - entry.mtime as i64, - entry.size as i64 - ])?; - } + if !has_file_hashes { + return Ok(()); + } + + { + let mut upsert = tx.prepare_cached( + "INSERT OR REPLACE INTO file_hashes (file, hash, mtime, size) \ + VALUES (?1, ?2, ?3, ?4)", + )?; + for entry in file_hashes { + upsert.execute(params![ + &entry.file, + &entry.hash, + entry.mtime as i64, + entry.size as i64 + ])?; } + } - if !removed_files.is_empty() { - let mut delete = - tx.prepare_cached("DELETE FROM file_hashes WHERE file = ?1")?; - for file in removed_files { - delete.execute(params![file])?; - } + if !removed_files.is_empty() { + let mut delete = tx.prepare_cached("DELETE 
FROM file_hashes WHERE file = ?1")?; + for file in removed_files { + delete.execute(params![file])?; } } - tx.commit() + Ok(()) }