From b13c0c8fa57eb74db6154511a2a2600a41d1ab14 Mon Sep 17 00:00:00 2001 From: Lewis Russell Date: Mon, 15 Jun 2026 16:52:52 +0100 Subject: [PATCH] fix: stop flow inference from hanging on large self-assignments Log a warning and return unknown when a single flow query exceeds the step budget. This keeps semantic model construction moving for pathological repeated assignments while preserving normal flow precision. Add a development/test switch that can turn the fallback off when a runaway flow query needs to be reproduced. Fixes #1114 Assisted-by: Codex --- .../analyzer/infer_cache_manager.rs | 19 ++- .../src/compilation/analyzer/mod.rs | 24 ++-- .../src/compilation/mod.rs | 19 ++- .../src/compilation/test/flow.rs | 48 ++++++++ .../src/semantic/cache/cache_options.rs | 8 +- .../semantic/infer/narrow/get_type_at_flow.rs | 116 +++++++++++++++++- 6 files changed, 207 insertions(+), 27 deletions(-) diff --git a/crates/emmylua_code_analysis/src/compilation/analyzer/infer_cache_manager.rs b/crates/emmylua_code_analysis/src/compilation/analyzer/infer_cache_manager.rs index 22786cf0c..d69c32f3c 100644 --- a/crates/emmylua_code_analysis/src/compilation/analyzer/infer_cache_manager.rs +++ b/crates/emmylua_code_analysis/src/compilation/analyzer/infer_cache_manager.rs @@ -1,28 +1,27 @@ use hashbrown::HashMap; -use crate::{FileId, LuaAnalysisPhase, semantic::LuaInferCache}; +use crate::{CacheOptions, FileId, LuaAnalysisPhase, semantic::LuaInferCache}; #[derive(Debug, Default)] pub struct InferCacheManager { infer_map: HashMap, + cache_options: CacheOptions, } impl InferCacheManager { - pub fn new() -> Self { + pub fn new(cache_options: CacheOptions) -> Self { InferCacheManager { infer_map: HashMap::new(), + cache_options, } } pub fn get_infer_cache(&mut self, file_id: FileId) -> &mut LuaInferCache { - self.infer_map.entry(file_id).or_insert_with(|| { - LuaInferCache::new( - file_id, - crate::CacheOptions { - analysis_phase: LuaAnalysisPhase::Ordered, - }, - ) - }) + let mut cache_options = self.cache_options; + cache_options.analysis_phase = LuaAnalysisPhase::Ordered; + self.infer_map + .entry(file_id) + .or_insert_with(|| LuaInferCache::new(file_id, cache_options)) } pub fn set_force(&mut self) { diff --git a/crates/emmylua_code_analysis/src/compilation/analyzer/mod.rs b/crates/emmylua_code_analysis/src/compilation/analyzer/mod.rs index 13ff48003..a07a9a6b2 100644 --- a/crates/emmylua_code_analysis/src/compilation/analyzer/mod.rs +++ b/crates/emmylua_code_analysis/src/compilation/analyzer/mod.rs @@ -7,7 +7,7 @@ mod lua; mod unresolve; use crate::{ - Emmyrc, FileId, InFiled, InferFailReason, LuaType, LuaTypeDeclId, + CacheOptions, Emmyrc, FileId, InFiled, InferFailReason, LuaType, LuaTypeDeclId, db_index::{DbIndex, WorkspaceId}, profile::Profile, }; @@ -27,12 +27,17 @@ where lua::func_body::analyze_func_body_missing_return_flags_with(body, infer_expr_type) } -pub fn analyze(db: &mut DbIndex, need_analyzed_files: Vec>, config: Arc) { +pub fn analyze( + db: &mut DbIndex, + need_analyzed_files: Vec>, + config: Arc, + cache_options: CacheOptions, +) { if need_analyzed_files.is_empty() { return; } - let contexts = module_analyze(db, need_analyzed_files, config); + let contexts = module_analyze(db, need_analyzed_files, config, cache_options); for (workspace_id, mut context) in contexts { context.workspace_id = Some(workspace_id); @@ -58,6 +63,7 @@ fn module_analyze( db: &mut DbIndex, need_analyzed_files: Vec>, config: Arc, + cache_options: CacheOptions, ) -> Vec<(WorkspaceId, AnalyzeContext)> { if need_analyzed_files.len() == 1 { let in_filed_tree = need_analyzed_files[0].clone(); @@ -75,11 +81,11 @@ fn module_analyze( .get_module_index_mut() .add_module_by_path(file_id, path_str); let workspace_id = workspace_id.unwrap_or(WorkspaceId::MAIN); - let mut context = AnalyzeContext::new(config); + let mut context = AnalyzeContext::new(config, cache_options); context.add_tree_chunk(in_filed_tree); return vec![(workspace_id, context)]; } else if db.get_vfs().is_remote_file(&file_id) { - let mut context = AnalyzeContext::new(config); + let mut context = AnalyzeContext::new(config, cache_options); context.add_tree_chunk(in_filed_tree); return vec![(WorkspaceId::REMOTE, context)]; }; @@ -118,14 +124,14 @@ fn module_analyze( let mut contexts = Vec::new(); if let Some(std_lib) = file_tree_map.remove(&WorkspaceId::STD) { - let mut context = AnalyzeContext::new(config.clone()); + let mut context = AnalyzeContext::new(config.clone(), cache_options); context.tree_list = std_lib; contexts.push((WorkspaceId::STD, context)); } let mut main_vec = Vec::new(); for (workspace_id, tree_list) in file_tree_map { - let mut context = AnalyzeContext::new(config.clone()); + let mut context = AnalyzeContext::new(config.clone(), cache_options); context.tree_list = tree_list; if workspace_id.is_library() || workspace_id.is_remote() { contexts.push((workspace_id, context)); @@ -153,13 +159,13 @@ pub struct AnalyzeContext { } impl AnalyzeContext { - pub fn new(emmyrc: Arc) -> Self { + pub fn new(emmyrc: Arc, cache_options: CacheOptions) -> Self { Self { tree_list: Vec::new(), config: emmyrc, metas: HashSet::new(), unresolves: Vec::new(), - infer_manager: InferCacheManager::new(), + infer_manager: InferCacheManager::new(cache_options), workspace_id: None, pending_type_generic_headers: Vec::new(), } diff --git a/crates/emmylua_code_analysis/src/compilation/mod.rs b/crates/emmylua_code_analysis/src/compilation/mod.rs index 601f12d09..5865e83e4 100644 --- a/crates/emmylua_code_analysis/src/compilation/mod.rs +++ b/crates/emmylua_code_analysis/src/compilation/mod.rs @@ -4,8 +4,8 @@ mod test; use std::sync::Arc; use crate::{ - Emmyrc, FileId, InFiled, InferFailReason, LuaIndex, LuaInferCache, LuaType, db_index::DbIndex, - semantic::SemanticModel, + CacheOptions, Emmyrc, FileId, InFiled, InferFailReason, LuaIndex, LuaInferCache, LuaType, + db_index::DbIndex, semantic::SemanticModel, }; use emmylua_parser::{LuaBlock, LuaExpr}; @@ -23,6 +23,7 @@ where pub struct LuaCompilation { db: DbIndex, emmyrc: Arc, + cache_options: CacheOptions, } impl LuaCompilation { @@ -30,6 +31,7 @@ impl LuaCompilation { let mut compilation = Self { db: DbIndex::new(), emmyrc: emmyrc.clone(), + cache_options: CacheOptions::default(), }; compilation.db.update_config(emmyrc.clone()); @@ -37,7 +39,7 @@ impl LuaCompilation { } pub fn get_semantic_model(&'_ self, file_id: FileId) -> Option> { - let cache = LuaInferCache::new(file_id, Default::default()); + let cache = LuaInferCache::new(file_id, self.cache_options); let tree = self.db.get_vfs().get_syntax_tree(&file_id)?; Some(SemanticModel::new( file_id, @@ -64,7 +66,12 @@ impl LuaCompilation { }); } - analyzer::analyze(&mut self.db, need_analyzed_files, self.emmyrc.clone()); + analyzer::analyze( + &mut self.db, + need_analyzed_files, + self.emmyrc.clone(), + self.cache_options, + ); } pub fn remove_index(&mut self, file_ids: Vec) { @@ -83,6 +90,10 @@ impl LuaCompilation { &mut self.db } + pub fn get_cache_options_mut(&mut self) -> &mut CacheOptions { + &mut self.cache_options + } + pub fn update_config(&mut self, config: Arc) { self.emmyrc = config.clone(); self.db.update_config(config); diff --git a/crates/emmylua_code_analysis/src/compilation/test/flow.rs b/crates/emmylua_code_analysis/src/compilation/test/flow.rs index 2dcc638b5..51f669ab8 100644 --- a/crates/emmylua_code_analysis/src/compilation/test/flow.rs +++ b/crates/emmylua_code_analysis/src/compilation/test/flow.rs @@ -8,6 +8,7 @@ mod test { const LARGE_LINEAR_ASSIGNMENT_STEPS: usize = 2048; const MAXWELLHOME_ARRAY_VALUES: usize = 2048; const ISSUE_1100_HIGHLIGHT_GROUPS: usize = 2048; + const ISSUE_1114_REPEATED_ASSIGNMENT_STEPS: usize = 512; #[test] fn test_closure_return() { @@ -469,6 +470,53 @@ mod test { assert_eq!(ws.humanize_type(after_assign), "integer"); } + #[test] + #[timeout(5000)] + fn test_issue_1114_repeated_self_dependent_assignments_build_semantic_model() { + let cases = [ + ( + "concat", + r#"local value = """#, + "value = value .. config.pic[idx][index]", + ), + ( + "add", + "local value = 0", + "value = value + config.pic[idx][index]", + ), + ( + "comparison", + "local value = true", + "value = value == config.pic[idx][index]", + ), + ]; + + for (name, init, assignment) in cases { + let mut ws = VirtualWorkspace::new(); + let repeated_assignments = + format!("{assignment}\n").repeat(ISSUE_1114_REPEATED_ASSIGNMENT_STEPS); + let block = format!( + r#" + function f(idx, index) + {init} + {repeated_assignments} + return value + end + "# + ); + + let file_id = ws.def(&block); + + assert!( + ws.analysis + .compilation + .get_semantic_model(file_id) + .is_some(), + "expected semantic model for repeated self-dependent {name} assignment" + ); + } + } + #[test] #[timeout(5000)] fn test_issue_1094_self_call_fallback_stress() { diff --git a/crates/emmylua_code_analysis/src/semantic/cache/cache_options.rs b/crates/emmylua_code_analysis/src/semantic/cache/cache_options.rs index 124422ffe..635e3ca77 100644 --- a/crates/emmylua_code_analysis/src/semantic/cache/cache_options.rs +++ b/crates/emmylua_code_analysis/src/semantic/cache/cache_options.rs @@ -1,17 +1,21 @@ -#[derive(Debug)] +#[derive(Debug, Clone, Copy)] pub struct CacheOptions { pub analysis_phase: LuaAnalysisPhase, + // Development/test escape hatch: keep flow inference unbounded so budget + // blowups can be reproduced instead of hidden behind `unknown`. + pub disable_flow_inference_step_budget: bool, } impl Default for CacheOptions { fn default() -> Self { Self { analysis_phase: LuaAnalysisPhase::Ordered, + disable_flow_inference_step_budget: false, } } } -#[derive(Debug)] +#[derive(Debug, Clone, Copy)] pub enum LuaAnalysisPhase { // Ordered phase Ordered, diff --git a/crates/emmylua_code_analysis/src/semantic/infer/narrow/get_type_at_flow.rs b/crates/emmylua_code_analysis/src/semantic/infer/narrow/get_type_at_flow.rs index a29a511b3..910fe4179 100644 --- a/crates/emmylua_code_analysis/src/semantic/infer/narrow/get_type_at_flow.rs +++ b/crates/emmylua_code_analysis/src/semantic/infer/narrow/get_type_at_flow.rs @@ -35,6 +35,10 @@ use crate::{ }, }; +// Past this point a single flow query is already too expensive to be useful: +// cache `unknown` instead of letting semantic model construction stall. +const FLOW_INFERENCE_STEP_BUDGET: usize = 50_000; + #[derive(Debug, Clone, PartialEq, Eq, Hash)] // One cached flow query: one ref at one flow node, optionally without replaying // pending condition narrows. @@ -141,6 +145,20 @@ enum Continuation { }, } +impl Continuation { + fn query(&self) -> &FlowQuery { + match self { + Continuation::Merge { walk, .. } + | Continuation::AssignmentAntecedent { walk, .. } + | Continuation::ExprReplay { walk, .. } + | Continuation::TagCastAntecedent { walk, .. } + | Continuation::ConditionDependency { walk, .. } + | Continuation::FieldLiteralSiblingDependency { walk, .. } + | Continuation::CorrelatedSearchRoot { walk, .. } => &walk.query, + } + } +} + enum FlowExprReplay { Assignment { antecedent_flow_id: FlowId, @@ -418,13 +436,15 @@ struct FlowTypeEngine<'a> { tree: &'a FlowTree, cache: &'a mut LuaInferCache, root: &'a LuaChunk, + steps_remaining: Option, } impl<'a> FlowTypeEngine<'a> { fn run(&mut self, var_ref_id: &VarRefId, flow_id: FlowId) -> InferResult { let mut stack = Vec::new(); + let root_query = FlowQuery::new(self.cache, var_ref_id, flow_id); let mut step = SchedulerStep::StartQuery { - query: FlowQuery::new(self.cache, var_ref_id, flow_id), + query: root_query.clone(), continuation: None, }; @@ -434,12 +454,34 @@ impl<'a> FlowTypeEngine<'a> { query, continuation, } => { + if !self.consume_step() { + let fallback_type = self.finish_budget_fallback( + &root_query, + None, + continuation.as_ref(), + &stack, + ); + break Ok(fallback_type); + } + if let Some(continuation) = continuation { stack.push(continuation); } self.start_query(query) } - SchedulerStep::ContinueWalk(walk) => self.evaluate_walk(walk), + SchedulerStep::ContinueWalk(walk) => { + if self.steps_remaining == Some(0) { + let fallback_type = self.finish_budget_fallback( + &root_query, + Some(&walk.query), + None, + &stack, + ); + break Ok(fallback_type); + } + + self.evaluate_walk(walk) + } SchedulerStep::ResumeNext(query_result) => match stack.pop() { Some(Continuation::Merge { walk, @@ -522,6 +564,69 @@ impl<'a> FlowTypeEngine<'a> { } } + fn consume_step(&mut self) -> bool { + let Some(steps_remaining) = &mut self.steps_remaining else { + return true; + }; + + if *steps_remaining == 0 { + return false; + } + + *steps_remaining -= 1; + true + } + + fn finish_budget_fallback( + &mut self, + root_query: &FlowQuery, + current_query: Option<&FlowQuery>, + pending_continuation: Option<&Continuation>, + stack: &[Continuation], + ) -> LuaType { + let used_steps = FLOW_INFERENCE_STEP_BUDGET - self.steps_remaining.unwrap_or_default(); + log::warn!( + "flow inference step budget exceeded after {used_steps} steps: file={:?}, ref={:?}, flow={:?}, mode={:?}, stack_depth={}", + self.cache.get_file_id(), + root_query.var_ref_id, + root_query.flow_id, + root_query.mode, + stack.len(), + ); + + // The aborted run may leave recursive-infer guards in either cache. + for var_cache in &mut self.cache.flow_var_caches { + var_cache + .type_cache + .retain(|_, entry| !matches!(entry, CacheEntry::Ready)); + var_cache + .condition_cache + .retain(|_, entry| !matches!(entry, CacheEntry::Ready)); + } + + self.cache_query_unknown(root_query); + if let Some(current_query) = current_query { + self.cache_query_unknown(current_query); + } + if let Some(continuation) = pending_continuation { + self.cache_query_unknown(continuation.query()); + } + for continuation in stack { + self.cache_query_unknown(continuation.query()); + } + + LuaType::Unknown + } + + fn cache_query_unknown(&mut self, query: &FlowQuery) { + get_flow_var_cache(self.cache, query.var_cache_idx) + .type_cache + .insert( + (query.flow_id, query.mode), + CacheEntry::Cache(LuaType::Unknown), + ); + } + // Begin one flow query. If this `(var_ref, flow_id, mode)` pair is already // resolved or already in progress, reuse that state; otherwise start the // backward walk that computes it. @@ -1292,6 +1397,10 @@ impl<'a> FlowTypeEngine<'a> { // continuation point like a branch merge. fn evaluate_walk(&mut self, mut walk: QueryWalk) -> Result { loop { + if !self.consume_step() { + return Ok(SchedulerStep::ContinueWalk(walk)); + } + let flow_node = self .tree .get_flow_node(walk.antecedent_flow_id) @@ -1569,11 +1678,14 @@ pub(super) fn get_type_at_flow( var_ref_id: &VarRefId, flow_id: FlowId, ) -> InferResult { + let steps_remaining = (!cache.get_config().disable_flow_inference_step_budget) + .then_some(FLOW_INFERENCE_STEP_BUDGET); FlowTypeEngine { db, tree, cache, root, + steps_remaining, } .run(var_ref_id, flow_id) }