diff --git a/util/lua/lua.go b/util/lua/lua.go index 92dbc83005cd2..73a1843bd33f5 100644 --- a/util/lua/lua.go +++ b/util/lua/lua.go @@ -17,6 +17,7 @@ import ( "github.com/argoproj/argo-cd/gitops-engine/pkg/health" glob "github.com/bmatcuk/doublestar/v4" + "github.com/golang/groupcache/lru" lua "github.com/yuin/gopher-lua" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "k8s.io/apimachinery/pkg/runtime/schema" @@ -39,6 +40,62 @@ const ( // errScriptDoesNotExist is an error type for when a built-in script does not exist. var errScriptDoesNotExist = errors.New("built-in script does not exist") +// compiledScriptCacheSize bounds the number of compiled Lua scripts retained in memory. +// The working set (embedded built-in scripts plus a small number of argocd-cm customizations) +// is well under this, so eviction is only a safety valve against pathological churn. +const compiledScriptCacheSize = 1024 + +var ( + // scriptCacheEnabled controls whether compiled Lua scripts are cached and reused across + // executions. It is always enabled in production; it exists only as a seam so tests and + // benchmarks can measure the compile-on-every-call path. + scriptCacheEnabled = true + // compiledScripts is a process-wide, content-addressed cache of compiled Lua scripts. + // The cache key is the raw script source, so any change to a script (for example, an + // edited argocd-cm customization) is a distinct key and a cache miss - no explicit + // invalidation is required. A bounded size caps memory under pathological churn. + compiledScripts = newCompiledScriptCache() +) + +// compiledScriptCache is a bounded, content-addressed cache of compiled Lua scripts. +// A gopher-lua FunctionProto is immutable and safe to reuse across LStates, so a single +// compiled proto can back any number of concurrent executions. The working set (embedded +// built-in scripts plus a small number of argocd-cm customizations) is finite and stable, so +// LRU eviction is only a safety valve against unbounded growth, not a hot-path concern. +// +// It is backed by github.com/golang/groupcache/lru. That cache's Get promotes the accessed +// entry (a mutation), so a sync.Mutex (not RWMutex) guards all access. +type compiledScriptCache struct { + mu sync.Mutex + cache *lru.Cache +} + +func newCompiledScriptCache() *compiledScriptCache { + return &compiledScriptCache{ + cache: lru.New(compiledScriptCacheSize), + } +} + +func (c *compiledScriptCache) get(key string) (*lua.FunctionProto, bool) { + c.mu.Lock() + defer c.mu.Unlock() + if v, ok := c.cache.Get(key); ok { + return v.(*lua.FunctionProto), true + } + return nil, false +} + +func (c *compiledScriptCache) add(key string, proto *lua.FunctionProto) { + c.mu.Lock() + defer c.mu.Unlock() + // Keep the first compiled proto for a given source. Concurrent misses may both compile, + // but the protos are equivalent, so retaining the earliest avoids needless churn. + if _, ok := c.cache.Get(key); ok { + return + } + c.cache.Add(key, proto) +} + type ResourceHealthOverrides map[string]appv1.ResourceOverride func (overrides ResourceHealthOverrides) GetResourceHealth(obj *unstructured.Unstructured) (*health.HealthStatus, error) { @@ -113,7 +170,12 @@ func (vm VM) runLuaWithResourceActionParameters(obj *unstructured.Unstructured, objectValue := decodeValue(l, obj.Object) l.SetGlobal("obj", objectValue) - err := l.DoString(script) + + fn, err := loadCompiledFunction(l, script) + if err == nil { + l.Push(fn) + err = l.PCall(0, lua.MultRet, nil) + } // Remove the default lua stack trace from execution errors since these // errors will make it back to the user @@ -128,6 +190,26 @@ func (vm VM) runLuaWithResourceActionParameters(obj *unstructured.Unstructured, return l, err } +// loadCompiledFunction returns a callable Lua function for the given script, using the +// compiled-script cache when enabled. On a cache miss (or when the cache is disabled) the +// script is compiled via LState.LoadString, which preserves the exact syntax-error behavior +// of the previous DoString call path. Compilation failures are never cached so that a fresh +// error is surfaced on each call. +func loadCompiledFunction(l *lua.LState, script string) (*lua.LFunction, error) { + if !scriptCacheEnabled { + return l.LoadString(script) + } + if proto, ok := compiledScripts.get(script); ok { + return l.NewFunctionFromProto(proto), nil + } + fn, err := l.LoadString(script) + if err != nil { + return nil, err + } + compiledScripts.add(script, fn.Proto) + return fn, nil +} + // ExecuteHealthLua runs the lua script to generate the health status of a resource func (vm VM) ExecuteHealthLua(obj *unstructured.Unstructured, script string) (*health.HealthStatus, error) { l, err := vm.runLua(obj, script) diff --git a/util/lua/script_cache_test.go b/util/lua/script_cache_test.go new file mode 100644 index 0000000000000..1f167beda92b1 --- /dev/null +++ b/util/lua/script_cache_test.go @@ -0,0 +1,140 @@ +package lua + +import ( + "fmt" + "os" + "testing" + + lua "github.com/yuin/gopher-lua" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "sigs.k8s.io/yaml" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +const ( + // Rollout is a representative built-in health script: ~5 KB of Lua with helper + // functions and multiple code paths, similar in complexity to hot-path checks. + scriptCacheBenchScriptPath = "../../resource_customizations/argoproj.io/Rollout/health.lua" + scriptCacheBenchObjPath = "../../resource_customizations/argoproj.io/Rollout/testdata/canary/healthy_executedAllSteps.yaml" +) + +const cacheTestScriptA = `local hs = {} +hs.status = "Healthy" +hs.message = "a" +return hs +` + +func compileProto(t *testing.T, script string) *lua.FunctionProto { + t.Helper() + l := lua.NewState() + defer l.Close() + fn, err := l.LoadString(script) + require.NoError(t, err) + return fn.Proto +} + +func TestCompiledScriptCache_ContentAddressed(t *testing.T) { + c := newCompiledScriptCache() + + _, ok := c.get(cacheTestScriptA) + assert.False(t, ok, "expected miss for never-seen script") + + proto := compileProto(t, cacheTestScriptA) + c.add(cacheTestScriptA, proto) + + got, ok := c.get(cacheTestScriptA) + require.True(t, ok, "expected hit after add") + assert.Same(t, proto, got, "identical script content must return the same compiled proto") + + // Adding the same key again is a no-op (keeps the original proto). + c.add(cacheTestScriptA, compileProto(t, cacheTestScriptA)) + got2, _ := c.get(cacheTestScriptA) + assert.Same(t, proto, got2) + + // A different script body is a distinct key - no invalidation required. + _, ok = c.get("return 2") + assert.False(t, ok) +} + +func TestCompiledScriptCache_BoundedEviction(t *testing.T) { + c := newCompiledScriptCache() + scripts := make([]string, compiledScriptCacheSize+1) + for i := range scripts { + scripts[i] = fmt.Sprintf("return %d", i) + c.add(scripts[i], compileProto(t, scripts[i])) + } + + assert.Equal(t, compiledScriptCacheSize, c.cache.Len(), "cache must not exceed max size") + _, ok := c.get(scripts[0]) + assert.False(t, ok, "least-recently-used entry should have been evicted") + _, ok = c.get(scripts[len(scripts)-1]) + assert.True(t, ok, "newest entry should be present") +} + +func TestCompiledScriptCache_OnOffParity(t *testing.T) { + testObj := StrToUnstructured(objJSON) + + orig := scriptCacheEnabled + t.Cleanup(func() { scriptCacheEnabled = orig }) + + vm := VM{UseOpenLibs: true} + + scriptCacheEnabled = false + off, err := vm.ExecuteHealthLua(testObj, cacheTestScriptA) + require.NoError(t, err) + + scriptCacheEnabled = true + on, err := vm.ExecuteHealthLua(testObj, cacheTestScriptA) + require.NoError(t, err) + + assert.Equal(t, off, on, "cache must not change health output") +} + +func loadScriptCacheBenchFixtures(tb testing.TB) (script string, obj *unstructured.Unstructured) { + tb.Helper() + scriptBytes, err := os.ReadFile(scriptCacheBenchScriptPath) + require.NoError(tb, err) + yamlBytes, err := os.ReadFile(scriptCacheBenchObjPath) + require.NoError(tb, err) + objMap := make(map[string]any) + require.NoError(tb, yaml.Unmarshal(yamlBytes, &objMap)) + return string(scriptBytes), &unstructured.Unstructured{Object: objMap} +} + +func BenchmarkExecuteHealthLuaScriptCache(b *testing.B) { + script, obj := loadScriptCacheBenchFixtures(b) + vm := VM{} + + for _, mode := range []struct { + name string + enabled bool + }{ + {"cacheOff", false}, + {"cacheOn", true}, + } { + b.Run(mode.name, func(b *testing.B) { + origEnabled := scriptCacheEnabled + origCache := compiledScripts + scriptCacheEnabled = mode.enabled + if mode.enabled { + compiledScripts = newCompiledScriptCache() + if _, err := vm.ExecuteHealthLua(obj, script); err != nil { + b.Fatal(err) + } + } + b.Cleanup(func() { + scriptCacheEnabled = origEnabled + compiledScripts = origCache + }) + + b.ReportAllocs() + for b.Loop() { + if _, err := vm.ExecuteHealthLua(obj, script); err != nil { + b.Fatal(err) + } + } + }) + } +}