Team-Commonly · samxu01 · Jun 4, 2026 · Jun 4, 2026
diff --git a/k8s/helm/commonly/templates/configmaps/litellm-config.yaml b/k8s/helm/commonly/templates/configmaps/litellm-config.yaml
@@ -75,6 +75,11 @@ data:
       # back to OpenRouter Nemotron. The /responses endpoint is NOT challenged. OAuth tokens
       # are valid; this is a LiteLLM endpoint-routing bug, not an auth/IP/OpenAI block.
       # See BerriAI/litellm#27175 (workaround) + openclaw#68033 (2026-06).
+      # Equally LOAD-BEARING: do NOT add `model_info` with `mode: responses` here. Combined with
+      # the responses/ prefix, that mode routes through LiteLLM's responses-transformation
+      # handler, which strips `chatgpt/` and sends an invalid model, e.g. `responses/gpt-5.4-mini`
+      # (400 "model not supported"). With no mode set, the proxy uses the completion handler,
+      # which parses the responses/ prefix correctly. Verified 3/3 via proxy /chat/completions.
       # NOTE: LiteLLM's chatgpt/ provider reads tokens from auth.json (written by the
       # codex-auth-seed init + codex-auth-rotator sidecar). The Authenticator ignores
       # api_key in litellm_params, so duplicate deployments per account are useless.
@@ -88,17 +93,13 @@ data:
       #   gpt-5.4-mini: $0.75 input / $4.50  output per 1M tokens
       #   gpt-5.4-nano: $0.20 input / $1.25  output per 1M tokens
       - model_name: gpt-5.4
-        model_info:
-          mode: responses
         litellm_params:
           model: chatgpt/responses/gpt-5.4
           timeout: 120
           input_cost_per_token: 0.0000025
           output_cost_per_token: 0.000015
 
       - model_name: openai-codex/gpt-5.4
-        model_info:
-          mode: responses
         litellm_params:
           model: chatgpt/responses/gpt-5.4
           timeout: 120
@@ -107,17 +108,13 @@ data:
 
       # --- gpt-5.4-mini (used for dev agent heartbeat orchestration) ---
       - model_name: gpt-5.4-mini
-        model_info:
-          mode: responses
         litellm_params:
           model: chatgpt/responses/gpt-5.4-mini
           timeout: 120
           input_cost_per_token: 0.00000075
           output_cost_per_token: 0.0000045
 
       - model_name: openai-codex/gpt-5.4-mini
-        model_info:
-          mode: responses
         litellm_params:
           model: chatgpt/responses/gpt-5.4-mini
           timeout: 120
@@ -126,17 +123,13 @@ data:
 
       # --- gpt-5.4-nano (community agents — cheapest Codex model) ---
       - model_name: gpt-5.4-nano
-        model_info:
-          mode: responses
         litellm_params:
           model: chatgpt/responses/gpt-5.4-nano
           timeout: 120
           input_cost_per_token: 0.0000002
           output_cost_per_token: 0.00000125
 
       - model_name: openai-codex/gpt-5.4-nano
-        model_info:
-          mode: responses
         litellm_params:
           model: chatgpt/responses/gpt-5.4-nano
           timeout: 120
@@ -146,16 +139,12 @@ data:
       # Aliases for Codex CLI default model names in openai mode
       # When acpx_run spawns with auth_mode=openai, the CLI may send these model names
       - model_name: codex-mini-latest
-        model_info:
-          mode: responses
         litellm_params:
           model: chatgpt/responses/gpt-5.4-mini
           input_cost_per_token: 0.00000075
           output_cost_per_token: 0.0000045
 
       - model_name: o4-mini
-        model_info:
-          mode: responses
         litellm_params:
           model: chatgpt/responses/gpt-5.4-mini
           input_cost_per_token: 0.00000075