-
Notifications
You must be signed in to change notification settings - Fork 2.1k
fix(desktop): enable Anthropic prompt caching for macOS chat #7951
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -237,7 +237,23 @@ fn translate_request( | |
| model: upstream_model.to_string(), | ||
| max_tokens, | ||
| messages: anthropic_messages, | ||
| system, | ||
| <<<<<<< HEAD | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
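When this commit is checked out, the Rust source still contains merge-conflict markers (`<<<<<<< HEAD`, `=======`, `>>>>>>> 608ebd12c3`) in the `translate_request` struct literal and in the `mod tests` assertions, so the file cannot compile until the conflict is resolved. Useful? React with 👍 / 👎. |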
||
| ======= | ||
| system: system_prompt.and_then(|text| { | ||
| let text = text.trim().to_string(); | ||
| if text.is_empty() { | ||
| None | ||
| } else { | ||
| Some(vec![AnthropicSystemContentBlock { | ||
| block_type: AnthropicContentBlockType::Text, | ||
| text, | ||
| cache_control: AnthropicCacheControl { | ||
| cache_type: AnthropicCacheControlType::Ephemeral, | ||
| }, | ||
| }]) | ||
| } | ||
| }), | ||
| >>>>>>> 608ebd12c3 (refactor: replace raw String types with typed enums for Anthropic block/cache types) | ||
| temperature: req.temperature, | ||
| stream: req.stream, | ||
| tools: if is_tool_choice_none { None } else { anthropic_tools }, | ||
|
|
@@ -1269,10 +1285,19 @@ mod tests { | |
|
|
||
| let result = translate_request(&req, "claude-sonnet-4-6").unwrap(); | ||
| assert_eq!(result.model, "claude-sonnet-4-6"); | ||
| // system is now an ephemeral-cached content-block array, not a bare string. | ||
| let system = result.system.as_ref().expect("system block should be present"); | ||
| assert_eq!(system[0]["text"], "You are helpful."); | ||
| assert_eq!(system[0]["cache_control"]["type"], "ephemeral"); | ||
| <<<<<<< HEAD | ||
| ======= | ||
| assert_eq!( | ||
| result.system, | ||
| Some(vec![AnthropicSystemContentBlock { | ||
| block_type: AnthropicContentBlockType::Text, | ||
| text: "You are helpful.".to_string(), | ||
| cache_control: AnthropicCacheControl { | ||
| cache_type: AnthropicCacheControlType::Ephemeral, | ||
| }, | ||
| }]) | ||
| ); | ||
| >>>>>>> 608ebd12c3 (refactor: replace raw String types with typed enums for Anthropic block/cache types) | ||
| assert_eq!(result.messages.len(), 1); // only user message, system extracted | ||
| assert_eq!(result.messages[0].role, "user"); | ||
| assert_eq!(result.max_tokens, 1024); | ||
|
|
@@ -1431,13 +1456,113 @@ mod tests { | |
| }; | ||
|
|
||
| let result = translate_request(&req, "claude-sonnet-4-6").unwrap(); | ||
| let system = result.system.as_ref().expect("system block should be present"); | ||
| assert_eq!(system[0]["text"], "You are terse."); | ||
| assert_eq!(system[0]["cache_control"]["type"], "ephemeral"); | ||
| assert_eq!(result.messages.len(), 1, "developer msg must be extracted, not forwarded"); | ||
| assert_eq!(result.messages[0].role, "user"); | ||
| } | ||
|
|
||
| #[test] | ||
| fn test_translate_request_system_prompt_uses_cache_control_blocks() { | ||
| let req = ChatCompletionRequest { | ||
| model: "omi-sonnet".to_string(), | ||
| messages: vec![ | ||
| ChatMessage { | ||
| role: "system".to_string(), | ||
| content: Some(json!("You are helpful.")), | ||
| name: None, | ||
| tool_calls: None, | ||
| tool_call_id: None, | ||
| }, | ||
| ChatMessage { | ||
| role: "user".to_string(), | ||
| content: Some(json!("Hello")), | ||
| name: None, | ||
| tool_calls: None, | ||
| tool_call_id: None, | ||
| }, | ||
| ], | ||
| stream: false, | ||
| temperature: None, | ||
| max_tokens: None, | ||
| max_completion_tokens: None, | ||
| tools: None, | ||
| tool_choice: None, | ||
| }; | ||
|
|
||
| let result = translate_request(&req, "claude-sonnet-4-6").unwrap(); | ||
| let json = serde_json::to_value(&result).unwrap(); | ||
|
|
||
| assert_eq!( | ||
| json["system"], | ||
| json!([{ | ||
| "type": "text", | ||
| "text": "You are helpful.", | ||
| "cache_control": {"type": "ephemeral"} | ||
| }]) | ||
| ); | ||
| } | ||
|
|
||
| #[test] | ||
| fn test_translate_request_without_system_prompt_omits_system() { | ||
| let req = ChatCompletionRequest { | ||
| model: "omi-sonnet".to_string(), | ||
| messages: vec![ChatMessage { | ||
| role: "user".to_string(), | ||
| content: Some(json!("Hello")), | ||
| name: None, | ||
| tool_calls: None, | ||
| tool_call_id: None, | ||
| }], | ||
| stream: false, | ||
| temperature: None, | ||
| max_tokens: None, | ||
| max_completion_tokens: None, | ||
| tools: None, | ||
| tool_choice: None, | ||
| }; | ||
|
|
||
| let result = translate_request(&req, "claude-sonnet-4-6").unwrap(); | ||
| let json = serde_json::to_value(&result).unwrap(); | ||
|
|
||
| assert!(result.system.is_none()); | ||
| assert!(json.get("system").is_none()); | ||
| } | ||
|
|
||
| #[test] | ||
| fn test_translate_request_empty_system_prompt_omits_system() { | ||
| // Empty or whitespace-only system prompts must NOT be sent as cached blocks | ||
| // (Anthropic rejects empty cached text blocks with 400). | ||
| for content in [Some(json!("")), Some(json!(" ")), None] { | ||
| let req = ChatCompletionRequest { | ||
| model: "omi-sonnet".to_string(), | ||
| messages: vec![ChatMessage { | ||
| role: "system".to_string(), | ||
| content: content.clone(), | ||
| name: None, | ||
| tool_calls: None, | ||
| tool_call_id: None, | ||
| }, ChatMessage { | ||
| role: "user".to_string(), | ||
| content: Some(json!("Hello")), | ||
| name: None, | ||
| tool_calls: None, | ||
| tool_call_id: None, | ||
| }], | ||
| stream: false, | ||
| temperature: None, | ||
| max_tokens: None, | ||
| max_completion_tokens: None, | ||
| tools: None, | ||
| tool_choice: None, | ||
| }; | ||
|
|
||
| let result = translate_request(&req, "claude-sonnet-4-6").unwrap(); | ||
| assert!( | ||
| result.system.is_none(), | ||
| "empty/whitespace system prompt must omit system field, got: {:?}", | ||
| result.system | ||
| ); | ||
| } | ||
| } | ||
|
|
||
| #[test] | ||
| fn test_translate_request_max_completion_tokens_preferred() { | ||
| // OpenAI renamed `max_tokens` → `max_completion_tokens` for reasoning | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The `block_type` and `cache_type` fields are plain `String`, so a typo (e.g. `"Ephemeral"` or `"Text"`) compiles cleanly but produces a 400 from Anthropic at runtime. Since these fields are discriminants with a fixed, known set of valid values, typed enums would catch mistakes at compile time with zero runtime cost.