microsoft · bmehta001 · May 4, 2026 · May 11, 2026 · May 11, 2026 · May 11, 2026
diff --git a/samples/cpp/live-audio-transcription/README.md b/samples/cpp/live-audio-transcription/README.md
@@ -26,3 +26,7 @@ g++ -std=c++20 main.cpp -lfoundry_local -o live-audio-transcription-example
 # Synthetic 440Hz sine wave (no microphone needed)
 ./live-audio-transcription-example --synth
 ```
+
+Press `Ctrl+C` to request a graceful stop. The sample forwards that stop request
+to the execution-provider and model downloads, so a long-running download can be
+cancelled before transcription starts.
diff --git a/samples/cpp/live-audio-transcription/main.cpp b/samples/cpp/live-audio-transcription/main.cpp
@@ -122,7 +122,8 @@ int main(int argc, char* argv[]) {
 
         foundry_local::Manager::Create(config);
         auto& manager = foundry_local::Manager::Instance();
-        manager.EnsureEpsDownloaded();
+        auto isCancellationRequested = [] { return !g_running.load(); };
+        manager.DownloadAndRegisterEps(nullptr, isCancellationRequested);
 
         auto& catalog = manager.GetCatalog();
         auto* model = catalog.GetModel("nemotron-speech-streaming-en-0.6b");
@@ -131,9 +132,12 @@ int main(int argc, char* argv[]) {
         }
 
         std::cout << "Downloading model (if needed)..." << std::endl;
-        model->Download([](float pct) {
-            std::cout << "\rDownloading: " << pct << "%   " << std::flush;
-        });
+        model->Download(
+            [](float pct) {
+                std::cout << "\rDownloading: " << pct << "%   " << std::flush;
+                return true;
+            },
+            isCancellationRequested);
         std::cout << std::endl;
         std::cout << "Loading model..." << std::endl;
         model->Load();

diff --git a/sdk/cpp/include/foundry_local_manager.h b/sdk/cpp/include/foundry_local_manager.h
@@ -83,15 +83,21 @@ namespace foundry_local {
 
         /// Download and register all available execution providers.
         /// @param progressCallback Optional callback invoked with (ep_name, percent) during download.
+        /// @param isCancellationRequested Optional callback checked on each progress update. Return true to cancel.
         /// @return Result describing which EPs were registered or failed.
-        EpDownloadResult DownloadAndRegisterEps(EpProgressCallback progressCallback = nullptr) const;
+        EpDownloadResult DownloadAndRegisterEps(
+            EpProgressCallback progressCallback = nullptr,
+            CancellationCallback isCancellationRequested = nullptr) const;
 
         /// Download and register specific execution providers by name.
         /// @param names EP names to download (as returned by DiscoverEps).
         /// @param progressCallback Optional callback invoked with (ep_name, percent) during download.
+        /// @param isCancellationRequested Optional callback checked on each progress update. Return true to cancel.
         /// @return Result describing which EPs were registered or failed.
-        EpDownloadResult DownloadAndRegisterEps(const std::vector<std::string>& names,
-                                                 EpProgressCallback progressCallback = nullptr) const;
+        EpDownloadResult DownloadAndRegisterEps(
+            const std::vector<std::string>& names,
+            EpProgressCallback progressCallback = nullptr,
+            CancellationCallback isCancellationRequested = nullptr) const;
 
     private:
         explicit Manager(Configuration configuration, ILogger* logger);

diff --git a/sdk/cpp/include/model.h b/sdk/cpp/include/model.h
@@ -10,6 +10,7 @@
 #include <memory>
 #include <functional>
 #include <filesystem>
+#include <utility>
 
 #include <gsl/pointers>
 #include <gsl/span>
@@ -33,6 +34,7 @@ namespace foundry_local {
 #endif
 
     using DownloadProgressCallback = std::function<bool(float percentage)>;
+    using CancellationCallback = std::function<bool()>;
 
     class IModel {
     public:
@@ -43,7 +45,11 @@ namespace foundry_local {
         virtual bool IsLoaded() const = 0;
         virtual bool IsCached() const = 0;
         virtual const std::filesystem::path& GetPath() const = 0;
-        virtual void Download(DownloadProgressCallback onProgress = nullptr) = 0;
+
+        /// Download the model, with an optional cancellation callback checked on each progress update.
+        /// Return true from isCancellationRequested to cancel the in-progress download.
+        virtual void Download(DownloadProgressCallback onProgress = nullptr,
+                              CancellationCallback isCancellationRequested = nullptr) = 0;
         virtual void Load() = 0;
         virtual void Unload() = 0;
         virtual void RemoveFromCache() = 0;
@@ -123,7 +129,8 @@ namespace foundry_local {
 
         const ModelInfo& GetInfo() const;
         const std::filesystem::path& GetPath() const override;
-        void Download(DownloadProgressCallback onProgress = nullptr) override;
+        void Download(DownloadProgressCallback onProgress = nullptr,
+                      CancellationCallback isCancellationRequested = nullptr) override;
         void Load() override;
 
         bool IsLoaded() const override;
@@ -158,8 +165,9 @@ namespace foundry_local {
         bool IsLoaded() const override { return SelectedVariant().IsLoaded(); }
         bool IsCached() const override { return SelectedVariant().IsCached(); }
         const std::filesystem::path& GetPath() const override { return SelectedVariant().GetPath(); }
-        void Download(DownloadProgressCallback onProgress = nullptr) override {
-            SelectedVariant().Download(std::move(onProgress));
+        void Download(DownloadProgressCallback onProgress = nullptr,
+                      CancellationCallback isCancellationRequested = nullptr) override {
+            SelectedVariant().Download(std::move(onProgress), std::move(isCancellationRequested));
         }
         void Load() override { SelectedVariant().Load(); }
         void Unload() override { SelectedVariant().Unload(); }

diff --git a/sdk/cpp/include/openai/audio_client.h b/sdk/cpp/include/openai/audio_client.h
@@ -33,9 +33,13 @@ namespace foundry_local {
         const std::string& GetModelId() const noexcept { return modelId_; }
 
         AudioCreateTranscriptionResponse TranscribeAudio(const std::filesystem::path& audioFilePath) const;
+        AudioCreateTranscriptionResponse TranscribeAudio(const std::filesystem::path& audioFilePath,
+                                                         std::function<bool()> isCancellationRequested) const;
 
         using StreamCallback = std::function<void(const AudioCreateTranscriptionResponse& chunk)>;
         void TranscribeAudioStreaming(const std::filesystem::path& audioFilePath, const StreamCallback& onChunk) const;
+        void TranscribeAudioStreaming(const std::filesystem::path& audioFilePath, const StreamCallback& onChunk,
+                                      std::function<bool()> isCancellationRequested) const;
 
         /// Create a new live audio transcription session for streaming PCM audio.
         std::unique_ptr<LiveAudioTranscriptionSession> CreateLiveTranscriptionSession() const;

diff --git a/sdk/cpp/include/openai/chat_client.h b/sdk/cpp/include/openai/chat_client.h
@@ -89,17 +89,30 @@ namespace foundry_local {
 
         ChatCompletionCreateResponse CompleteChat(gsl::span<const ChatMessage> messages,
                                                   const ChatSettings& settings) const;
+        ChatCompletionCreateResponse CompleteChat(gsl::span<const ChatMessage> messages,
+                                                  const ChatSettings& settings,
+                                                  std::function<bool()> isCancellationRequested) const;
 
         ChatCompletionCreateResponse CompleteChat(gsl::span<const ChatMessage> messages,
                                                   gsl::span<const ToolDefinition> tools,
                                                   const ChatSettings& settings) const;
+        ChatCompletionCreateResponse CompleteChat(gsl::span<const ChatMessage> messages,
+                                                  gsl::span<const ToolDefinition> tools,
+                                                  const ChatSettings& settings,
+                                                  std::function<bool()> isCancellationRequested) const;
 
         using StreamCallback = std::function<void(const ChatCompletionCreateResponse& chunk)>;
         void CompleteChatStreaming(gsl::span<const ChatMessage> messages, const ChatSettings& settings,
                                    const StreamCallback& onChunk) const;
+        void CompleteChatStreaming(gsl::span<const ChatMessage> messages, const ChatSettings& settings,
+                                   const StreamCallback& onChunk,
+                                   std::function<bool()> isCancellationRequested) const;
 
         void CompleteChatStreaming(gsl::span<const ChatMessage> messages, gsl::span<const ToolDefinition> tools,
                                    const ChatSettings& settings, const StreamCallback& onChunk) const;
+        void CompleteChatStreaming(gsl::span<const ChatMessage> messages, gsl::span<const ToolDefinition> tools,
+                                   const ChatSettings& settings, const StreamCallback& onChunk,
+                                   std::function<bool()> isCancellationRequested) const;
 
     private:
         OpenAIChatClient(gsl::not_null<foundry_local::Internal::IFoundryLocalCore*> core, std::string_view modelId,

diff --git a/sdk/cpp/sample/main.cpp b/sdk/cpp/sample/main.cpp
@@ -4,6 +4,8 @@
 #include "foundry_local.h"
 
 #include <cstdio>
+#include <atomic>
+#include <csignal>
 #include <iostream>
 #include <string>
 #include <vector>
@@ -14,6 +16,18 @@
 
 using namespace foundry_local;
 
+namespace {
+std::atomic<bool> g_cancelRequested{false};  // Cancellation flag: written by SignalHandler, read by IsCancellationRequested.
+
+void SignalHandler(int /*signum*/) {  // Installed for SIGINT in main(); only records that a stop was requested.
+    g_cancelRequested.store(true);
+}
+
+bool IsCancellationRequested() {  // Passed as the cancellation callback to the download calls in this sample.
+    return g_cancelRequested.load();
+}
+} // namespace
+
 // ---------------------------------------------------------------------------
 // Logger
 // ---------------------------------------------------------------------------
@@ -118,7 +132,8 @@ void ChatNonStreaming(Manager& manager, const std::string& alias) {
         PreferCpuVariant(*concreteModel);
     }
 
-    model->Download([](float pct) { printf("\rDownloading: %5.1f%%", pct); fflush(stdout); return true; });
+    model->Download([](float pct) { printf("\rDownloading: %5.1f%%", pct); fflush(stdout); return true; },
+                    IsCancellationRequested);
     std::cout << "\n";
 
     model->Load();
@@ -211,7 +226,8 @@ void TranscribeAudio(Manager& manager, const std::string& alias, const std::stri
         PreferCpuVariant(*concreteModel);
     }
 
-    model->Download([](float pct) { printf("\rDownloading: %5.1f%%", pct); fflush(stdout); return true; });
+    model->Download([](float pct) { printf("\rDownloading: %5.1f%%", pct); fflush(stdout); return true; },
+                    IsCancellationRequested);
     std::cout << "\n";
 
     model->Load();
@@ -263,7 +279,8 @@ void ChatWithToolCalling(Manager& manager, const std::string& alias) {
         PreferCpuVariant(*concreteModel);
     }
 
-    model->Download([](float pct) { printf("\rDownloading: %5.1f%%", pct); fflush(stdout); return true; });
+    model->Download([](float pct) { printf("\rDownloading: %5.1f%%", pct); fflush(stdout); return true; },
+                    IsCancellationRequested);
     std::cout << "\n";
 
     model->Load();
@@ -376,6 +393,8 @@ int main(int argc, char* argv[]) {
     const std::string audioPath = (argc > 3) ? argv[3] : "";
 
     try {
+        std::signal(SIGINT, SignalHandler);
+
         StdLogger logger;
         Manager::Create({"SampleApp"}, &logger);
         auto& manager = Manager::Instance();
@@ -399,7 +418,7 @@ int main(int argc, char* argv[]) {
                     }
                     printf("\r  %-30s  %5.1f%%", epName.c_str(), percent);
                     fflush(stdout);
-                });
+                }, IsCancellationRequested);
                 if (!currentEp.empty()) std::cout << "\n";
             } else {
                 std::cout << "\nNo execution providers to download.\n";

diff --git a/sdk/cpp/src/audio_client.cpp b/sdk/cpp/src/audio_client.cpp
@@ -5,6 +5,7 @@
 #include <string_view>
 #include <filesystem>
 #include <cstdint>
+#include <utility>
 
 #include <gsl/span>
 #include <nlohmann/json.hpp>
@@ -26,13 +27,19 @@ namespace foundry_local {
 
     AudioCreateTranscriptionResponse OpenAIAudioClient::TranscribeAudio(
         const std::filesystem::path& audioFilePath) const {
+        return TranscribeAudio(audioFilePath, nullptr);  // nullptr: forward with no cancellation callback
+    }
+
+    AudioCreateTranscriptionResponse OpenAIAudioClient::TranscribeAudio(
+        const std::filesystem::path& audioFilePath, std::function<bool()> isCancellationRequested) const {
         nlohmann::json openAiReq = {{"Model", modelId_}, {"FileName", audioFilePath.string()}};
         CoreInteropRequest req("audio_transcribe");
         req.AddParam("OpenAICreateRequest", openAiReq.dump());
 
         std::string json = req.ToJson();
 
-        auto coreResponse = core_->call(req.Command(), *logger_, &json);
+        auto coreResponse = core_->call(req.Command(), *logger_, &json, nullptr, nullptr,
+                                       std::move(isCancellationRequested));
         if (coreResponse.HasError()) {
             throw Exception("Audio transcription failed: " + coreResponse.error, *logger_);
         }
@@ -45,6 +52,12 @@ namespace foundry_local {
 
     void OpenAIAudioClient::TranscribeAudioStreaming(const std::filesystem::path& audioFilePath,
                                                      const StreamCallback& onChunk) const {
+        TranscribeAudioStreaming(audioFilePath, onChunk, nullptr);  // nullptr: forward with no cancellation callback
+    }
+
+    void OpenAIAudioClient::TranscribeAudioStreaming(const std::filesystem::path& audioFilePath,
+                                                     const StreamCallback& onChunk,
+                                                     std::function<bool()> isCancellationRequested) const {
         nlohmann::json openAiReq = {{"Model", modelId_}, {"FileName", audioFilePath.string()}};
         CoreInteropRequest req("audio_transcribe");
         req.AddParam("OpenAICreateRequest", openAiReq.dump());
@@ -58,7 +71,8 @@ namespace foundry_local {
                 chunk.text = text;
                 onChunk(chunk);
             },
-            "Streaming audio transcription failed: ");
+            "Streaming audio transcription failed: ",
+            std::move(isCancellationRequested));
     }
 
     OpenAIAudioClient::OpenAIAudioClient(const IModel& model)

diff --git a/sdk/cpp/src/chat_client.cpp b/sdk/cpp/src/chat_client.cpp
@@ -5,6 +5,7 @@
 #include <string_view>
 #include <cstdint>
 #include <ctime>
+#include <utility>
 
 #include <gsl/span>
 #include <nlohmann/json.hpp>
@@ -97,16 +98,30 @@ namespace foundry_local {
         return CompleteChat(messages, {}, settings);
     }
 
+    ChatCompletionCreateResponse OpenAIChatClient::CompleteChat(gsl::span<const ChatMessage> messages,
+                                                                const ChatSettings& settings,
+                                                                std::function<bool()> isCancellationRequested) const {
+        return CompleteChat(messages, {}, settings, std::move(isCancellationRequested));  // {} = empty tool list
+    }
+
     ChatCompletionCreateResponse OpenAIChatClient::CompleteChat(gsl::span<const ChatMessage> messages,
                                                                 gsl::span<const ToolDefinition> tools,
                                                                 const ChatSettings& settings) const {
+        return CompleteChat(messages, tools, settings, nullptr);  // nullptr: forward with no cancellation callback
+    }
+
+    ChatCompletionCreateResponse OpenAIChatClient::CompleteChat(gsl::span<const ChatMessage> messages,
+                                                                gsl::span<const ToolDefinition> tools,
+                                                                const ChatSettings& settings,
+                                                                std::function<bool()> isCancellationRequested) const {
         std::string openAiReqJson = BuildChatRequestJson(messages, tools, settings, /*stream=*/false);
 
         CoreInteropRequest req("chat_completions");
         req.AddParam("OpenAICreateRequest", openAiReqJson);
 
         std::string json = req.ToJson();
-        auto response = core_->call(req.Command(), *logger_, &json);
+        auto response = core_->call(req.Command(), *logger_, &json, nullptr, nullptr,
+                                   std::move(isCancellationRequested));
         if (response.HasError()) {
             throw Exception("Chat completion failed: " + response.error, *logger_);
         }
@@ -119,9 +134,22 @@ namespace foundry_local {
         CompleteChatStreaming(messages, {}, settings, onChunk);
     }
 
+    void OpenAIChatClient::CompleteChatStreaming(gsl::span<const ChatMessage> messages, const ChatSettings& settings,
+                                                 const StreamCallback& onChunk,
+                                                 std::function<bool()> isCancellationRequested) const {
+        CompleteChatStreaming(messages, {}, settings, onChunk, std::move(isCancellationRequested));  // {} = empty tool list
+    }
+
     void OpenAIChatClient::CompleteChatStreaming(gsl::span<const ChatMessage> messages,
                                                  gsl::span<const ToolDefinition> tools, const ChatSettings& settings,
                                                  const StreamCallback& onChunk) const {
+        CompleteChatStreaming(messages, tools, settings, onChunk, nullptr);  // nullptr: forward with no cancellation callback
+    }
+
+    void OpenAIChatClient::CompleteChatStreaming(gsl::span<const ChatMessage> messages,
+                                                 gsl::span<const ToolDefinition> tools, const ChatSettings& settings,
+                                                 const StreamCallback& onChunk,
+                                                 std::function<bool()> isCancellationRequested) const {
         std::string openAiReqJson = BuildChatRequestJson(messages, tools, settings, /*stream=*/true);
 
         CoreInteropRequest req("chat_completions");
@@ -134,7 +162,8 @@ namespace foundry_local {
                 auto parsed = nlohmann::json::parse(chunk).get<ChatCompletionCreateResponse>();
                 onChunk(parsed);
             },
-            "Streaming chat completion failed: ");
+            "Streaming chat completion failed: ",
+            std::move(isCancellationRequested));
     }
 
     OpenAIChatClient::OpenAIChatClient(const IModel& model)