Skip to content
Open
Show file tree
Hide file tree
Changes from 15 commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
f3354f6
feat(platform-wallet)!: shutdown() joins coordinator threads and retu…
lklimek Jun 22, 2026
261178e
fix(platform-wallet): RAII-guard is_syncing so a coordinator panic ca…
lklimek Jun 22, 2026
42d734d
refactor(rs-dash-async): add AtomicFlagGuard RAII helper
lklimek Jun 23, 2026
6e78b77
fix(platform-wallet): refine CoordinatorThreadStatus variants + tight…
lklimek Jun 23, 2026
5f80450
test(rs-dash-async): assert AtomicFlagGuard contract + add #[must_use]
lklimek Jun 23, 2026
6b2cd39
fix(platform-wallet): make coordinator passes cancellable + converge …
lklimek Jun 23, 2026
13a22dd
fix(platform-wallet): bound clear_shielded + tidy shutdown docs/logging
lklimek Jun 23, 2026
93b8954
fix(platform-wallet-ffi): timeout-bound the shielded sync stop bridge
lklimek Jun 23, 2026
747f5f0
Merge branch 'v3.1-dev' into feat/platform-wallet-shutdown-join
lklimek Jun 23, 2026
2bd9501
fix(platform-wallet)!: close residual coordinator-thread UAF on shutdown
lklimek Jun 23, 2026
7c975ed
fix(platform-wallet)!: surface non-clean shielded drain on clear/stop
lklimek Jun 23, 2026
5f63c95
fix(platform-wallet): reap prior coordinator thread outside backgroun…
lklimek Jun 23, 2026
2b068ba
fix(platform-wallet): close shielded epilogue TOCTOU + pin restart reap
lklimek Jun 24, 2026
5017ba1
fix(swift-sdk): retain wallet callback context on incomplete shutdown
lklimek Jun 24, 2026
b491773
test(platform-wallet): bound cleanup quiesce in restart-reap regressi…
lklimek Jun 24, 2026
76c8bee
fix(platform-wallet): track detached coordinator threads so shutdown(…
lklimek Jun 24, 2026
3cca1cf
perf(platform-wallet): drain coordinators concurrently in shutdown() …
lklimek Jun 24, 2026
8c52811
feat(dash-async): add shared ThreadRegistry worker-lifecycle engine
lklimek Jun 24, 2026
ac9a51a
feat(dash-async): key-scope parked orphans for any_alive_for()
lklimek Jun 24, 2026
d20aed0
refactor(platform-wallet): migrate sync coordinators onto shared Thre…
lklimek Jun 24, 2026
d190f29
test(dash-async): anchor DrainHook compile_fail doctest to E0277 + no…
lklimek Jun 24, 2026
3e81fc1
fix(dash-async,platform-wallet): harden ThreadRegistry lifecycle + do…
lklimek Jun 24, 2026
911f99f
refactor(platform-wallet): extract CoordinatorLifecycle to dedup the …
lklimek Jun 24, 2026
22647a7
fix(platform-wallet): raise quiescing gate in CoordinatorLifecycle::q…
lklimek Jun 25, 2026
7f3aeb5
fix(dash-async): park a restarted worker's prior under the slot lock …
lklimek Jun 25, 2026
41791c0
fix(platform-wallet-ffi): gate shielded_sync_stop success on orphan l…
lklimek Jun 25, 2026
4b099a9
fix(platform-wallet): bound clear_shielded's drain and hold its quies…
lklimek Jun 25, 2026
7be68c5
refactor(dash-async): full spawn-failure rollback + drop stale doc hi…
lklimek Jun 25, 2026
3821389
docs(swift-sdk): broaden deinit comment for shielded_sync_stop's orph…
lklimek Jun 25, 2026
748c4f8
fix(platform-wallet): make the quiescing<->is_syncing handshake self-…
lklimek Jun 25, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

64 changes: 64 additions & 0 deletions packages/rs-dash-async/src/atomic.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
use std::sync::atomic::{AtomicBool, Ordering};

/// RAII guard that clears an [`AtomicBool`] flag to `false` on drop.
///
/// Callers set the flag to `true` before constructing the guard (typically
/// via a `compare_exchange`); the guard resets it on every exit path,
/// including panics, so a panicked holder can never leave the flag wedged.
///
/// **Panic-strategy caveat:** the clear-on-panic guarantee relies on
/// destructors running while the stack unwinds, so it holds under
/// `panic = "unwind"` (the default). Under `panic = "abort"` — e.g. the
/// iOS release profiles — a panic aborts the process immediately and no
/// `Drop` runs; there is simply no "after" left for the flag to gate.
#[must_use = "AtomicFlagGuard clears the flag on drop; binding to `_` or using as a statement drops it immediately"]
pub struct AtomicFlagGuard<'a>(&'a AtomicBool);
Comment thread
Claudius-Maginificent marked this conversation as resolved.

impl<'a> AtomicFlagGuard<'a> {
/// Wrap `flag`. Does **not** set it to `true` — the caller is
/// responsible for doing that before constructing the guard.
pub fn new(flag: &'a AtomicBool) -> Self {
Comment thread
Claudius-Maginificent marked this conversation as resolved.
Self(flag)
}
}

impl Drop for AtomicFlagGuard<'_> {
fn drop(&mut self) {
self.0.store(false, Ordering::Release);
Comment thread
Claudius-Maginificent marked this conversation as resolved.
}
}

#[cfg(test)]
mod tests {
use super::*;
use std::panic::{catch_unwind, AssertUnwindSafe};

/// A guard constructed over a `true` flag holds it while in scope and
/// clears it to `false` on a normal scope exit.
#[test]
fn clears_flag_on_normal_drop() {
let flag = AtomicBool::new(true);
{
let _guard = AtomicFlagGuard::new(&flag);
assert!(flag.load(Ordering::Acquire), "flag stays set while held");
}
assert!(!flag.load(Ordering::Acquire), "flag cleared on drop");
}

/// The clear also runs while unwinding a panic — the load-bearing
/// property the sync coordinators lean on so a panicked pass can't
/// leave `is_syncing` latched and wedge `quiesce()`'s drain.
#[test]
fn clears_flag_while_unwinding_panic() {
let flag = AtomicBool::new(true);
let result = catch_unwind(AssertUnwindSafe(|| {
let _guard = AtomicFlagGuard::new(&flag);
panic!("boom while holding the guard");
}));
assert!(result.is_err(), "the panic propagated out of catch_unwind");
assert!(
!flag.load(Ordering::Acquire),
"Drop ran during unwinding and cleared the flag"
);
}
}
4 changes: 4 additions & 0 deletions packages/rs-dash-async/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,11 @@
//!
//! Provides [`block_on`] -- a function that bridges async futures into sync code,
//! handling multiple tokio runtime flavors (no runtime, current-thread, multi-thread).
//!
//! Also provides [`AtomicFlagGuard`] — a RAII guard for panic-safe `AtomicBool` flag resets.
mod atomic;
mod block_on;

pub use atomic::AtomicFlagGuard;
pub use block_on::{block_on, AsyncError};
2 changes: 1 addition & 1 deletion packages/rs-platform-wallet-ffi/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ rs-sdk-ffi = { path = "../rs-sdk-ffi" }
once_cell = "1.19"
parking_lot = { version = "0.12", features = ["send_guard"] }
lazy_static = "1.4"
tokio = { version = "1", features = ["rt-multi-thread"] }
tokio = { version = "1", features = ["rt-multi-thread", "time"] }
tokio-metrics = { workspace = true, optional = true }

# Core dependencies (for Network type)
Expand Down
58 changes: 58 additions & 0 deletions packages/rs-platform-wallet-ffi/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,33 @@ pub enum PlatformWalletFFIResultCode {
/// and could double-send if the original spend landed.
ErrorShieldedSpendUnconfirmed = 18,

/// A background coordinator drain did not complete cleanly within the
/// join deadline — one or more `!Send` sync threads may still be alive
/// and still hold a reference to the host-owned callback context, so they
/// could fire one final callback through it. On this code the host **must
/// not** free the callback context immediately: either keep it alive for a
/// further grace period, or accept the (statistically tiny) race.
///
/// Returned by three callers, which differ in whether the operation may
/// be **retried**:
/// - `platform_wallet_manager_destroy`: the manager **IS** torn down
/// (removed from storage) regardless — do **not** retry `destroy`; the
/// handle is already gone. Only the callback-context lifetime caveat
/// above applies.
/// - `platform_wallet_manager_shielded_sync_stop`: the manager is **NOT**
/// torn down — only the shielded loop's drain was non-clean. The host
/// may retry the stop (or proceed to `destroy`); the handle stays valid.
/// - `platform_wallet_manager_shielded_clear`: the manager is **NOT** torn
/// down and the store was left **intact** (Clear aborted before touching
/// it). The host may retry the clear, and must **not** commit its own
/// persistence wipe — doing so would desync the host's rows from the
/// still-populated shared tree.
///
/// Distinct from a normal operation error (the underlying operation may
/// well have made progress); the terminal coordinator status is rendered
/// into the result message.
ErrorShutdownIncomplete = 19,

NotFound = 98, // Used exclusively for all the Option that are retuned as errors
ErrorUnknown = 99,
}
Expand Down Expand Up @@ -237,6 +264,14 @@ impl From<PlatformWalletError> for PlatformWalletFFIResult {
PlatformWalletError::ShieldedSpendUnconfirmed { .. } => {
PlatformWalletFFIResultCode::ErrorShieldedSpendUnconfirmed
}
// A Clear that refused because the in-flight shielded pass didn't
// drain cleanly: surface it as ErrorShutdownIncomplete (symmetric
// with `platform_wallet_manager_destroy`) so the host defers
// freeing its callback context AND does not commit its own
// persistence wipe — the store was intentionally left intact.
PlatformWalletError::ShieldedShutdownIncomplete { .. } => {
PlatformWalletFFIResultCode::ErrorShutdownIncomplete
}
_ => PlatformWalletFFIResultCode::ErrorUnknown,
};
PlatformWalletFFIResult::err(code, error.to_string())
Expand Down Expand Up @@ -595,6 +630,29 @@ mod tests {
assert_eq!(msg, rendered, "Display payload must survive verbatim");
}

/// A Clear that refused on a non-clean shielded drain must surface as
/// `ErrorShutdownIncomplete` (symmetric with `destroy`), not flatten to
/// `ErrorUnknown`, so the host knows to defer freeing its callback
/// context and to NOT commit its own persistence wipe. The typed Display
/// rendering (carrying the terminal coordinator status) survives verbatim.
#[test]
fn shielded_shutdown_incomplete_maps_to_dedicated_code() {
let err = PlatformWalletError::ShieldedShutdownIncomplete {
status: platform_wallet::CoordinatorThreadStatus::Timeout,
};
let rendered = err.to_string();
let result: PlatformWalletFFIResult = err.into();
assert_eq!(
result.code,
PlatformWalletFFIResultCode::ErrorShutdownIncomplete,
"ShieldedShutdownIncomplete should map to ErrorShutdownIncomplete (rendered: {rendered})"
);
let msg = unsafe { std::ffi::CStr::from_ptr(result.message) }
.to_string_lossy()
.into_owned();
assert_eq!(msg, rendered, "Display payload must survive verbatim");
}

/// Other wallet-error variants without a dedicated FFI arm still
/// fall through to `ErrorUnknown` while carrying the typed
/// Display rendering as the message. Pin this so the catch-all
Expand Down
24 changes: 23 additions & 1 deletion packages/rs-platform-wallet-ffi/src/manager.rs
Original file line number Diff line number Diff line change
Expand Up @@ -360,7 +360,29 @@ pub unsafe extern "C" fn platform_wallet_manager_destroy(
// left alive to fire a callback against freed memory.
// `shutdown()` is idempotent, so this is safe even if the host
// already stopped some sync managers before calling destroy.
runtime().block_on(manager.shutdown());
// It now joins the coordinator OS threads and returns their
// per-thread exit status; the C ABI exposes none of that, so we
// just log it (a panicked loop is worth surfacing) and drop it.
let status = runtime().block_on(manager.shutdown());
if !status.all_clean() {
tracing::warn!(
?status,
"platform wallet coordinator(s) did not exit cleanly; \
host must not free the callback context immediately"
);
// Return a distinct non-ok code so the host can delay freeing
// its callback context. A lingering coordinator thread (e.g. one
// that timed out) still holds an Arc to the event handler and may
// fire one final callback through the host-owned context pointer;
// returning ok() here would signal that the context is safe to
// free when it may not be yet.
return PlatformWalletFFIResult::err(
PlatformWalletFFIResultCode::ErrorShutdownIncomplete,
format!("coordinator(s) did not exit cleanly: {status:?}"),
);
} else {
tracing::debug!(?status, "platform wallet coordinators joined cleanly");
}
}
PlatformWalletFFIResult::ok()
Comment thread
Claudius-Maginificent marked this conversation as resolved.
}
Expand Down
68 changes: 59 additions & 9 deletions packages/rs-platform-wallet-ffi/src/shielded_sync.rs
Original file line number Diff line number Diff line change
Expand Up @@ -68,12 +68,20 @@ pub unsafe extern "C" fn platform_wallet_manager_shielded_sync_start(
/// Stop the shielded sync manager and wait for any in-flight pass to
/// drain before returning. No-op if not running.
///
/// Uses `quiesce` rather than cancel-only stop, so on return: the loop
/// is cancelled, no new pass will start, and any in-flight pass has
/// Uses `quiesce` rather than cancel-only stop, so on a clean return: the
/// loop is cancelled, no new pass will start, and any in-flight pass has
/// fully drained — its **persistence callbacks have completed** (no
/// note/sync-state row can be written after this returns) and its
/// completion-event *dispatch* on the Rust side has run.
///
/// Returns `ErrorShutdownIncomplete` instead of `Success` when that drain
/// did **not** complete cleanly (the in-flight pass timed out on the join
/// backstop, or the loop ended non-cleanly). The terminal coordinator
/// status is rendered into the result message. On this code the host must
/// **not** free the callback context immediately — a lingering pass may
/// still fire one final callback through it (symmetric with
/// `platform_wallet_manager_destroy`).
///
/// Caveat on host-observed events: a host that marshals the completion
/// callback onto its own executor (e.g. the Swift trampoline hops it to
/// the `@MainActor`) may still observe that final, already-dispatched
Expand All @@ -88,9 +96,40 @@ pub unsafe extern "C" fn platform_wallet_manager_shielded_sync_stop(
handle: Handle,
) -> PlatformWalletFFIResult {
let option = PLATFORM_WALLET_MANAGER_STORAGE.with_item(handle, |manager| {
runtime().block_on(manager.shielded_sync().quiesce());
runtime().block_on(async {
// Bound the quiesce with the same backstop `shutdown()` uses so
// a stalled in-flight pass can't hang the host's stop call
// forever. Cancellation makes the drain prompt; this only
// matters if a pass's drop wedges. A timeout (the future was
// dropped at the deadline) is reported as the non-clean
// `Timeout` status, matching `shutdown()`'s backstop
// substitution, so the host learns the drain may be incomplete.
match tokio::time::timeout(
Duration::from_secs(platform_wallet::SHUTDOWN_JOIN_TIMEOUT_SECS),
manager.shielded_sync().quiesce(),
)
.await
{
Ok(status) => status,
Err(_elapsed) => platform_wallet::CoordinatorThreadStatus::Timeout,
}
})
});
unwrap_option_or_return!(option);
let status = unwrap_option_or_return!(option);
// Symmetric with `platform_wallet_manager_destroy`: a non-clean drain
// means the shielded loop may still hold a reference to the host-owned
// event-handler / persister context and could fire one final callback,
// so signal the host to defer freeing that context rather than returning
// ok() and inviting a use-after-free.
if !status.is_clean() {
return PlatformWalletFFIResult::err(
PlatformWalletFFIResultCode::ErrorShutdownIncomplete,
format!(
"shielded sync stop did not drain cleanly ({status:?}); \
host must not free the callback context immediately"
),
);
}
PlatformWalletFFIResult::ok()
}
Comment thread
Claudius-Maginificent marked this conversation as resolved.

Expand Down Expand Up @@ -417,7 +456,9 @@ pub unsafe extern "C" fn platform_wallet_manager_configure_shielded(
/// via the changeset path.
///
/// Returns `ErrorWalletOperation` if the Rust-side store reset
/// fails. The host **must** check this before wiping its own
/// fails, or `ErrorShutdownIncomplete` if the in-flight sync pass
/// did not drain cleanly first (in which case the store is left
/// intact). The host **must** check this before wiping its own
/// persistence: a silent failure would leave the shared tree
/// populated while the host drops its rows, and the next cold
/// resync would gate-skip every re-downloaded position against the
Expand All @@ -443,10 +484,19 @@ pub unsafe extern "C" fn platform_wallet_manager_shielded_clear(
});
let result = unwrap_option_or_return!(option);
if let Err(e) = result {
return PlatformWalletFFIResult::err(
PlatformWalletFFIResultCode::ErrorWalletOperation,
format!("clear_shielded failed: {e}"),
);
// A non-clean / timed-out quiesce aborts the clear *before* the store
// is touched: surface it as ErrorShutdownIncomplete (symmetric with
// destroy / shielded_sync_stop) so the host defers freeing its
// callback context and does NOT commit its own persistence wipe — the
// store was intentionally left intact. Every other clear failure is a
// store-reset error → ErrorWalletOperation, as before.
let code = match &e {
platform_wallet::PlatformWalletError::ShieldedShutdownIncomplete { .. } => {
PlatformWalletFFIResultCode::ErrorShutdownIncomplete
}
_ => PlatformWalletFFIResultCode::ErrorWalletOperation,
};
return PlatformWalletFFIResult::err(code, format!("clear_shielded failed: {e}"));
}
PlatformWalletFFIResult::ok()
}
Expand Down
1 change: 1 addition & 0 deletions packages/rs-platform-wallet/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ bimap = "0.6"
# Async runtime
tokio = { version = "1", features = ["sync", "rt", "time", "macros"] }
tokio-util = { version = "0.7.12" }
dash-async = { path = "../rs-dash-async" }

# Logging
tracing = "0.1"
Expand Down
21 changes: 21 additions & 0 deletions packages/rs-platform-wallet/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,27 @@ pub enum PlatformWalletError {

#[error("Shielded sub-wallet not bound: call bind_shielded first")]
ShieldedNotBound,

/// A Clear/wipe could not safely complete because the shielded sync
/// coordinator's in-flight pass did not drain cleanly first — it either
/// timed out on the join backstop or its loop ended non-cleanly
/// (cancelled / panicked). The shared commitment-tree store is therefore
/// **left intact** (not wiped): a still-running pass could re-persist
/// notes into the store immediately after a `clear()`, desyncing the
/// host's wiped rows from a repopulated tree and gate-skipping every
/// re-downloaded position on the next cold resync. The host **must not**
/// commit its own persistence wipe; retry Clear once the pass settles.
/// Carries the terminal [`CoordinatorThreadStatus`] for diagnostics.
///
/// [`CoordinatorThreadStatus`]: crate::manager::CoordinatorThreadStatus
#[error(
"shielded clear aborted: sync coordinator did not drain cleanly \
({status:?}); commitment-tree store left intact so an in-flight pass \
cannot re-persist into a wiped store — retry once the pass settles"
)]
ShieldedShutdownIncomplete {
status: crate::manager::CoordinatorThreadStatus,
},
}

/// Check whether an SDK error indicates that an InstantSend lock proof was
Expand Down
5 changes: 4 additions & 1 deletion packages/rs-platform-wallet/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,10 @@ pub use manager::platform_address_sync::{
PlatformAddressSyncManager, PlatformAddressSyncSummary, WalletSyncOutcome,
DEFAULT_SYNC_INTERVAL_SECS,
};
pub use manager::PlatformWalletManager;
pub use manager::{
CoordinatorExitStatus, CoordinatorThreadStatus, PlatformWalletManager,
SHUTDOWN_JOIN_TIMEOUT_SECS,
};
pub use spv::SpvRuntime;
pub use wallet::asset_lock::manager::AssetLockManager;
pub use wallet::asset_lock::tracked::{AssetLockStatus, TrackedAssetLock};
Expand Down
Loading
Loading