diff --git a/backend/daemon/daemon.go b/backend/daemon/daemon.go
index e3b95f5a3..72f973e59 100644
--- a/backend/daemon/daemon.go
+++ b/backend/daemon/daemon.go
@@ -227,7 +227,7 @@ func Load(ctx context.Context, cfg config.Config, r *storage.Store, oo ...Option
 	}
 
 	a.HTTPServer, a.HTTPListener, err = initHTTP(cfg.Base, cfg.HTTP.Port, a.GRPCServer, &a.clean, a.g, a.Index,
-		fm, a.Net, a.RPC.Daemon, opts.extraHTTPHandlers...)
+		fm, a.Net, a.Syncing.SchedulerSnapshot, a.RPC.Daemon, opts.extraHTTPHandlers...)
 	if err != nil {
 		return nil, err
 	}
diff --git a/backend/daemon/http.go b/backend/daemon/http.go
index 5a820dbe2..eb6f96e69 100644
--- a/backend/daemon/http.go
+++ b/backend/daemon/http.go
@@ -13,6 +13,7 @@ import (
 	"seed/backend/blob"
 	"seed/backend/config"
 	"seed/backend/hmnet"
+	"seed/backend/hmnet/syncing"
 	"seed/backend/logging"
 	"seed/backend/util/cleanup"
 	"seed/backend/util/trcstats"
@@ -138,6 +139,7 @@ func initHTTP(
 	blobs blockstore.Blockstore,
 	ipfsHandler *hmnet.FileManager,
 	p2pnet *hmnet.Node,
+	schedulerSnap func() syncing.SchedulerSnapshot,
 	daemonServer *daemonapi.Server,
 	extraHandlers ...func(*Router),
 ) (srv *http.Server, lis net.Listener, err error) {
@@ -178,7 +180,7 @@ func initHTTP(
 		router.Handle("/debug/requests", http.DefaultServeMux, RouteNav)
 		router.Handle("/debug/events", http.DefaultServeMux, RouteNav)
 		router.Handle("/debug/p2p", p2pnet.DebugHandler(), RouteNav)
-		router.Handle("/debug/network", p2pnet.NetworkDebugHandler(), RouteNav)
+		router.Handle("/debug/network", p2pnet.NetworkDebugHandler(schedulerSnap), RouteNav)
 
 		router.Handle("/hm/api/config", p2pnet.HMAPIConfigHandler(), RouteNav)
 		router.Handle(vaultConnectionHandoffPath, corsMiddleware(http.HandlerFunc(daemonServer.HandleVaultHandoff)), 0)
diff --git a/backend/hmnet/http_debug_network.go b/backend/hmnet/http_debug_network.go
index 5ce841382..e7706d366 100644
--- a/backend/hmnet/http_debug_network.go
+++ b/backend/hmnet/http_debug_network.go
@@ -9,6 +9,7 @@ import (
 	"sort"
 	"time"
 
+	"seed/backend/hmnet/syncing"
 	"seed/backend/util/bwcounter"
 
 	"github.com/libp2p/go-libp2p/core/network"
@@ -21,11 +22,16 @@ import (
 // sync/discovery latency, peer-table state, and reachability — modeled on
 // /debug/traces. Latency cells are color-coded against fixed thresholds
 // (p10>50ms, p50>100ms, p90>1s, p99>5s warn).
-func (n *Node) NetworkDebugHandler() http.Handler {
+//
+// schedulerSnap is an optional callback returning the syncing scheduler's
+// counters. Pass nil if no syncing service is wired up; the page omits the
+// scheduler section in that case.
+func (n *Node) NetworkDebugHandler(schedulerSnap func() syncing.SchedulerSnapshot) http.Handler {
 	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
 		w.Header().Set("Content-Type", "text/html; charset=utf-8")
 
 		page := n.buildPage(r.Context())
+		page.Scheduler = buildScheduler(schedulerSnap)
 
 		if err := pageTpl.Execute(w, page); err != nil {
 			http.Error(w, err.Error(), http.StatusInternalServerError)
@@ -44,6 +50,21 @@ type networkPage struct {
 	Sections     []section
 	Bandwidth    bandwidthSection
 	Reachability reachSection
+	Scheduler    *schedulerDebugSection
+}
+
+// schedulerDebugSection renders the syncing scheduler's instantaneous
+// queue/in-progress sizes plus cumulative preemption counters, so a human
+// looking at /debug/network can confirm whether the preemption machinery
+// is firing at all in production.
+type schedulerDebugSection struct {
+	TasksTotal             string
+	QueueLen               string
+	InProgress             string
+	InProgressHot          string
+	InProgressSubscription string
+	PreemptHotCount        string
+	PreemptSubsCount       string
 }
 
 // bandwidthSection holds all bandwidth-related tables for the page. Each table
@@ -374,6 +395,24 @@ func (n *Node) buildPage(ctx context.Context) networkPage {
 	return page
 }
 
+// buildScheduler renders a syncing-scheduler snapshot block, or nil if no
+// snapshot func was provided.
+func buildScheduler(snap func() syncing.SchedulerSnapshot) *schedulerDebugSection {
+	if snap == nil {
+		return nil
+	}
+	s := snap()
+	return &schedulerDebugSection{
+		TasksTotal:             fmt.Sprintf("%d", s.TasksTotal),
+		QueueLen:               fmt.Sprintf("%d", s.QueueLen),
+		InProgress:             fmt.Sprintf("%d", s.InProgress),
+		InProgressHot:          fmt.Sprintf("%d", s.InProgressHot),
+		InProgressSubscription: fmt.Sprintf("%d", s.InProgressSubscription),
+		PreemptHotCount:        fmt.Sprintf("%d", s.PreemptHotCount),
+		PreemptSubsCount:       fmt.Sprintf("%d", s.PreemptSubsCount),
+	}
+}
+
 // buildBandwidth assembles the bandwidth section from the libp2p metrics and
 // the two HTTP counters owned by the Node. Numbers are pre-formatted into
 // human-readable strings so the template stays simple.
@@ -1557,6 +1596,26 @@ details.howto>summary{color:#0a58ca;font-weight:600}
 {{.Bandwidth.Help}}
 </details>
 
+{{if .Scheduler}}
+{{with .Scheduler}}
+<h2>Discovery scheduler</h2>
+<div class="subtitle">queue/in-progress sizes are instantaneous; preempt counts are cumulative since startup</div>
+<table class="kv">
+<tr><td>tasks tracked</td><td class="num">{{.TasksTotal}}</td></tr>
+<tr><td>queued</td><td class="num">{{.QueueLen}}</td></tr>
+<tr><td>in progress (total)</td><td class="num">{{.InProgress}}</td></tr>
+<tr><td>&nbsp;&nbsp;hot (ephemeral)</td><td class="num">{{.InProgressHot}}</td></tr>
+<tr><td>&nbsp;&nbsp;subscription</td><td class="num">{{.InProgressSubscription}}</td></tr>
+<tr><td>preempt total — subscription cancelled for hot</td><td class="num">{{.PreemptSubsCount}}</td></tr>
+<tr><td>preempt total — older hot cancelled for newer hot</td><td class="num">{{.PreemptHotCount}}</td></tr>
+</table>
+<details class="help">
+<summary>What does this mean?</summary>
+<p>Lifetime of a discovery task: <em>queued</em> → <em>in progress</em> (subscription or hot/ephemeral) → done. The two <em>preempt</em> counters track the only paths that cancel an in-flight task to make room for a higher-priority one. If both stay at zero across a busy session, the preemption machinery isn't actually load-bearing and could be removed.</p>
+</details>
+{{end}}
+{{end}}
+
 <h2>Reachability snapshot</h2>
 <div class="subtitle">peerstore peers, connected first; showing top {{len .Reachability.Rows}} of {{.Reachability.Total}}</div>
 <table class="reach">
diff --git a/backend/hmnet/http_debug_network_test.go b/backend/hmnet/http_debug_network_test.go
index da2462482..b71b73822 100644
--- a/backend/hmnet/http_debug_network_test.go
+++ b/backend/hmnet/http_debug_network_test.go
@@ -2,8 +2,11 @@ package hmnet
 
 import (
 	"math"
+	"strings"
 	"testing"
 
+	"seed/backend/hmnet/syncing"
+
 	dto "github.com/prometheus/client_model/go"
 	"github.com/stretchr/testify/require"
 )
@@ -63,6 +66,60 @@ func TestHistStatsPercentileFallsBackForInfBucket(t *testing.T) {
 	require.Equal(t, 2.0, h.percentile(0.99))
 }
 
+func TestBuildScheduler_NilSnap(t *testing.T) {
+	// A nil snapshot func must produce no scheduler section.
+	require.Nil(t, buildScheduler(nil))
+}
+
+func TestBuildScheduler_WithSnap(t *testing.T) {
+	snap := func() syncing.SchedulerSnapshot {
+		return syncing.SchedulerSnapshot{
+			TasksTotal:             5,
+			QueueLen:               2,
+			InProgress:             3,
+			InProgressHot:          2,
+			InProgressSubscription: 1,
+			PreemptHotCount:        7,
+			PreemptSubsCount:       11,
+		}
+	}
+
+	got := buildScheduler(snap)
+	require.NotNil(t, got)
+	require.Equal(t, "5", got.TasksTotal)
+	require.Equal(t, "2", got.QueueLen)
+	require.Equal(t, "3", got.InProgress)
+	require.Equal(t, "2", got.InProgressHot)
+	require.Equal(t, "1", got.InProgressSubscription)
+	require.Equal(t, "7", got.PreemptHotCount)
+	require.Equal(t, "11", got.PreemptSubsCount)
+}
+
+// TestPageTpl_RendersSchedulerSection runs the actual page template with
+// a populated Scheduler section to make sure the new <h2> block renders
+// without template errors and contains the snapshot's numeric values.
+func TestPageTpl_RendersSchedulerSection(t *testing.T) {
+	page := networkPage{
+		PeerID:     "peer-1",
+		ProtocolID: "/test/0.0.1",
+		Scheduler: &schedulerDebugSection{
+			TasksTotal:             "42",
+			QueueLen:               "5",
+			InProgress:             "3",
+			InProgressHot:          "2",
+			InProgressSubscription: "1",
+			PreemptHotCount:        "9",
+			PreemptSubsCount:       "13",
+		},
+	}
+	var sb strings.Builder
+	require.NoError(t, pageTpl.Execute(&sb, page))
+	html := sb.String()
+	require.Contains(t, html, "Discovery scheduler")
+	require.Contains(t, html, "42")
+	require.Contains(t, html, "preempt total — older hot cancelled for newer hot")
+}
+
 func TestHistStatsPercentileSkipsEmptyEarlyBuckets(t *testing.T) {
 	// Buckets 0 and 1 are empty; bucket 2 ((10ms, 100ms]) holds all
 	// observations. p=0.5 → target=5. prevUpper after bucket 1 is 0.01.
diff --git a/backend/hmnet/syncing/discovery.go b/backend/hmnet/syncing/discovery.go
index 4cd109848..6e15f522a 100644
--- a/backend/hmnet/syncing/discovery.go
+++ b/backend/hmnet/syncing/discovery.go
@@ -20,7 +20,6 @@ import (
 	"time"
 
 	"github.com/ipfs/go-cid"
-	"github.com/libp2p/go-libp2p/core/network"
 	"github.com/libp2p/go-libp2p/core/peer"
 	"github.com/libp2p/go-libp2p/core/peerstore"
 	"github.com/multiformats/go-multicodec"
@@ -115,34 +114,49 @@ func (s *Service) DiscoverObjectWithProgress(ctx context.Context, entityID blob.
 	// state. We use them as a fallback before the DHT when no peer is connected,
 	// since syncWithPeer will dial on demand.
 	const maxKnownPeerFallback = 20
-	var knownPeers []peer.ID
-	if err = s.db.WithSave(ctxLocalPeers, func(conn *sqlite.Conn) error {
-		return sqlitex.Exec(conn, qListPeersWithPid(), func(stmt *sqlite.Stmt) error {
-			addresStr := stmt.ColumnText(0)
-			pid := stmt.ColumnText(1)
-			addrList := strings.Split(addresStr, ",")
-			info, err := netutil.AddrInfoFromStrings(addrList...)
-			if err != nil {
-				s.log.Warn("Can't discover from peer since it has malformed addresses", zap.String("PID", pid), zap.Error(err))
-				return nil
-			}
-			if s.host.Network().Connectedness(info.ID) == network.Connected {
-				allPeers = append(allPeers, info.ID)
-			} else if len(knownPeers) < maxKnownPeerFallback {
-				s.host.Peerstore().AddAddrs(info.ID, info.Addrs, peerstore.TempAddrTTL)
-				knownPeers = append(knownPeers, info.ID)
-			}
 
-			return nil
-		})
-	}); err != nil {
-		MDiscoverPhaseSeconds.WithLabelValues("peer_select").Observe(time.Since(peerSelectStart).Seconds())
-		return "", err
+	// Fast path: ask the host directly for the currently-connected peers
+	// instead of scanning the entire peers table and parsing every
+	// multiaddr just to filter on Connectedness. The host already maintains
+	// this set in O(connected) memory; it's the same predicate the original
+	// loop computed via Connectedness == Connected.
+	self := s.host.ID()
+	for _, pid := range s.host.Network().Peers() {
+		if pid == self {
+			continue
+		}
+		allPeers = append(allPeers, pid)
 	}
-	MDiscoverPhaseSeconds.WithLabelValues("peer_select").Observe(time.Since(peerSelectStart).Seconds())
-	if len(allPeers) == 0 && len(knownPeers) > 0 {
-		allPeers = knownPeers
+
+	// Slow path: only when no peers are currently connected do we hit the
+	// DB to find a small fallback set we can dial. The query is bounded
+	// (LIMIT) so the multiaddr parse cost is bounded too — the prior
+	// unbounded scan was the source of the per-call CPU regression.
+	if len(allPeers) == 0 {
+		if err = s.db.WithSave(ctxLocalPeers, func(conn *sqlite.Conn) error {
+			return sqlitex.Exec(conn, qListPeersWithPidLimit(), func(stmt *sqlite.Stmt) error {
+				addresStr := stmt.ColumnText(0)
+				pid := stmt.ColumnText(1)
+				info, err := netutil.AddrInfoFromStrings(strings.Split(addresStr, ",")...)
+				if err != nil {
+					s.log.Warn("Can't discover from peer since it has malformed addresses", zap.String("PID", pid), zap.Error(err))
+					return nil
+				}
+				// peerstore.RecentlyConnectedAddrTTL is the right TTL for
+				// "we last saw this peer at this address" semantics — the
+				// dial path may run many seconds later as syncs queue up,
+				// and TempAddrTTL (the previous choice) is short enough
+				// that addresses can expire before we get to dial.
+				s.host.Peerstore().AddAddrs(info.ID, info.Addrs, peerstore.RecentlyConnectedAddrTTL)
+				allPeers = append(allPeers, info.ID)
+				return nil
+			}, maxKnownPeerFallback)
+		}); err != nil {
+			MDiscoverPhaseSeconds.WithLabelValues("peer_select").Observe(time.Since(peerSelectStart).Seconds())
+			return "", err
+		}
 	}
+	MDiscoverPhaseSeconds.WithLabelValues("peer_select").Observe(time.Since(peerSelectStart).Seconds())
 
 	// Create RBSR store once for reuse across all peers.
 	dkeys := colx.HashSet[DiscoveryKey]{
@@ -860,3 +874,17 @@ var qListPeersWithPid = dqb.Str(`
 		pid
 	FROM peers;
 `)
+
+// qListPeersWithPidLimit is the bounded variant used by the disconnected-peer
+// fallback in DiscoverObjectWithProgress. We only need a small set to seed
+// dialing — most-recently-updated peers first — and previously the unbounded
+// scan was reading every row out of the table even though only the first 20
+// were kept. Caps the SQLite work at the size of the fallback budget.
+var qListPeersWithPidLimit = dqb.Str(`
+	SELECT
+		addresses,
+		pid
+	FROM peers
+	ORDER BY updated_at DESC
+	LIMIT ?;
+`)
diff --git a/backend/hmnet/syncing/scheduler.go b/backend/hmnet/syncing/scheduler.go
index a57ef6898..f973c6dac 100644
--- a/backend/hmnet/syncing/scheduler.go
+++ b/backend/hmnet/syncing/scheduler.go
@@ -12,6 +12,7 @@ import (
 	"seed/backend/util/maybe"
 	"strings"
 	"sync"
+	"sync/atomic"
 	"time"
 
 	"golang.org/x/sync/errgroup"
@@ -60,6 +61,11 @@ type taskHandle struct {
 	hotDeadline  time.Time
 	runCount     uint64 // Number of times task has been executed.
 
+	// hotIndex tracks the position of this task inside scheduler.inProgressHot
+	// while it is running as an ephemeral hot task. Maintained by the heap's
+	// OnIndexChange callback. Unset for queued, completed, or subscription tasks.
+	hotIndex maybe.Value[int]
+
 	// Cancellation. Set by dispatchReadyTasks under s.mu before dispatching, cleared in executeTask's unwind.
 	cancelFunc context.CancelFunc
 	runCtx     context.Context
@@ -110,11 +116,28 @@ type scheduler struct {
 	tasks map[DiscoveryKey]*taskHandle
 	queue *heap.Heap[*taskHandle]
 
+	// inProgressHot indexes ephemeral hot tasks currently running, ordered
+	// by hotDeadline ascending so the head is always the oldest (the most
+	// likely preemption victim and the soonest heartbeat to expire). Lets
+	// preempt/cleanup/wake decisions stay O(1)/O(log n) instead of walking
+	// the full s.tasks map every dispatch tick.
+	inProgressHot *heap.Heap[*taskHandle]
+
+	// inProgressSubs indexes subscription tasks currently running. Keyed by
+	// DiscoveryKey for O(1) "any one" picks during subscription preemption.
+	inProgressSubs map[DiscoveryKey]*taskHandle
+
 	// inProgress counts tasks that are currently being executed by a worker
 	// goroutine. Bounded by cfg.MaxWorkers. Used to detect saturation for
 	// preemption decisions.
 	inProgress int
 
+	// Observability counters surfaced via /debug/network so we can confirm
+	// whether preemption ever fires in production. Atomics so reads from
+	// the debug page don't need to acquire s.mu.
+	preemptSubsCount atomic.Uint64
+	preemptHotCount  atomic.Uint64
+
 	// Heartbeat TTL for hot tasks. Installed from defaultHotTTL in newScheduler.
 	// Tests may assign a smaller value before calling run().
 	hotTTL time.Duration
@@ -127,11 +150,12 @@ func newScheduler(disc discoverer, cfg config.Syncing) *scheduler {
 	}
 
 	s := &scheduler{
-		disc:        disc,
-		cfg:         cfg,
-		timer:       time.NewTimer(0),
-		tasks:       make(map[DiscoveryKey]*taskHandle),
-		hotTTL: defaultHotTTL,
+		disc:           disc,
+		cfg:            cfg,
+		timer:          time.NewTimer(0),
+		tasks:          make(map[DiscoveryKey]*taskHandle),
+		inProgressSubs: make(map[DiscoveryKey]*taskHandle),
+		hotTTL:         defaultHotTTL,
 		// Two-tier priority: hot tasks always before cold. Within hot tier,
 		// LIFO by hotDeadline desc (most recently touched wins); within cold
 		// tier, FIFO by nextRunTime asc (earliest due wins). nextRunTime is
@@ -148,6 +172,11 @@ func newScheduler(disc discoverer, cfg config.Syncing) *scheduler {
 			}
 			return a.nextRunTime.Before(b.nextRunTime)
 		}),
+		// Ascending by hotDeadline: head = oldest = next-to-expire = first
+		// preemption victim.
+		inProgressHot: heap.New(func(a, b *taskHandle) bool {
+			return a.hotDeadline.Before(b.hotDeadline)
+		}),
 	}
 
 	s.workers.SetLimit(cfg.MaxWorkers)
@@ -160,6 +189,13 @@ func newScheduler(disc discoverer, cfg config.Syncing) *scheduler {
 			task.queueIndex.Set(newIndex)
 		}
 	}
+	s.inProgressHot.OnIndexChange = func(task *taskHandle, newIndex int) {
+		if newIndex < 0 {
+			task.hotIndex.Clear()
+		} else {
+			task.hotIndex.Set(newIndex)
+		}
+	}
 
 	return s
 }
@@ -183,8 +219,11 @@ func (s *scheduler) run(ctx context.Context) (err error) {
 		case <-s.timer.C:
 			s.mu.Lock()
 			nextWake := s.dispatchReadyTasks(ctx)
-			s.timer.Reset(nextWake)
 			s.mu.Unlock()
+			// time.Timer.Reset is goroutine-safe, so we keep it out of the
+			// critical section to shrink the window during which scheduleTask
+			// callers contend on s.mu.
+			s.timer.Reset(nextWake)
 		}
 	}
 }
@@ -192,12 +231,12 @@ func (s *scheduler) run(ctx context.Context) (err error) {
 // executeTask runs discovery and updates task state directly. It runs in a
 // goroutine spawned by the dispatch loop via errgroup.TryGo and owns a single
 // MaxWorkers slot for its lifetime.
-func (s *scheduler) executeTask(task *taskHandle) {
-	s.mu.Lock()
-	taskCtx := task.runCtx
-	prog := task.progress
-	s.mu.Unlock()
-
+//
+// taskCtx and prog are passed by value so executeTask never has to grab s.mu
+// just to read them off task — the dispatch path captures them locally
+// before assigning the task fields, so the closure already holds the
+// correct values.
+func (s *scheduler) executeTask(taskCtx context.Context, task *taskHandle, prog *Progress) {
 	var blobTypes []string
 	if task.key.BlobTypes != "" {
 		blobTypes = strings.Split(task.key.BlobTypes, ",")
@@ -223,6 +262,7 @@ func (s *scheduler) executeTask(task *taskHandle) {
 	}
 	task.runCtx = nil
 	s.inProgress--
+	s.markFinished(task)
 
 	now := time.Now()
 
@@ -302,6 +342,11 @@ func (s *scheduler) scheduleTaskLocked(key DiscoveryKey, now time.Time, opts sch
 	if task.state == TaskStateInProgress {
 		// Re-touching a running task only updates the heartbeat deadline.
 		// Re-running decisions happen when the current run completes.
+		// If the task is in inProgressHot, its order key (hotDeadline)
+		// just changed, so fix the in-progress hot heap too.
+		if task.hotIndex.IsSet() {
+			s.inProgressHot.Fix(task.hotIndex.Value())
+		}
 		return task.Info()
 	}
 
@@ -328,6 +373,13 @@ func (s *scheduler) removeSubscriptions(keys ...DiscoveryKey) {
 			continue
 		}
 
+		// If the task is currently running as a subscription, drop the
+		// in-progress subscription tracking now that it isn't one anymore.
+		// We don't migrate it into inProgressHot — it'll finish normally
+		// and exit through markFinished, which is idempotent.
+		if existing, ok := s.inProgressSubs[key]; ok && existing == task {
+			delete(s.inProgressSubs, key)
+		}
 		task.subscription = false
 		now := time.Now()
 
@@ -381,23 +433,38 @@ func (s *scheduler) dispatchReadyTasks(ctx context.Context) (nextWake time.Durat
 			return s.boundedWake(wake, time.Now())
 		}
 
-		// Task is ready. Pop it from the queue and prepare its run context.
+		// Task is ready. Pop it from the queue.
 		s.queue.Pop()
-		task.state = TaskStateInProgress
-		task.progress = &Progress{}
-		taskCtx, cancel := context.WithCancel(ctx)
-		task.runCtx = taskCtx
-		task.cancelFunc = cancel
-
-		// Dispatch via errgroup.TryGo. TryGo respects the MaxWorkers limit
-		// set in newScheduler, so saturation is authoritative here — either
-		// we get a goroutine slot or we don't.
-		if s.inProgress < s.cfg.MaxWorkers && s.workers.TryGo(func() error {
-			s.executeTask(task)
-			return nil
-		}) {
-			s.inProgress++
-			continue
+
+		// Capacity check first — only allocate the per-task context when
+		// we're committed to dispatching. Under saturation the prior code
+		// allocated a context.WithCancel + Progress per peeked task and
+		// immediately discarded them, which showed up directly in the
+		// allocation profile.
+		if s.inProgress < s.cfg.MaxWorkers {
+			taskCtx, cancel := context.WithCancel(ctx)
+			prog := &Progress{}
+			task.state = TaskStateInProgress
+			task.progress = prog
+			task.runCtx = taskCtx
+			task.cancelFunc = cancel
+			// TryGo respects the MaxWorkers limit; on the rare losing race
+			// we unwind state and re-enqueue.
+			if s.workers.TryGo(func() error {
+				s.executeTask(taskCtx, task, prog)
+				return nil
+			}) {
+				s.inProgress++
+				s.markInProgress(task)
+				continue
+			}
+			// Race lost: TryGo refused despite the capacity check. Reset
+			// state and fall through to the saturation branch.
+			cancel()
+			task.runCtx = nil
+			task.cancelFunc = nil
+			task.state = TaskStateIdle
+			task.progress = nil
 		}
 
 		// Worker pool is full. For hot tasks, try to make room by preempting
@@ -413,12 +480,6 @@ func (s *scheduler) dispatchReadyTasks(ctx context.Context) (nextWake time.Durat
 			}
 		}
 
-		// Release the unused context and reset the task.
-		cancel()
-		task.runCtx = nil
-		task.cancelFunc = nil
-		task.state = TaskStateIdle
-		task.progress = nil
 		s.enqueueLocked(task, now)
 
 		if preempted {
@@ -449,6 +510,34 @@ func (s *scheduler) dispatchReadyTasks(ctx context.Context) (nextWake time.Durat
 	return s.boundedWake(wake, now)
 }
 
+// markInProgress adds a freshly-dispatched task to the appropriate
+// in-progress index. Caller must hold s.mu.
+func (s *scheduler) markInProgress(task *taskHandle) {
+	if task.subscription {
+		s.inProgressSubs[task.key] = task
+		return
+	}
+	if task.hotDeadline.IsZero() {
+		// Non-subscription, non-hot tasks aren't expected to land here —
+		// scheduleNext deletes them — but be defensive.
+		return
+	}
+	s.inProgressHot.Push(task)
+}
+
+// markFinished removes a completing or cancelling task from the in-progress
+// indexes. Idempotent: safe to call after preemption/cleanup paths have
+// already removed the task to keep it from being picked again.
+// Caller must hold s.mu.
+func (s *scheduler) markFinished(task *taskHandle) {
+	if existing, ok := s.inProgressSubs[task.key]; ok && existing == task {
+		delete(s.inProgressSubs, task.key)
+	}
+	if task.hotIndex.IsSet() {
+		s.inProgressHot.Remove(task.hotIndex.Value())
+	}
+}
+
 // enqueueLocked inserts or reorders the task in the queue. Caller must hold
 // s.mu. Must not be called while task.state == TaskStateInProgress.
 func (s *scheduler) enqueueLocked(task *taskHandle, now time.Time) {
@@ -467,18 +556,17 @@ func (s *scheduler) enqueueLocked(task *taskHandle, now time.Time) {
 // preemptSubscriptionLocked cancels an in-flight subscription so its worker
 // slot can be reused by a higher-priority hot task. Returns true if a
 // subscription was cancelled. Caller must hold s.mu.
+//
+// Pops the victim from inProgressSubs eagerly so a follow-up preemption in
+// the same dispatch sweep doesn't pick the same task.
 func (s *scheduler) preemptSubscriptionLocked() bool {
-	for _, t := range s.tasks {
-		if t.state != TaskStateInProgress {
-			continue
-		}
-		if !t.subscription {
-			continue
-		}
+	for key, t := range s.inProgressSubs {
 		if t.cancelFunc == nil {
 			continue
 		}
+		delete(s.inProgressSubs, key)
 		t.cancelFunc()
+		s.preemptSubsCount.Add(1)
 		return true
 	}
 	return false
@@ -490,84 +578,64 @@ func (s *scheduler) preemptSubscriptionLocked() bool {
 // cancellation branch. Returns true if a task was preempted. Caller must
 // hold s.mu.
 func (s *scheduler) preemptOldestHotLocked(incoming *taskHandle, now time.Time) bool {
-	var victim *taskHandle
-	for _, t := range s.tasks {
-		if t.state != TaskStateInProgress {
-			continue
-		}
-		if t.subscription {
-			continue
-		}
-		if !t.IsHot(now) {
-			// Heartbeat already expired; cleanupExpiredHotTasksLocked will reap it.
-			continue
-		}
-		if t.cancelFunc == nil {
-			continue
-		}
-		if t.key == incoming.key {
-			continue
-		}
-		// Victim must have a strictly older heartbeat than the incoming task;
-		// otherwise preempting just delays the user's most recent request.
-		if !t.hotDeadline.Before(incoming.hotDeadline) {
-			continue
-		}
-		if victim == nil || t.hotDeadline.Before(victim.hotDeadline) {
-			victim = t
-		}
+	if s.inProgressHot.Len() == 0 {
+		return false
+	}
+	victim := s.inProgressHot.Peek()
+	if victim.key == incoming.key {
+		return false
 	}
-	if victim == nil {
+	if !victim.IsHot(now) {
+		// Heartbeat already expired; cleanupExpiredHotTasksLocked will reap it.
 		return false
 	}
+	// Victim must have a strictly older heartbeat than the incoming task;
+	// otherwise preempting just delays the user's most recent request.
+	if !victim.hotDeadline.Before(incoming.hotDeadline) {
+		return false
+	}
+	if victim.cancelFunc == nil {
+		return false
+	}
+	s.inProgressHot.Remove(victim.hotIndex.Value())
 	victim.cancelFunc()
+	s.preemptHotCount.Add(1)
 	return true
 }
 
 // cleanupExpiredHotTasksLocked cancels in-progress ephemeral hot tasks whose
 // heartbeat deadline has expired (the frontend stopped polling, meaning the
-// user is no longer viewing that document) and drops any queued ephemeral
-// hot tasks in the same state. Caller must hold s.mu.
+// user is no longer viewing that document). Caller must hold s.mu.
+//
+// Idle-but-queued ephemeral hot tasks whose deadline expired are reaped
+// lazily by the dispatch-loop tier-migration check, so we don't sweep the
+// queue here.
 func (s *scheduler) cleanupExpiredHotTasksLocked(now time.Time) {
-	var toDelete []*taskHandle
-	for _, t := range s.tasks {
-		if t.subscription {
-			continue
-		}
-		if t.IsHot(now) {
-			continue
+	for s.inProgressHot.Len() > 0 {
+		head := s.inProgressHot.Peek()
+		if !head.hotDeadline.Before(now) {
+			return
 		}
-		switch t.state {
-		case TaskStateInProgress:
-			if t.cancelFunc != nil {
-				t.cancelFunc()
-			}
-		default:
-			if t.queueIndex.IsSet() {
-				toDelete = append(toDelete, t)
-			}
+		s.inProgressHot.Remove(head.hotIndex.Value())
+		if head.cancelFunc != nil {
+			head.cancelFunc()
 		}
 	}
-	for _, t := range toDelete {
-		s.queue.Remove(t.queueIndex.Value())
-		delete(s.tasks, t.key)
-	}
 }
 
 // boundedWake narrows `candidate` by the earliest in-progress hot-task
 // heartbeat expiry so cleanup runs promptly. Caller must hold s.mu.
 func (s *scheduler) boundedWake(candidate time.Duration, now time.Time) time.Duration {
-	for _, t := range s.tasks {
-		if t.state != TaskStateInProgress || t.subscription {
-			continue
-		}
-		if t.hotDeadline.IsZero() {
-			continue
-		}
-		d := max(t.hotDeadline.Sub(now), 0)
-		if d < candidate {
-			candidate = d
-		}
+	if s.inProgressHot.Len() == 0 {
+		return candidate
+	}
+	head := s.inProgressHot.Peek()
+	if head.hotDeadline.IsZero() {
+		return candidate
+	}
+	d := max(head.hotDeadline.Sub(now), 0)
+	if d < candidate {
+		return d
 	}
 	return candidate
 }
@@ -613,3 +681,33 @@ func (s *scheduler) resetTimer(now time.Time) {
 	}
 	s.timer.Reset(s.boundedWake(wake, now))
 }
+
+// SchedulerSnapshot is a point-in-time view of scheduler internals,
+// surfaced via /debug/network so we can confirm whether preemption ever
+// fires in production without standing up a metrics scraper. All fields
+// are cumulative since process start except Tasks/Queue/InProgress*, which
+// are instantaneous.
+type SchedulerSnapshot struct {
+	TasksTotal             int
+	QueueLen               int
+	InProgress             int
+	InProgressHot          int
+	InProgressSubscription int
+	PreemptHotCount        uint64
+	PreemptSubsCount       uint64
+}
+
+// snapshot returns a consistent view of the scheduler counters.
+func (s *scheduler) snapshot() SchedulerSnapshot {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+	return SchedulerSnapshot{
+		TasksTotal:             len(s.tasks),
+		QueueLen:               s.queue.Len(),
+		InProgress:             s.inProgress,
+		InProgressHot:          s.inProgressHot.Len(),
+		InProgressSubscription: len(s.inProgressSubs),
+		PreemptHotCount:        s.preemptHotCount.Load(),
+		PreemptSubsCount:       s.preemptSubsCount.Load(),
+	}
+}
diff --git a/backend/hmnet/syncing/syncing.go b/backend/hmnet/syncing/syncing.go
index 458263cd3..8893ce8a7 100644
--- a/backend/hmnet/syncing/syncing.go
+++ b/backend/hmnet/syncing/syncing.go
@@ -557,6 +557,14 @@ func (s *Service) listSubscriptionsFromDB(ctx context.Context) ([]Subscription,
 	return subs, nil
 }
 
+// SchedulerSnapshot returns a point-in-time view of the scheduler's
+// internal counters and queue sizes. Safe to call from any goroutine.
+// Used by /debug/network to surface whether preemption is firing in
+// production.
+func (s *Service) SchedulerSnapshot() SchedulerSnapshot {
+	return s.scheduler.snapshot()
+}
+
 // TouchHotTask returns an existing task or creates a new ephemeral one.
 // If a subscription task already exists, it wakes it up and returns its info.
 //