From 2143c1841e5fce07d6f27b5c430ff18269c2445b Mon Sep 17 00:00:00 2001 From: Damian Nadales Date: Wed, 13 May 2026 16:05:40 -0300 Subject: [PATCH 01/20] Add prop_leios_late_join failing test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Parameterise runThreadNet over NodeJoinPlan and add a new property that starts node 3 at a random slot while nodes 0–2 run from slot 0. This demonstrates the crash in resolveLeiosBlock when a late-joining node encounters a CertRB referencing an EB it never received. --- .../test/cardano-test/Test/ThreadNet/Leios.hs | 48 +++++++++++++++++-- 1 file changed, 44 insertions(+), 4 deletions(-) diff --git a/ouroboros-consensus-cardano/test/cardano-test/Test/ThreadNet/Leios.hs b/ouroboros-consensus-cardano/test/cardano-test/Test/ThreadNet/Leios.hs index f6a9b5ff20..125ed845ed 100644 --- a/ouroboros-consensus-cardano/test/cardano-test/Test/ThreadNet/Leios.hs +++ b/ouroboros-consensus-cardano/test/cardano-test/Test/ThreadNet/Leios.hs @@ -100,8 +100,10 @@ import Test.Consensus.Cardano.ProtocolInfo (Era (Conway), hardForkInto) import Test.QuickCheck ( Property , Testable + , choose , conjoin , counterexample + , forAll , tabulate , (.&&.) , (.||.) @@ -137,7 +139,7 @@ import Test.ThreadNet.Network , TraceThreadNetNode (FromLeios, FromLeiosPeer, FromMempool) ) import Test.ThreadNet.TxGen.Cardano (CardanoTxGenExtra (..)) -import Test.ThreadNet.Util.NodeJoinPlan (trivialNodeJoinPlan) +import Test.ThreadNet.Util.NodeJoinPlan (NodeJoinPlan (..), trivialNodeJoinPlan) import Test.ThreadNet.Util.NodeRestarts (noRestarts) import Test.ThreadNet.Util.NodeToNodeVersion (newestVersion) import Test.ThreadNet.Util.NodeTopology (meshNodeTopology) @@ -152,6 +154,8 @@ tests = "Leios ThreadNet" [ adjustQuickCheckTests (`div` 10) $ testProperty "basic functionality" prop_leios + , adjustQuickCheckTests (`div` 10) $ + testProperty "late join" prop_leios_late_join ] -- | Verify a suite of basic Leios ThreadNet invariants in a single run: @@ -183,7 +187,9 @@ prop_leios seed = numSlots = 200 :: Word64 (testOutput, ProtocolInfo{pInfoConfig, pInfoInitLedger}) = - runThreadNet seed (NumSlots numSlots) (NumCoreNodes $ fromIntegral numNodes) + runThreadNet seed (NumSlots numSlots) numCoreNodes (trivialNodeJoinPlan numCoreNodes) + + numCoreNodes = NumCoreNodes $ fromIntegral numNodes traces = testOutput.allTraces @@ -334,6 +340,39 @@ prop_leios seed = | (s1, s2) <- zip certifyingBlocks (drop 1 certifyingBlocks) ] +-- | Late-joining node must not crash. +-- +-- 4 nodes, 200 slots. Nodes 0–2 join at slot 0; node 3 joins at a random +-- slot. On the current codebase this crashes in 'resolveLeiosBlock' because +-- the late node receives a CertRB referencing an EB it never saw. +prop_leios_late_join :: Seed -> Property +prop_leios_late_join seed = + forAll (choose (1, fromIntegral numSlots - 1)) $ \lateJoinSlot -> + let + joinPlan = + NodeJoinPlan $ + Map.fromList + [ (CoreNodeId 0, SlotNo 0) + , (CoreNodeId 1, SlotNo 0) + , (CoreNodeId 2, SlotNo 0) + , (CoreNodeId 3, SlotNo $ fromIntegral (lateJoinSlot :: Int)) + ] + + numCoreNodes = NumCoreNodes 4 + + (testOutput, _) = + runThreadNet seed (NumSlots numSlots) numCoreNodes joinPlan + + nodeChains = + Chain.toOldestFirst . nodeOutputFinalChain <$> testOutput.testOutputNodes + in + -- The simulation must not throw (currently crashes in resolveLeiosBlock). + not (null nodeChains) + & counterexample "test output was empty" + & counterexample ("late join slot: " <> show lateJoinSlot) + where + numSlots = 200 :: Word64 + -- | Independently compute cumulative tx bytes by resolving each block in the -- chain (filling in EB closures from the LeiosDB) and summing individual -- 'sizeTxF' values per transaction. @@ -420,8 +459,9 @@ runThreadNet :: Seed -> NumSlots -> NumCoreNodes -> + NodeJoinPlan -> (TestOutput (CardanoBlock StandardCrypto), ProtocolInfo (CardanoBlock StandardCrypto)) -runThreadNet initSeed numSlots numCoreNodes = +runThreadNet initSeed numSlots numCoreNodes joinPlan = ( runTestNetwork testConfig testConfigB @@ -503,7 +543,7 @@ runThreadNet initSeed numSlots numCoreNodes = { forgeEbbEnv = Nothing , future = EraFinal slotLength shelleyGenesis.sgEpochLength , messageDelay = noCalcMessageDelay - , nodeJoinPlan = trivialNodeJoinPlan numCoreNodes + , nodeJoinPlan = joinPlan , nodeRestarts = noRestarts , txGenExtra = CardanoTxGenExtra From a2ea47598a3a742f0431723a6521937a058d9c66 Mon Sep 17 00:00:00 2001 From: Damian Nadales Date: Wed, 13 May 2026 16:30:32 -0300 Subject: [PATCH 02/20] Add hbMayCertifiedEb to HeaderBody Add a Maybe LeiosPoint field to the Praos HeaderBody that records which EB a certifying ranking block certifies. The CBOR codec uses length-switching (10/11/12) to stay backwards compatible with non-Leios headers. Forging passes the certificate's EB point for CertRBs and Nothing for regular transaction blocks. The field propagates through HeaderView, mkHeader, and all construction sites (generators, examples). --- .../Consensus/Shelley/Ledger/Forge.hs | 2 ++ .../Consensus/Shelley/Protocol/Abstract.hs | 4 ++- .../Consensus/Shelley/Protocol/Praos.hs | 4 ++- .../Consensus/Shelley/Protocol/TPraos.hs | 2 +- .../Test/Consensus/Shelley/Examples.hs | 1 + .../Test/Consensus/Shelley/Generators.hs | 1 + .../Consensus/Protocol/Praos/Header.hs | 31 ++++++++++++------- .../Consensus/Protocol/Praos/Views.hs | 3 +- .../Protocol/Serialisation/Generators.hs | 1 + .../Consensus/Protocol/Praos/Header.hs | 1 + .../src/ouroboros-consensus/LeiosDemoTypes.hs | 14 ++++++++- 11 files changed, 48 insertions(+), 16 deletions(-) diff --git a/ouroboros-consensus-cardano/src/shelley/Ouroboros/Consensus/Shelley/Ledger/Forge.hs b/ouroboros-consensus-cardano/src/shelley/Ouroboros/Consensus/Shelley/Ledger/Forge.hs index d4699e2d90..a6d5f68494 100644 --- a/ouroboros-consensus-cardano/src/shelley/Ouroboros/Consensus/Shelley/Ledger/Forge.hs +++ b/ouroboros-consensus-cardano/src/shelley/Ouroboros/Consensus/Shelley/Ledger/Forge.hs @@ -94,6 +94,7 @@ forgeShelleyBlock hotKey cbl mayLeiosInfo ForgeBlockArgs{..} = do (SL.bodyBytesSize protocolVersion body) protocolVersion (snd <$> mayForgedEbAnn) + Nothing let blk = mkShelleyBlock $ @@ -116,6 +117,7 @@ forgeShelleyBlock hotKey cbl mayLeiosInfo ForgeBlockArgs{..} = do (SL.bodyBytesSize protocolVersion body) protocolVersion Nothing -- FIXME(bladyjoker): Skip announcement when certifying https://github.com/input-output-hk/ouroboros-leios/issues/838 + (Just $ Leios.leiosCertificateEbPoint cert) let blk = mkShelleyBlock $ SL.Block diff --git a/ouroboros-consensus-cardano/src/shelley/Ouroboros/Consensus/Shelley/Protocol/Abstract.hs b/ouroboros-consensus-cardano/src/shelley/Ouroboros/Consensus/Shelley/Protocol/Abstract.hs index 4395a1d7b9..08a548b61a 100644 --- a/ouroboros-consensus-cardano/src/shelley/Ouroboros/Consensus/Shelley/Protocol/Abstract.hs +++ b/ouroboros-consensus-cardano/src/shelley/Ouroboros/Consensus/Shelley/Protocol/Abstract.hs @@ -48,7 +48,7 @@ import Data.Kind (Type) import Data.Typeable (Typeable) import Data.Word (Word64) import GHC.Generics (Generic) -import LeiosDemoTypes (EbAnnouncement) +import LeiosDemoTypes (EbAnnouncement, LeiosPoint) import NoThunks.Class (NoThunks) import Numeric.Natural (Natural) import Ouroboros.Consensus.Protocol.Abstract @@ -165,6 +165,8 @@ class ProtocolHeaderSupportsKES proto where ProtVer -> -- | Leios EB announcement Maybe EbAnnouncement -> + -- | Leios certified EB + Maybe LeiosPoint -> m (ShelleyProtocolHeader proto) -- | ProtocolHeaderSupportsProtocol` provides support for the concrete diff --git a/ouroboros-consensus-cardano/src/shelley/Ouroboros/Consensus/Shelley/Protocol/Praos.hs b/ouroboros-consensus-cardano/src/shelley/Ouroboros/Consensus/Shelley/Protocol/Praos.hs index c04c7526c4..2a8306834b 100644 --- a/ouroboros-consensus-cardano/src/shelley/Ouroboros/Consensus/Shelley/Protocol/Praos.hs +++ b/ouroboros-consensus-cardano/src/shelley/Ouroboros/Consensus/Shelley/Protocol/Praos.hs @@ -144,7 +144,7 @@ instance PraosCrypto c => ProtocolHeaderSupportsKES (Praos c) where currentKesPeriod - startOfKesPeriod | otherwise = 0 - mkHeader hk cbl il slotNo blockNo prevHash bodyHash bodySize protVer mayEbAnnouncement = do + mkHeader hk cbl il slotNo blockNo prevHash bodyHash bodySize protVer mayEbAnnouncement mayCertifiedEb = do PraosFields{praosSignature, praosToSign} <- forgePraosFields hk cbl il mkBhBodyBytes pure $ Header praosToSign praosSignature where @@ -167,6 +167,7 @@ instance PraosCrypto c => ProtocolHeaderSupportsKES (Praos c) where , hbOCert = praosToSignOCert , hbProtVer = protVer , hbMayEbAnnouncement = mayEbAnnouncement + , hbMayCertifiedEb = mayCertifiedEb } instance PraosCrypto c => ProtocolHeaderSupportsProtocol (Praos c) where @@ -182,6 +183,7 @@ instance PraosCrypto c => ProtocolHeaderSupportsProtocol (Praos c) where , hvSigned = headerBody , hvSignature = headerSig , hvMayEbAnnouncement = hbMayEbAnnouncement headerBody + , hvMayCertifiedEb = hbMayCertifiedEb headerBody } pHeaderIssuer = hbVk . headerBody pHeaderIssueNo = SL.ocertN . hbOCert . headerBody diff --git a/ouroboros-consensus-cardano/src/shelley/Ouroboros/Consensus/Shelley/Protocol/TPraos.hs b/ouroboros-consensus-cardano/src/shelley/Ouroboros/Consensus/Shelley/Protocol/TPraos.hs index e86cd9188e..319eb12ffa 100644 --- a/ouroboros-consensus-cardano/src/shelley/Ouroboros/Consensus/Shelley/Protocol/TPraos.hs +++ b/ouroboros-consensus-cardano/src/shelley/Ouroboros/Consensus/Shelley/Protocol/TPraos.hs @@ -90,7 +90,7 @@ instance PraosCrypto c => ProtocolHeaderSupportsKES (TPraos c) where currentKesPeriod - startOfKesPeriod | otherwise = 0 - mkHeader hotKey canBeLeader isLeader curSlot curNo prevHash bbHash actualBodySize protVer _mayEbAnnouncement = do + mkHeader hotKey canBeLeader isLeader curSlot curNo prevHash bbHash actualBodySize protVer _mayEbAnnouncement _mayCertifiedEb = do TPraosFields{tpraosSignature, tpraosToSign} <- forgeTPraosFields hotKey canBeLeader isLeader mkBhBody pure $ SL.BHeader tpraosToSign tpraosSignature diff --git a/ouroboros-consensus-cardano/src/unstable-shelley-testlib/Test/Consensus/Shelley/Examples.hs b/ouroboros-consensus-cardano/src/unstable-shelley-testlib/Test/Consensus/Shelley/Examples.hs index 5eb9c64e62..7d76583e8e 100644 --- a/ouroboros-consensus-cardano/src/unstable-shelley-testlib/Test/Consensus/Shelley/Examples.hs +++ b/ouroboros-consensus-cardano/src/unstable-shelley-testlib/Test/Consensus/Shelley/Examples.hs @@ -283,6 +283,7 @@ fromShelleyLedgerExamplesPraos , hbOCert = SL.bheaderOCert bhBody , hbProtVer = SL.bprotver bhBody , hbMayEbAnnouncement = Nothing + , hbMayCertifiedEb = Nothing } hSig = coerce bhSig hash = ShelleyHash $ SL.unHashHeader sleHashHeader diff --git a/ouroboros-consensus-cardano/src/unstable-shelley-testlib/Test/Consensus/Shelley/Generators.hs b/ouroboros-consensus-cardano/src/unstable-shelley-testlib/Test/Consensus/Shelley/Generators.hs index 353a17af0d..45f0cd676e 100644 --- a/ouroboros-consensus-cardano/src/unstable-shelley-testlib/Test/Consensus/Shelley/Generators.hs +++ b/ouroboros-consensus-cardano/src/unstable-shelley-testlib/Test/Consensus/Shelley/Generators.hs @@ -145,6 +145,7 @@ instance , Praos.hbOCert = SL.bheaderOCert bhBody , Praos.hbProtVer = SL.bprotver bhBody , Praos.hbMayEbAnnouncement = Nothing + , Praos.hbMayCertifiedEb = Nothing } hSig = coerce bhSig diff --git a/ouroboros-consensus-protocol/src/ouroboros-consensus-protocol/Ouroboros/Consensus/Protocol/Praos/Header.hs b/ouroboros-consensus-protocol/src/ouroboros-consensus-protocol/Ouroboros/Consensus/Protocol/Praos/Header.hs index 208dbb5de8..ad78c52171 100644 --- a/ouroboros-consensus-protocol/src/ouroboros-consensus-protocol/Ouroboros/Consensus/Protocol/Praos/Header.hs +++ b/ouroboros-consensus-protocol/src/ouroboros-consensus-protocol/Ouroboros/Consensus/Protocol/Praos/Header.hs @@ -5,6 +5,7 @@ {-# LANGUAGE NamedFieldPuns #-} {-# LANGUAGE PatternSynonyms #-} {-# LANGUAGE StandaloneDeriving #-} +{-# LANGUAGE TupleSections #-} {-# LANGUAGE TypeApplications #-} -- | Block header associated with Praos. @@ -70,7 +71,7 @@ import qualified Data.ByteString as BS import qualified Data.ByteString.Lazy as BSL import Data.Word (Word32) import GHC.Generics (Generic) -import LeiosDemoTypes (EbAnnouncement) +import LeiosDemoTypes (EbAnnouncement, LeiosPoint) import NoThunks.Class (AllowThunksIn (..), NoThunks (..)) import Ouroboros.Consensus.Protocol.Praos.VRF (InputVRF) @@ -99,6 +100,10 @@ data HeaderBody crypto = HeaderBody -- ^ protocol version , hbMayEbAnnouncement :: Maybe EbAnnouncement -- ^ Leios EB announcement + , hbMayCertifiedEb :: Maybe LeiosPoint + -- ^ The EB certified by this block's certificate, if any. + -- See CIP-0164 §"Ranking Blocks" for how certified EBs + -- link ranking blocks to their EB transaction closures. } deriving Generic @@ -203,13 +208,14 @@ instance Crypto crypto => EncCBOR (HeaderBody crypto) where , hbOCert , hbProtVer , hbMayEbAnnouncement + , hbMayCertifiedEb } = - let (len, encEbAnnouncement) = case hbMayEbAnnouncement of -- TODO(bladyjoker): This shennanigans is here for backwards compatibility, remove it! - Nothing -> (10, mempty) - Just ebAnnouncement -> - ( 11 - , encCBOR ebAnnouncement - ) + -- TODO(bladyjoker): This shennanigans is here for backwards compatibility, remove it! + let (len, encLeiosFields) = case (hbMayEbAnnouncement, hbMayCertifiedEb) of + (Nothing, Nothing) -> (10, mempty) + (Just ebAnn, Nothing) -> (11, encCBOR ebAnn) + (mayEbAnn, Just certEb) -> + (12, encCBOR mayEbAnn <> encCBOR certEb) in mconcat [ encodeListLen len , encCBOR hbBlockNo @@ -222,7 +228,7 @@ instance Crypto crypto => EncCBOR (HeaderBody crypto) where , encCBOR hbBodyHash , encCBOR hbOCert , encCBOR hbProtVer - , encEbAnnouncement + , encLeiosFields ] instance Crypto crypto => DecCBOR (HeaderBody crypto) where @@ -238,9 +244,11 @@ instance Crypto crypto => DecCBOR (HeaderBody crypto) where hbBodyHash <- decCBOR hbOCert <- unCBORGroup <$> decCBOR hbProtVer <- decCBOR - hbMayEbAnnouncement <- case len of - 10 -> return Nothing - 11 -> Just <$> decCBOR @EbAnnouncement -- TODO(bladyjoker): This shennanigans is here for backwards compatibility, remove it! + -- TODO(bladyjoker): This shennanigans is here for backwards compatibility, remove it! + (hbMayEbAnnouncement, hbMayCertifiedEb) <- case len of + 10 -> return (Nothing, Nothing) + 11 -> (,Nothing) . Just <$> decCBOR @EbAnnouncement + 12 -> (,) <$> decCBOR @(Maybe EbAnnouncement) <*> (Just <$> decCBOR @LeiosPoint) _ -> fail $ "Praos HeaderBody CBOR has wrong length: " <> show len return $ HeaderBody @@ -255,6 +263,7 @@ instance Crypto crypto => DecCBOR (HeaderBody crypto) where , hbOCert , hbProtVer , hbMayEbAnnouncement + , hbMayCertifiedEb } encodeHeaderRaw :: diff --git a/ouroboros-consensus-protocol/src/ouroboros-consensus-protocol/Ouroboros/Consensus/Protocol/Praos/Views.hs b/ouroboros-consensus-protocol/src/ouroboros-consensus-protocol/Ouroboros/Consensus/Protocol/Praos/Views.hs index cae725449e..a36d2c4e33 100644 --- a/ouroboros-consensus-protocol/src/ouroboros-consensus-protocol/Ouroboros/Consensus/Protocol/Praos/Views.hs +++ b/ouroboros-consensus-protocol/src/ouroboros-consensus-protocol/Ouroboros/Consensus/Protocol/Praos/Views.hs @@ -15,7 +15,7 @@ import Cardano.Protocol.TPraos.BHeader (PrevHash) import Cardano.Protocol.TPraos.OCert (OCert) import Cardano.Slotting.Slot (SlotNo) import Data.Word (Word16, Word32) -import LeiosDemoTypes (EbAnnouncement) +import LeiosDemoTypes (EbAnnouncement, LeiosPoint) import Ouroboros.Consensus.Protocol.Praos.Header (HeaderBody) import Ouroboros.Consensus.Protocol.Praos.VRF (InputVRF) @@ -38,6 +38,7 @@ data HeaderView crypto = HeaderView , hvSignature :: !(SignedKES (KES crypto) (HeaderBody crypto)) -- ^ KES Signature of the header , hvMayEbAnnouncement :: !(Maybe EbAnnouncement) + , hvMayCertifiedEb :: !(Maybe LeiosPoint) } data LedgerView = LedgerView diff --git a/ouroboros-consensus-protocol/src/unstable-protocol-testlib/Test/Consensus/Protocol/Serialisation/Generators.hs b/ouroboros-consensus-protocol/src/unstable-protocol-testlib/Test/Consensus/Protocol/Serialisation/Generators.hs index e92e1b2c2c..518019a8c5 100644 --- a/ouroboros-consensus-protocol/src/unstable-protocol-testlib/Test/Consensus/Protocol/Serialisation/Generators.hs +++ b/ouroboros-consensus-protocol/src/unstable-protocol-testlib/Test/Consensus/Protocol/Serialisation/Generators.hs @@ -62,6 +62,7 @@ instance Praos.PraosCrypto c => Arbitrary (HeaderBody c) where <*> ocert <*> arbitrary <*> arbitrary + <*> pure Nothing instance Praos.PraosCrypto c => Arbitrary (Header c) where arbitrary = do diff --git a/ouroboros-consensus-protocol/src/unstable-protocol-testlib/Test/Ouroboros/Consensus/Protocol/Praos/Header.hs b/ouroboros-consensus-protocol/src/unstable-protocol-testlib/Test/Ouroboros/Consensus/Protocol/Praos/Header.hs index f474443a8e..133a755a6f 100644 --- a/ouroboros-consensus-protocol/src/unstable-protocol-testlib/Test/Ouroboros/Consensus/Protocol/Praos/Header.hs +++ b/ouroboros-consensus-protocol/src/unstable-protocol-testlib/Test/Ouroboros/Consensus/Protocol/Praos/Header.hs @@ -446,6 +446,7 @@ genHeaderBody context = do hbBodyHash <- genHash (hbOCert, kesPeriod) <- genCert hbSlotNo context let hbMayEbAnnouncement = Nothing + hbMayCertifiedEb = Nothing let hbProtVer = protocolVersionZero headerBody = HeaderBody{..} pure $ (headerBody, kesPeriod) diff --git a/ouroboros-consensus/src/ouroboros-consensus/LeiosDemoTypes.hs b/ouroboros-consensus/src/ouroboros-consensus/LeiosDemoTypes.hs index 312dcd1746..92238b20d7 100644 --- a/ouroboros-consensus/src/ouroboros-consensus/LeiosDemoTypes.hs +++ b/ouroboros-consensus/src/ouroboros-consensus/LeiosDemoTypes.hs @@ -19,7 +19,8 @@ module LeiosDemoTypes (module LeiosDemoTypes) where import Cardano.Binary (FromCBOR (fromCBOR), ToCBOR, enforceSize, serialize', toCBOR) import qualified Cardano.Crypto.Hash as Hash -import Cardano.Ledger.Binary (DecCBOR, EncCBOR) +import Cardano.Ledger.Binary (DecCBOR (..), EncCBOR (..)) +import qualified Cardano.Ledger.Binary as LedgerBinary import Cardano.Ledger.Core (EraTx, Tx) import Cardano.Prelude (NFData, NonEmpty, toList, toString, (&)) import Cardano.Slotting.Slot (SlotNo (SlotNo)) @@ -128,6 +129,17 @@ decodeLeiosPoint = do enforceSize (fromString "LeiosPoint") 2 MkLeiosPoint <$> decode <*> decodeEbHash +instance EncCBOR LeiosPoint where + encCBOR (MkLeiosPoint ebSlot ebHash) = + LedgerBinary.encodeListLen 2 + <> encCBOR ebSlot + <> encCBOR ebHash + +instance DecCBOR LeiosPoint where + decCBOR = do + LedgerBinary.decodeListLenOf 2 + MkLeiosPoint <$> decCBOR <*> decCBOR + -- | Types used in Praos headers data EbAnnouncement = EbAnnouncement { ebAnnouncementHash :: EbHash From af9ce50ecaa595eec58ab9764fdfd95c65d44841 Mon Sep 17 00:00:00 2001 From: Damian Nadales Date: Wed, 13 May 2026 17:36:40 -0300 Subject: [PATCH 03/20] Filter pending CertRBs from ChainSel When a CertRB arrives whose EB closure is not in the LeiosDB, record it in cdbPendingEBs and skip chain selection. Subsequent chain selections filter pending hashes from both lookupBlockInfo (predecessor tracing) and succsOf (successor enumeration), making the CertRB invisible until its EB closure arrives. Adds certifiedEbFromHeader to ResolveLeiosBlock so ChainSel can inspect the header without reaching into block-type-specific layers. --- .../Ouroboros/Consensus/Cardano/Block.hs | 8 ++- .../Consensus/Shelley/Ledger/Ledger.hs | 8 +++ .../Consensus/Storage/ChainDB/Impl.hs | 2 + .../Storage/ChainDB/Impl/Background.hs | 3 ++ .../Storage/ChainDB/Impl/ChainSel.hs | 51 ++++++++++++++++--- .../Consensus/Storage/ChainDB/Impl/Types.hs | 6 +++ .../Consensus/Storage/LedgerDB/Forker.hs | 7 +++ 7 files changed, 76 insertions(+), 9 deletions(-) diff --git a/ouroboros-consensus-cardano/src/ouroboros-consensus-cardano/Ouroboros/Consensus/Cardano/Block.hs b/ouroboros-consensus-cardano/src/ouroboros-consensus-cardano/Ouroboros/Consensus/Cardano/Block.hs index 99af2ee481..93b1201354 100644 --- a/ouroboros-consensus-cardano/src/ouroboros-consensus-cardano/Ouroboros/Consensus/Cardano/Block.hs +++ b/ouroboros-consensus-cardano/src/ouroboros-consensus-cardano/Ouroboros/Consensus/Cardano/Block.hs @@ -215,7 +215,9 @@ import Ouroboros.Consensus.Protocol.Praos (Praos, PraosCrypto) import Ouroboros.Consensus.Protocol.TPraos (TPraos) import Ouroboros.Consensus.Shelley.Eras import Ouroboros.Consensus.Shelley.Ledger (ShelleyBlock, ShelleyCompatible) -import Ouroboros.Consensus.Storage.LedgerDB (ResolveLeiosBlock (resolveLeiosBlock)) +import Ouroboros.Consensus.Storage.LedgerDB + ( ResolveLeiosBlock (certifiedEbFromHeader, resolveLeiosBlock) + ) import Ouroboros.Consensus.TypeFamilyWrappers {------------------------------------------------------------------------------- @@ -1403,3 +1405,7 @@ instance injectConwayBlock <$> resolveLeiosBlock db conwayHdrSt conwayBlk resolveLeiosBlock _ _ blk = return blk + + certifiedEbFromHeader (HeaderConway conwayHdr) = + certifiedEbFromHeader conwayHdr + certifiedEbFromHeader _ = Nothing diff --git a/ouroboros-consensus-cardano/src/shelley/Ouroboros/Consensus/Shelley/Ledger/Ledger.hs b/ouroboros-consensus-cardano/src/shelley/Ouroboros/Consensus/Shelley/Ledger/Ledger.hs index 0f61f331d5..ab2ea25fde 100644 --- a/ouroboros-consensus-cardano/src/shelley/Ouroboros/Consensus/Shelley/Ledger/Ledger.hs +++ b/ouroboros-consensus-cardano/src/shelley/Ouroboros/Consensus/Shelley/Ledger/Ledger.hs @@ -149,6 +149,10 @@ import Ouroboros.Consensus.Protocol.Praos , PraosState (praosStateLastSlot, praosStateLeios) , withOriginToSlotNo ) +import Ouroboros.Consensus.Protocol.Praos.Header + ( Header (headerBody) + , HeaderBody (hbMayCertifiedEb) + ) import Ouroboros.Consensus.Protocol.TPraos (TPraos) import Ouroboros.Consensus.Shelley.Ledger.Block import Ouroboros.Consensus.Shelley.Ledger.Config @@ -158,6 +162,7 @@ import Ouroboros.Consensus.Shelley.Protocol.Abstract , envelopeChecks , mkHeaderView ) +import Ouroboros.Consensus.Shelley.Protocol.Praos () import Ouroboros.Consensus.Storage.LedgerDB import Ouroboros.Consensus.Util.CBOR ( decodeWithOrigin @@ -372,6 +377,9 @@ instance } resolveLeiosBlock _leiosDb _hdrSt blk = return blk + certifiedEbFromHeader (ShelleyHeader rawHdr _) = + hbMayCertifiedEb (headerBody rawHdr) + deserialiseShelleyTx :: forall era. ShelleyBasedEra era => BS.ByteString -> Core.Tx era deserialiseShelleyTx bs = case CB.decodeFullAnnotator (Core.eraProtVerLow @era) "Leios Tx" CB.decCBOR (BL.fromStrict bs) of Left err -> error $ "Failed to deserialise Tx era: " <> show err diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl.hs index 72668e65d6..f4cccf860e 100644 --- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl.hs +++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl.hs @@ -229,6 +229,7 @@ openDBInternal leiosDb args launchBgTasks = runWithTempRegistry $ do chainSelFuse <- newFuse "chain selection" chainSelQueue <- newChainSelQueue (Args.cdbsBlocksToAddSize cdbSpecificArgs) varChainSelStarvation <- newTVarIO ChainSelStarvationOngoing + varPendingEBs <- newTVarIO Map.empty let env = CDB @@ -254,6 +255,7 @@ openDBInternal leiosDb args launchBgTasks = runWithTempRegistry $ do , cdbChainSelQueue = chainSelQueue , cdbLoE = Args.cdbsLoE cdbSpecificArgs , cdbChainSelStarvation = varChainSelStarvation + , cdbPendingEBs = varPendingEBs } h <- fmap CDBHandle $ newTVarIO $ ChainDbOpen env let chainDB = diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Background.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Background.hs index ac369c143f..b3a59609ab 100644 --- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Background.hs +++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Background.hs @@ -72,6 +72,7 @@ import Ouroboros.Consensus.Storage.ChainDB.Impl.ChainSel ) import Ouroboros.Consensus.Storage.ChainDB.Impl.Types import qualified Ouroboros.Consensus.Storage.ImmutableDB as ImmutableDB +import Ouroboros.Consensus.Storage.LedgerDB (ResolveLeiosBlock) import qualified Ouroboros.Consensus.Storage.LedgerDB as LedgerDB import qualified Ouroboros.Consensus.Storage.VolatileDB as VolatileDB import Ouroboros.Consensus.Util @@ -92,6 +93,7 @@ launchBgTasks :: , BlockSupportsDiffusionPipelining blk , InspectLedger blk , HasHardForkHistory blk + , ResolveLeiosBlock blk ) => LeiosDbHandle m -> ChainDbEnv m blk -> @@ -520,6 +522,7 @@ addBlockRunner :: , InspectLedger blk , HasHardForkHistory blk , HasCallStack + , ResolveLeiosBlock blk ) => LeiosDbHandle m -> Fuse m -> diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/ChainSel.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/ChainSel.hs index 7b875fa320..37cb039875 100644 --- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/ChainSel.hs +++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/ChainSel.hs @@ -24,7 +24,7 @@ module Ouroboros.Consensus.Storage.ChainDB.Impl.ChainSel import Cardano.Ledger.BaseTypes (unNonZero) import Control.Exception (assert) -import Control.Monad (forM, forM_, when) +import Control.Monad (forM, forM_, unless, when) import Control.Monad.Except () import Control.Monad.Trans.Class (lift) import Control.Monad.Trans.State.Strict @@ -43,7 +43,7 @@ import Data.Maybe.Strict (StrictMaybe (..), strictMaybeToMaybe) import Data.Set (Set) import qualified Data.Set as Set import GHC.Stack (HasCallStack) -import LeiosDemoDb (LeiosDbConnection) +import LeiosDemoDb (LeiosDbConnection (leiosDbQueryCompletedEbByPoint)) import Ouroboros.Consensus.Block import Ouroboros.Consensus.Config import Ouroboros.Consensus.Fragment.Diff (ChainDiff (..)) @@ -348,6 +348,7 @@ chainSelSync :: , InspectLedger blk , HasHardForkHistory blk , HasCallStack + , ResolveLeiosBlock blk ) => LeiosDbConnection m -> ChainDbEnv m blk -> @@ -426,7 +427,25 @@ chainSelSync leiosDb cdb@CDB{..} (ChainSelAddBlock BlockToAdd{blockToAdd = b, .. encloseWith (traceEv >$< addBlockTracer) $ VolatileDB.putBlock cdbVolatileDB b lift $ deliverWrittenToDisk True - chainSelectionForBlock leiosDb cdb (BlockCache.singleton b) hdr blockPunish + + -- If this block certifies an EB whose closure we don't have yet, + -- record it as pending and skip chain selection — it would be + -- filtered out anyway. ChainSel will be re-triggered when the + -- EB closure arrives (step 3). + isPending <- lift $ case certifiedEbFromHeader hdr of + Nothing -> pure False + Just leiosPoint -> do + mayClosure <- leiosDbQueryCompletedEbByPoint leiosDb leiosPoint + case mayClosure of + Just _ -> pure False + Nothing -> do + atomically $ + modifyTVar cdbPendingEBs $ + Map.insert leiosPoint (headerHash hdr) + pure True + + unless isPending $ + chainSelectionForBlock leiosDb cdb (BlockCache.singleton b) hdr blockPunish newTip <- lift $ atomically $ Query.getTipPoint cdb @@ -539,14 +558,15 @@ chainSelectionForBlock :: InvalidBlockPunishment m -> Electric m () chainSelectionForBlock leiosDb cdb@CDB{..} blockCache hdr punish = electric $ withRegistry $ \rr -> do - (invalid, succsOf, lookupBlockInfo, curChain, tipPoint) <- + (invalid, succsOf, lookupBlockInfo, curChain, tipPoint, pendingHashes) <- atomically $ - (,,,,) + (,,,,,) <$> (forgetFingerprint <$> readTVar cdbInvalid) <*> VolatileDB.filterByPredecessor cdbVolatileDB <*> VolatileDB.getBlockInfo cdbVolatileDB <*> Query.getCurrentChain cdb <*> Query.getTipPoint cdb + <*> (Set.fromList . Map.elems <$> readTVar cdbPendingEBs) -- This is safe: the LedgerDB tip doesn't change in between the previous -- atomically block and this call to 'withTipForker'. LedgerDB.withTipForker cdbLedgerDB rr $ \curForker -> do @@ -561,9 +581,13 @@ chainSelectionForBlock leiosDb cdb@CDB{..} blockCache hdr punish = electric $ wi immBlockNo :: WithOrigin BlockNo immBlockNo = AF.anchorBlockNo curChain - -- Let these two functions ignore invalid blocks - lookupBlockInfo' = ignoreInvalid cdb invalid lookupBlockInfo - succsOf' = ignoreInvalidSuc cdb invalid succsOf + -- Let these two functions ignore invalid blocks and pending CertRBs + lookupBlockInfo' = + ignorePending pendingHashes $ + ignoreInvalid cdb invalid lookupBlockInfo + succsOf' = + Set.filter (`Set.notMember` pendingHashes) + . ignoreInvalidSuc cdb invalid succsOf -- The preconditions assert (isJust $ lookupBlockInfo (headerHash hdr)) $ return () @@ -1416,3 +1440,14 @@ ignoreInvalidSuc :: (ChainHash blk -> Set (HeaderHash blk)) ignoreInvalidSuc _ invalid succsOf = Set.filter (`Map.notMember` invalid) . succsOf + +-- | Wrap a lookup function so that pending CertRBs (whose EB closure +-- hasn't arrived yet) are invisible to chain selection. +ignorePending :: + Ord h => + Set h -> + (h -> Maybe a) -> + (h -> Maybe a) +ignorePending pending getter hash + | Set.member hash pending = Nothing + | otherwise = getter hash diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Types.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Types.hs index 37ef98fe9d..452edcc765 100644 --- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Types.hs +++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Types.hs @@ -92,6 +92,7 @@ import Data.Typeable import Data.Void (Void) import Data.Word (Word64) import GHC.Generics (Generic) +import LeiosDemoTypes (LeiosPoint) import NoThunks.Class (OnlyCheckWhnfNamed (..)) import Ouroboros.Consensus.Block import Ouroboros.Consensus.Config @@ -350,6 +351,11 @@ data ChainDbEnv m blk = CDB , cdbChainSelStarvation :: !(StrictTVar m ChainSelStarvation) -- ^ Information on the last starvation of ChainSel, whether ongoing or -- ended recently. + , cdbPendingEBs :: !(StrictTVar m (Map LeiosPoint (HeaderHash blk))) + -- ^ CertRBs whose EB closure hasn't been fetched yet. Keyed by the + -- missing EB's 'LeiosPoint', value is the CertRB's header hash. + -- ChainSel filters these out of successor candidates until the EB + -- closure arrives. } deriving Generic diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/LedgerDB/Forker.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/LedgerDB/Forker.hs index b7cbecc549..c07a2062bb 100644 --- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/LedgerDB/Forker.hs +++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/LedgerDB/Forker.hs @@ -74,6 +74,7 @@ import qualified Data.Set as Set import Data.Word import GHC.Generics import LeiosDemoDb (LeiosDbConnection) +import LeiosDemoTypes (LeiosPoint) import NoThunks.Class import Ouroboros.Consensus.Block import Ouroboros.Consensus.Config @@ -441,6 +442,12 @@ class ResolveLeiosBlock blk where Monad m => LeiosDbConnection m -> HeaderState blk -> blk -> m blk resolveLeiosBlock _ _ blk = return blk + -- | Extract the certified EB point from a header, if this block + -- certifies an EB. Used by ChainSel to defer blocks whose EB + -- closure hasn't been fetched yet. + certifiedEbFromHeader :: Header blk -> Maybe LeiosPoint + certifiedEbFromHeader _ = Nothing + -- | Apply blocks to the given forker applyBlock :: forall m bm c l blk. From ec16b11ba0503d39facee6795f5924c5116be729 Mon Sep 17 00:00:00 2001 From: Damian Nadales Date: Thu, 14 May 2026 12:44:56 -0300 Subject: [PATCH 04/20] Add chain-consistency assertion to prop_leios_late_join Assert that all nodes converge to the same chain. Fails as expected: the late node's chain is shorter (1 block vs 10) because CertRBs with missing EB closures are permanently excluded from ChainSel. --- .../test/cardano-test/Test/ThreadNet/Leios.hs | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/ouroboros-consensus-cardano/test/cardano-test/Test/ThreadNet/Leios.hs b/ouroboros-consensus-cardano/test/cardano-test/Test/ThreadNet/Leios.hs index 125ed845ed..ad012cacc5 100644 --- a/ouroboros-consensus-cardano/test/cardano-test/Test/ThreadNet/Leios.hs +++ b/ouroboros-consensus-cardano/test/cardano-test/Test/ThreadNet/Leios.hs @@ -366,9 +366,18 @@ prop_leios_late_join seed = nodeChains = Chain.toOldestFirst . nodeOutputFinalChain <$> testOutput.testOutputNodes in - -- The simulation must not throw (currently crashes in resolveLeiosBlock). - not (null nodeChains) - & counterexample "test output was empty" + -- The simulation must not throw (fixed in step 2). + conjoin + [ not (null nodeChains) + & counterexample "test output was empty" + , -- All nodes should converge to the same chain. Fails after step 2 + -- because CertRBs with missing closures are permanently excluded, + -- making the late node's chain shorter. Step 3 (ChainSel re-trigger + -- on EB arrival) fixes this. + all (== head (Map.elems nodeChains)) nodeChains + & counterexample "nodes have different chains" + & counterexample ("chain lengths: " <> show (fmap length nodeChains)) + ] & counterexample ("late join slot: " <> show lateJoinSlot) where numSlots = 200 :: Word64 From 75ef78bb7fba5ecd8faa9da1ccacc648eafe11eb Mon Sep 17 00:00:00 2001 From: Damian Nadales Date: Thu, 14 May 2026 16:10:10 -0300 Subject: [PATCH 05/20] Re-trigger ChainSel when EB closure arrives Add ChainSelReprocessBlock message type that re-runs chain selection for a single CertRB whose EB closure was previously missing. A new background thread (ebCompletionRunner) subscribes to LeiosDB notifications and enqueues ChainSelReprocessBlock when an EB becomes complete. The chain-consistency assertion still fails: the re-trigger fires correctly but most EB closures never complete on the late node because the fetch mechanism doesn't deliver historical EB bodies and txs. --- .../Storage/ChainDB/Impl/Background.hs | 41 ++++++++++++++++++- .../Storage/ChainDB/Impl/ChainSel.hs | 11 +++++ .../Consensus/Storage/ChainDB/Impl/Types.hs | 28 +++++++++++++ 3 files changed, 78 insertions(+), 2 deletions(-) diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Background.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Background.hs index b3a59609ab..6681d12784 100644 --- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Background.hs +++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Background.hs @@ -56,7 +56,8 @@ import Data.Void (Void) import Data.Word import GHC.Generics (Generic) import GHC.Stack (HasCallStack) -import LeiosDemoDb (LeiosDbHandle (..)) +import Control.Concurrent.Class.MonadSTM.Strict (readTChan) +import LeiosDemoDb (LeiosDbConnection (..), LeiosDbHandle (..), LeiosEbNotification (..)) import Ouroboros.Consensus.Block import Ouroboros.Consensus.Config import Ouroboros.Consensus.HardFork.Abstract @@ -104,6 +105,9 @@ launchBgTasks leiosDb cdb@CDB{..} replayed = do !addBlockThread <- launch "ChainDB.addBlockRunner" $ addBlockRunner leiosDb cdbChainSelFuse cdb + !ebCompletionThread <- + launch "ChainDB.ebCompletionRunner" $ + ebCompletionRunner leiosDb cdb gcSchedule <- newGcSchedule !gcThread <- launch "ChainDB.gcScheduleRunner" $ @@ -114,7 +118,7 @@ launchBgTasks leiosDb cdb@CDB{..} replayed = do copyAndSnapshotRunner cdb gcSchedule replayed cdbCopyFuse atomically $ writeTVar cdbKillBgThreads $ - sequence_ [addBlockThread, gcThread, copyAndSnapshotThread] + sequence_ [addBlockThread, ebCompletionThread, gcThread, copyAndSnapshotThread] where launch :: String -> m Void -> m (m ()) launch = fmap cancelThread .: forkLinkedThread cdbRegistry @@ -542,6 +546,8 @@ addBlockRunner leiosDb fuse cdb@CDB{..} = do case message of ChainSelReprocessLoEBlocks varProcessed -> void $ tryPutTMVar varProcessed () + ChainSelReprocessBlock _ varProcessed -> + void $ tryPutTMVar varProcessed () ChainSelAddBlock BlockToAdd{varBlockWrittenToDisk, varBlockProcessed} -> do _ <- tryPutTMVar @@ -558,6 +564,8 @@ addBlockRunner leiosDb fuse cdb@CDB{..} = do lift $ case message of ChainSelReprocessLoEBlocks _ -> trace PoppedReprocessLoEBlocksFromQueue + ChainSelReprocessBlock _ _ -> + trace PoppedReprocessLoEBlocksFromQueue ChainSelAddBlock BlockToAdd{blockToAdd} -> trace $ PoppedBlockFromQueue $ @@ -568,3 +576,32 @@ addBlockRunner leiosDb fuse cdb@CDB{..} = do ) where starvationTracer = Tracer $ traceWith cdbTracer . TraceChainSelStarvationEvent + +-- | Monitor LeiosDB for EB completions and re-trigger ChainSel for +-- CertRBs whose EB closure was previously missing. +-- +-- When a CertRB arrives with a missing EB closure, 'chainSelSync' +-- records it in 'cdbPendingEBs' and skips chain selection. This +-- thread watches for the closure to appear in the LeiosDB, then +-- enqueues a 'ChainSelReprocessBlock' so the CertRB can be selected. +ebCompletionRunner :: + IOLike m => + LeiosDbHandle m -> + ChainDbEnv m blk -> + m Void +ebCompletionRunner leiosDb CDB{..} = do + notifChan <- subscribeEbNotifications leiosDb + leiosConn <- open leiosDb + forever $ do + notif <- atomically $ readTChan notifChan + let leiosPoint = case notif of + AcquiredEb point _ -> point + AcquiredEbTxs point -> point + pending <- atomically $ readTVar cdbPendingEBs + case Map.lookup leiosPoint pending of + Nothing -> pure () + Just certRBHash -> do + mayComplete <- leiosDbQueryCompletedEbByPoint leiosConn leiosPoint + case mayComplete of + Nothing -> pure () + Just _ -> void $ addReprocessBlock cdbChainSelQueue certRBHash diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/ChainSel.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/ChainSel.hs index 37cb039875..f7c65a79b7 100644 --- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/ChainSel.hs +++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/ChainSel.hs @@ -389,6 +389,17 @@ chainSelSync leiosDb cdb@CDB{..} (ChainSelReprocessLoEBlocks varProcessed) = do for_ loeHeaders $ \hdr -> chainSelectionForBlock leiosDb cdb BlockCache.empty hdr noPunishment lift $ atomically $ putTMVar varProcessed () +-- Re-trigger chain selection for a CertRB whose EB closure has arrived. +-- Remove it from the pending set so 'chainSelectionForBlock' no longer +-- filters it out, then run chain selection for that block. +chainSelSync leiosDb cdb@CDB{..} (ChainSelReprocessBlock hash varProcessed) = do + lift $ + atomically $ + modifyTVar cdbPendingEBs $ + Map.filter (/= hash) + hdr <- lift $ VolatileDB.getKnownBlockComponent cdbVolatileDB GetHeader hash + chainSelectionForBlock leiosDb cdb BlockCache.empty hdr noPunishment + lift $ atomically $ putTMVar varProcessed () chainSelSync leiosDb cdb@CDB{..} (ChainSelAddBlock BlockToAdd{blockToAdd = b, ..}) = do (isMember, invalid, curChain) <- lift $ diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Types.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Types.hs index 452edcc765..74901cf6f4 100644 --- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Types.hs +++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Types.hs @@ -55,6 +55,7 @@ module Ouroboros.Consensus.Storage.ChainDB.Impl.Types , ChainSelMessage (..) , ChainSelQueue -- opaque , addBlockToAdd + , addReprocessBlock , addReprocessLoEBlocks , closeChainSelQueue , getChainSelMessage @@ -553,6 +554,14 @@ data ChainSelMessage m blk ChainSelReprocessLoEBlocks -- | Used for 'ChainSelectionPromise'. !(StrictTMVar m ()) + | -- | Reprocess a single block whose EB closure has arrived. + -- Used by the late-join mechanism: when a CertRB's missing EB + -- closure becomes available, this message re-triggers ChainSel + -- for that specific block. + ChainSelReprocessBlock + !(HeaderHash blk) + -- Used for 'ChainSelectionPromise'. + !(StrictTMVar m ()) -- | Create a new 'ChainSelQueue' with the given size. newChainSelQueue :: (IOLike m, StandardHash blk, Typeable blk) => Word -> m (ChainSelQueue m blk) @@ -612,6 +621,21 @@ addReprocessLoEBlocks tracer ChainSelQueue{varChainSelQueue} = do ChainSelReprocessLoEBlocks varProcessed return $ ChainSelectionPromise waitUntilRan +-- | Re-trigger chain selection for a single block whose EB closure has +-- arrived. Modelled on 'addReprocessLoEBlocks'. +addReprocessBlock :: + IOLike m => + ChainSelQueue m blk -> + HeaderHash blk -> + m (ChainSelectionPromise m) +addReprocessBlock ChainSelQueue{varChainSelQueue} hash = do + varProcessed <- newEmptyTMVarIO + let waitUntilRan = atomically $ readTMVar varProcessed + atomically $ + writeTBQueue varChainSelQueue $ + ChainSelReprocessBlock hash varProcessed + return $ ChainSelectionPromise waitUntilRan + -- | Get the oldest message from the 'ChainSelQueue' queue. Can block when the -- queue is empty; in that case, reports the starvation (and its end) via the -- given tracer. @@ -649,6 +673,7 @@ getChainSelMessage starvationTracer starvationVar chainSelQueue = traceWith starvationTracer $ ChainSelStarvation (FallingEdgeWith pt) atomically . writeTVar starvationVar . ChainSelStarvationEndedAt =<< getMonotonicTime ChainSelReprocessLoEBlocks{} -> pure () + ChainSelReprocessBlock{} -> pure () -- TODO Can't use tryReadTBQueue from io-classes because it is broken for IOSim -- (but not for IO). https://github.com/input-output-hk/io-sim/issues/195 @@ -670,6 +695,7 @@ closeChainSelQueue ChainSelQueue{varChainSelQueue = queue} = do blockAdd = \case ChainSelAddBlock ab -> Just ab ChainSelReprocessLoEBlocks _ -> Nothing + ChainSelReprocessBlock _ _ -> Nothing -- | To invoke when the given 'ChainSelMessage' has been processed by ChainSel. -- This is used to remove the respective point from the multiset of points in @@ -684,6 +710,8 @@ processedChainSelMessage ChainSelQueue{varChainSelPoints} = \case modifyTVar varChainSelPoints $ MultiSet.delete (blockRealPoint blk) ChainSelReprocessLoEBlocks{} -> pure () + ChainSelReprocessBlock{} -> + pure () -- | Return a function to test the membership memberChainSelQueue :: From 525a3ef357cecec99082cc69b9bc6585736b24dc Mon Sep 17 00:00:00 2001 From: Damian Nadales Date: Thu, 14 May 2026 19:38:31 -0300 Subject: [PATCH 06/20] Fetch missing CertRB closures on late-joining nodes When ChainSel filters a CertRB because its EB closure is missing, drive a fetch through LeiosFetch using each peer's ChainSync candidate fragment as a fallback peer source. * Expose cdbPendingEBs via ChainDB.getPendingCertRBs. * pendingEbReconciler in NodeKernel mirrors the pending set into Leios missingEbBodies with size 0; it never overwrites offer-supplied sizes and only removes its own size-0 entries. * leiosFetchLogic walks per-peer ChainSync candidate fragments, extracts certified EB hashes via certifiedEbFromHeader, and passes a per-peer Set EbHash to leiosFetchLogicIteration. * choosePeerEb and choosePeerTx fall back to candidate-derived peers when no peer has offered the EB body / tx-closure. A peer whose candidate contains the CertRB must have validated the closure locally, so it must also hold both the body and the txs. * Relax the response-size check in msgLeiosBlock when the expected size is 0; the hash check remains authoritative. --- .../Ouroboros/Consensus/NodeKernel.hs | 57 ++++++++++++- .../src/ouroboros-consensus/LeiosDemoLogic.hs | 79 ++++++++++++------- .../Consensus/Storage/ChainDB/API.hs | 6 ++ .../Consensus/Storage/ChainDB/Impl.hs | 1 + .../Consensus/Storage/ChainDB/Impl/Query.hs | 10 +++ 5 files changed, 124 insertions(+), 29 deletions(-) diff --git a/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/NodeKernel.hs b/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/NodeKernel.hs index e80f057768..09c6f44f40 100644 --- a/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/NodeKernel.hs +++ b/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/NodeKernel.hs @@ -56,7 +56,7 @@ import Data.Functor ((<&>)) import Data.Hashable (Hashable) import Data.List.NonEmpty (NonEmpty) import qualified Data.List.NonEmpty as NE -import Data.Maybe (isJust, mapMaybe) +import Data.Maybe (fromMaybe, isJust, mapMaybe) import Data.Proxy import qualified Data.Text as Text import Data.Void (Void) @@ -165,6 +165,8 @@ import Control.Concurrent.Class.MonadSTM.Strict (readTChan) import qualified Data.ByteString as BS import Data.Map (Map) import qualified Data.Map as Map +import Data.Set (Set) +import qualified Data.Set as Set import LeiosDemoDb ( LeiosDbConnection , LeiosDbHandle (..) @@ -176,9 +178,11 @@ import LeiosDemoDb import qualified LeiosDemoDb as LeiosDb import qualified LeiosDemoLogic as Leios import LeiosDemoTypes - ( ForgedLeiosEb + ( EbHash + , ForgedLeiosEb , LeiosOutstanding , LeiosPeerVars + , LeiosPoint (..) , LeiosVote (..) , TraceLeiosKernel (..) , VoterId (..) @@ -419,6 +423,39 @@ initNodeKernel getLeiosOutstanding <- MVar.newMVar Leios.emptyLeiosOutstanding -- TODO init from DB getLeiosReady <- MVar.newEmptyMVar + -- Mirror cdbPendingEBs into Leios missingEbBodies with size 0 so that the + -- LeiosFetch client tries to fetch closures for CertRBs that ChainSel is + -- holding back. + lastAppliedPendingEBs <- newTVarIO Map.empty + void $ + forkLinkedThread registry "NodeKernel.pendingEbReconciler" $ + forever $ do + (added, removed) <- atomically $ do + new <- ChainDB.getPendingCertRBs chainDB + old <- readTVar lastAppliedPendingEBs + when (new == old) retry + writeTVar lastAppliedPendingEBs new + pure (Map.difference new old, Map.difference old new) + MVar.modifyMVar_ getLeiosOutstanding $ \outstanding -> do + let bodies0 = Leios.missingEbBodies outstanding + -- Add entries from new pending CertRBs; never overwrite an + -- existing non-zero (offer-supplied) size. + bodies1 = + Map.foldlWithKey' + (\acc point _ -> Map.alter (Just . fromMaybe 0) point acc) + bodies0 + added + -- Drop entries we no longer need; keep offer-driven entries + -- (non-zero size) untouched. + bodies2 = + Map.foldlWithKey' + (\acc point _ -> + Map.update (\sz -> if sz == 0 then Nothing else Just sz) point acc) + bodies1 + removed + pure outstanding{Leios.missingEbBodies = bodies2} + void $ MVar.tryPutMVar getLeiosReady () + void $ forkLinkedThread registry "NodeKernel.leiosFetchLogic" $ do leiosConn <- allocate_ registry (LeiosDb.open leiosDB) LeiosDb.close @@ -429,6 +466,21 @@ initNodeKernel iterationStart <- getMonotonicTime leiosPeersVars <- MVar.readMVar getLeiosPeersVars offerings <- mapM (MVar.readMVar . Leios.offerings) leiosPeersVars + -- Per-peer certified EBs derived from ChainSync candidate fragments, + -- used as a fallback peer source for fetching EB closures that no + -- peer has explicitly offered. + candidateCertEbs <- atomically $ do + handles <- cschcMap varChainSyncHandles + fmap (Map.mapKeysMonotonic Leios.MkPeerId) $ + forM handles $ \handle -> do + state <- readTVar (cschState handle) + pure $ + Set.fromList + [ pointEbHash + | hwt <- AF.toOldestFirst (csCandidate state) + , Just MkLeiosPoint{pointEbHash} <- + [LedgerDB.certifiedEbFromHeader (hwtHeader hwt)] + ] newDecisions <- MVar.modifyMVar getLeiosOutstanding $ \outstanding -> do -- Filter outstanding work against DB before running fetch iteration. -- This removes EBs and TXs we already have (e.g., from forging or other peers). @@ -438,6 +490,7 @@ initNodeKernel Leios.leiosFetchLogicIteration Leios.demoLeiosFetchStaticEnv offerings + candidateCertEbs filteredOutstanding pure (outstanding', newDecisions) traceWith tracer $ MkTraceLeiosKernel $ "leiosFetchLogic: decided" diff --git a/ouroboros-consensus/src/ouroboros-consensus/LeiosDemoLogic.hs b/ouroboros-consensus/src/ouroboros-consensus/LeiosDemoLogic.hs index bba89278b8..1bbb9d5454 100644 --- a/ouroboros-consensus/src/ouroboros-consensus/LeiosDemoLogic.hs +++ b/ouroboros-consensus/src/ouroboros-consensus/LeiosDemoLogic.hs @@ -262,9 +262,13 @@ leiosFetchLogicIteration :: Ord pid => LeiosFetchStaticEnv -> Map (PeerId pid) (Set EbHash, Set EbHash) -> + -- | Per-peer certified EB hashes inferred from ChainSync candidate + -- fragments. Used as a fallback peer source when no peer has offered the + -- EB body (e.g. for CertRBs whose closure pre-dates the local node). + Map (PeerId pid) (Set EbHash) -> LeiosOutstanding pid -> (LeiosOutstanding pid, LeiosFetchDecisions pid) -leiosFetchLogicIteration env offerings = +leiosFetchLogicIteration env offerings candidateCertEbs = \acc -> go1 acc emptyLeiosFetchDecisions $ expand $ @@ -330,17 +334,25 @@ leiosFetchLogicIteration env offerings = choosePeerEb :: Set (PeerId pid) -> LeiosOutstanding pid -> EbHash -> Maybe (PeerId pid) choosePeerEb peerIds acc ebHash = - foldr (\a _ -> Just a) Nothing $ - [ peerId - | (peerId, (ebHashes, _ebHashes)) <- - Map.toList $ -- TODO prioritize/shuffle? - (`Map.withoutKeys` peerIds) $ -- not already requested from this peer - offerings - , Map.findWithDefault 0 peerId (Leios.requestedBytesSizePerPeer acc) - <= Leios.maxRequestedBytesSizePerPeer env - , -- peer can be sent more requests - ebHash `Set.member` ebHashes -- peer has offered this EB body - ] + case pickFrom (Map.map fst offerings) of + Just peerId -> Just peerId + -- No peer has offered this EB body; fall back to peers whose ChainSync + -- candidate fragment includes the CertRB that depends on this EB. + Nothing -> pickFrom candidateCertEbs + where + pickFrom :: Map (PeerId pid) (Set EbHash) -> Maybe (PeerId pid) + pickFrom source = + foldr (\a _ -> Just a) Nothing $ + [ peerId + | (peerId, ebHashes) <- + Map.toList $ -- TODO prioritize/shuffle? + (`Map.withoutKeys` peerIds) $ -- not already requested from this peer + source + , Map.findWithDefault 0 peerId (Leios.requestedBytesSizePerPeer acc) + <= Leios.maxRequestedBytesSizePerPeer env + , -- peer can be sent more requests + ebHash `Set.member` ebHashes + ] goTx2 :: LeiosOutstanding pid -> @@ -388,20 +400,29 @@ leiosFetchLogicIteration env offerings = BytesSize -> Maybe (PeerId pid, Map EbHash Int) choosePeerTx peerIds acc txOffsets targetTxBytesSize = - foldr (\a _ -> Just a) Nothing $ - [ (peerId, Map.map fst txOffsets') - | (peerId, (_ebIds, ebIds)) <- - Map.toList $ -- TODO prioritize/shuffle? - (`Map.withoutKeys` peerIds) $ -- not already requested from this peer - offerings - , Map.findWithDefault 0 peerId (Leios.requestedBytesSizePerPeer acc) - <= Leios.maxRequestedBytesSizePerPeer env - , -- peer can be sent more requests - let txOffsets' = txOffsets `Map.restrictKeys` ebIds - , case Map.lookupMax txOffsets' of - Nothing -> False - Just (_ebHash, (_txOffset, txBytesSize)) -> targetTxBytesSize == txBytesSize -- peer has offered at least one EB closure that includes this tx with the same size - ] + case pickFrom (Map.map snd offerings) of + Just hit -> Just hit + -- Same fallback rationale as 'choosePeerEb': if a peer's ChainSync + -- candidate contains a CertRB for this EB, the peer must have validated + -- the full closure locally and therefore also has the txs. + Nothing -> pickFrom candidateCertEbs + where + pickFrom :: Map (PeerId pid) (Set EbHash) -> Maybe (PeerId pid, Map EbHash Int) + pickFrom source = + foldr (\a _ -> Just a) Nothing $ + [ (peerId, Map.map fst txOffsets') + | (peerId, ebIds) <- + Map.toList $ -- TODO prioritize/shuffle? + (`Map.withoutKeys` peerIds) $ -- not already requested from this peer + source + , Map.findWithDefault 0 peerId (Leios.requestedBytesSizePerPeer acc) + <= Leios.maxRequestedBytesSizePerPeer env + , -- peer can be sent more requests + let txOffsets' = txOffsets `Map.restrictKeys` ebIds + , case Map.lookupMax txOffsets' of + Nothing -> False + Just (_ebHash, (_txOffset, txBytesSize)) -> targetTxBytesSize == txBytesSize + ] packRequests :: LeiosFetchStaticEnv -> @@ -561,7 +582,11 @@ msgLeiosBlock ktracer tracer (outstandingVar, readyVar) db peerId req eb = do let MkLeiosPoint _ebSlot ebHash = point do let ebBytesSize' = leiosEbBytesSize eb - when (ebBytesSize' /= ebBytesSize) $ do + -- A 0 expected size signals a ChainSel-driven request (see + -- 'pendingEbReconciler'): we don't know the EB body size up front because + -- 'hbMayCertifiedEb' only carries the 'LeiosPoint'. The hash check below + -- is the authoritative integrity check. + when (ebBytesSize /= 0 && ebBytesSize' /= ebBytesSize) $ do error $ "MsgLeiosBlock size mismatch: " <> show (ebBytesSize', ebBytesSize) let ebHash' = hashLeiosEb eb when (ebHash' /= ebHash) $ do diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/API.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/API.hs index 7eeb5716f7..78bbbdeef7 100644 --- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/API.hs +++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/API.hs @@ -74,8 +74,10 @@ module Ouroboros.Consensus.Storage.ChainDB.API import Control.Monad (void) import Control.ResourceRegistry +import Data.Map.Strict (Map) import Data.Typeable (Typeable) import GHC.Generics (Generic) +import LeiosDemoTypes (LeiosPoint) import Ouroboros.Consensus.Block import Ouroboros.Consensus.HeaderStateHistory ( HeaderStateHistory (..) @@ -383,6 +385,10 @@ data ChainDB m blk = ChainDB , getChainSelStarvation :: STM m ChainSelStarvation -- ^ Whether ChainSel is currently starved, or when was last time it -- stopped being starved. + , getPendingCertRBs :: STM m (Map LeiosPoint (HeaderHash blk)) + -- ^ CertRBs filtered from ChainSel candidates because their EB closure + -- is not yet available locally. Keyed by the missing EB's 'LeiosPoint', + -- value is the CertRB's header hash. , getLedgerTablesAtFor :: Point blk -> LedgerTables (ExtLedgerState blk) KeysMK -> diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl.hs index f4cccf860e..ee97f0f8fe 100644 --- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl.hs +++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl.hs @@ -275,6 +275,7 @@ openDBInternal leiosDb args launchBgTasks = runWithTempRegistry $ do , newFollower = Follower.newFollower h , getIsInvalidBlock = getEnvSTM h Query.getIsInvalidBlock , getChainSelStarvation = getEnvSTM h Query.getChainSelStarvation + , getPendingCertRBs = getEnvSTM h Query.getPendingCertRBs , closeDB = closeDB h , isOpen = isOpen h , getCurrentLedger = getEnvSTM h Query.getCurrentLedger diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Query.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Query.hs index fcc148b6b1..26f46e6100 100644 --- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Query.hs +++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Query.hs @@ -30,12 +30,15 @@ module Ouroboros.Consensus.Storage.ChainDB.Impl.Query , getAnyKnownBlock , getAnyKnownBlockComponent , getChainSelStarvation + , getPendingCertRBs ) where import Cardano.Ledger.BaseTypes (unNonZero) import Control.ResourceRegistry (ResourceRegistry) +import Data.Map.Strict (Map) import qualified Data.Map.Strict as Map import qualified Data.Set as Set +import LeiosDemoTypes (LeiosPoint) import Ouroboros.Consensus.Block import Ouroboros.Consensus.Config import Ouroboros.Consensus.HeaderStateHistory @@ -194,6 +197,13 @@ getChainSelStarvation :: STM m ChainSelStarvation getChainSelStarvation CDB{..} = readTVar cdbChainSelStarvation +getPendingCertRBs :: + forall m blk. + IOLike m => + ChainDbEnv m blk -> + STM m (Map LeiosPoint (HeaderHash blk)) +getPendingCertRBs cdb = readTVar (cdbPendingEBs cdb) + getIsValid :: forall m blk. (IOLike m, HasHeader blk) => From dd477343a83e6d5fa77e3d460b57a47ca15e47dd Mon Sep 17 00:00:00 2001 From: Damian Nadales Date: Thu, 14 May 2026 19:57:08 -0300 Subject: [PATCH 07/20] Cap late-join slot at numSlots/4 in prop_leios_late_join The previous range allowed the late node to join as late as numSlots-1, leaving insufficient catch-up time for the chain-consistency assertion to hold for reasons unrelated to the late-join logic under test. --- .../test/cardano-test/Test/ThreadNet/Leios.hs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/ouroboros-consensus-cardano/test/cardano-test/Test/ThreadNet/Leios.hs b/ouroboros-consensus-cardano/test/cardano-test/Test/ThreadNet/Leios.hs index ad012cacc5..1cfa5cc09a 100644 --- a/ouroboros-consensus-cardano/test/cardano-test/Test/ThreadNet/Leios.hs +++ b/ouroboros-consensus-cardano/test/cardano-test/Test/ThreadNet/Leios.hs @@ -347,7 +347,11 @@ prop_leios seed = -- the late node receives a CertRB referencing an EB it never saw. prop_leios_late_join :: Seed -> Property prop_leios_late_join seed = - forAll (choose (1, fromIntegral numSlots - 1)) $ \lateJoinSlot -> + -- Cap the join slot at numSlots/4 so the late node always has at least 3/4 + -- of the run to catch up. Otherwise samples near numSlots would fail the + -- chain-consistency assertion for catch-up-bandwidth reasons unrelated to + -- the late-join logic under test. + forAll (choose (1, fromIntegral numSlots `div` 4)) $ \lateJoinSlot -> let joinPlan = NodeJoinPlan $ From 0ad359d85ce63de5f2bd203e8192f7a230601e64 Mon Sep 17 00:00:00 2001 From: Damian Nadales Date: Fri, 15 May 2026 14:58:46 -0300 Subject: [PATCH 08/20] Sweep pending CertRBs in leiosFetchLogic Closes a TOCTOU window where a CertRB could remain stranded in cdbPendingEBs after its EB closure arrived: if the closure completed between ChainSel's closure-query and its cdbPendingEBs insert, the ebCompletionRunner notification fired against an empty pending set and was dropped. The sweep re-enqueues any pending CertRB whose closure is now in LeiosDb, covering this race and other missed-notification scenarios (subscription gap at startup, etc.). Adds addReprocessBlock to the ChainDB API record so the fetch loop can trigger ChainSel reprocessing without holding ChainDbEnv. --- .../Ouroboros/Consensus/NodeKernel.hs | 12 ++++++++++++ .../Ouroboros/Consensus/Storage/ChainDB/API.hs | 4 ++++ .../Ouroboros/Consensus/Storage/ChainDB/Impl.hs | 1 + 3 files changed, 17 insertions(+) diff --git a/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/NodeKernel.hs b/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/NodeKernel.hs index 09c6f44f40..8d4c60c9a3 100644 --- a/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/NodeKernel.hs +++ b/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/NodeKernel.hs @@ -464,6 +464,18 @@ initNodeKernel traceWith tracer $ MkTraceLeiosKernel "leiosFetchLogic: wait for leios ready" () <- MVar.takeMVar getLeiosReady iterationStart <- getMonotonicTime + -- Sweep cdbPendingEBs for CertRBs whose EB closure has since + -- become available locally, and re-enqueue them for ChainSel. + -- ebCompletionRunner already does this when a notification fires, + -- but it can miss the window if the closure completes between + -- ChainSel's "is the closure present?" check and its insert into + -- cdbPendingEBs. The sweep covers that race and any other + -- missed-notification scenarios (e.g. subscription startup gap). + pending <- atomically $ ChainDB.getPendingCertRBs chainDB + forM_ (Map.toList pending) $ \(leiosPoint, certRBHash) -> do + mayComplete <- LeiosDb.leiosDbQueryCompletedEbByPoint leiosConn leiosPoint + when (isJust mayComplete) $ + void $ ChainDB.addReprocessBlock chainDB certRBHash leiosPeersVars <- MVar.readMVar getLeiosPeersVars offerings <- mapM (MVar.readMVar . Leios.offerings) leiosPeersVars -- Per-peer certified EBs derived from ChainSync candidate fragments, diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/API.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/API.hs index 78bbbdeef7..5a42c376c4 100644 --- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/API.hs +++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/API.hs @@ -160,6 +160,10 @@ data ChainDB m blk = ChainDB -- https://github.com/IntersectMBO/ouroboros-consensus/blob/main/docs/website/contents/for-developers/HandlingBlocksFromTheFuture.md#handling-blocks-from-the-future , chainSelAsync :: m (ChainSelectionPromise m) -- ^ Trigger reprocessing of blocks postponed by the LoE. + , addReprocessBlock :: HeaderHash blk -> m (ChainSelectionPromise m) + -- ^ Re-trigger chain selection for a single CertRB whose EB closure + -- has just become available locally. Removes the block from + -- 'getPendingCertRBs' and re-runs ChainSel against its header. , getCurrentChain :: STM m (AnchoredFragment (Header blk)) -- ^ Get the current chain fragment -- diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl.hs index ee97f0f8fe..7fc9c07038 100644 --- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl.hs +++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl.hs @@ -262,6 +262,7 @@ openDBInternal leiosDb args launchBgTasks = runWithTempRegistry $ do API.ChainDB { addBlockAsync = getEnv2 h ChainSel.addBlockAsync , chainSelAsync = getEnv h ChainSel.triggerChainSelectionAsync + , addReprocessBlock = getEnv1 h (\env' -> addReprocessBlock (cdbChainSelQueue env')) , getCurrentChain = getEnvSTM h Query.getCurrentChain , getCurrentChainWithTime = getEnvSTM h Query.getCurrentChainWithTime , getTipBlock = getEnv h Query.getTipBlock From ca87a5b51cffc5864c9ae3107141e9af69696bf7 Mon Sep 17 00:00:00 2001 From: Damian Nadales Date: Fri, 15 May 2026 15:02:59 -0300 Subject: [PATCH 09/20] Recheck closure after cdbPendingEBs insert If the EB closure completes between ChainSel's first 'is the closure present?' query and its cdbPendingEBs insert, ebCompletionRunner fires against an empty pending set and drops the notification. The previous commit's leiosFetchLogic sweep covers this race on its iteration cadence; this inline recheck closes the immediate window so the CertRB is processed in-place rather than waiting for the next tick. Cross-references between the two sites: ChainSel.hs points at the sweep as the load-bearing fix, NodeKernel.hs points at the recheck as the local optimization for the immediate race. --- .../Ouroboros/Consensus/NodeKernel.hs | 7 +++++ .../Storage/ChainDB/Impl/ChainSel.hs | 28 +++++++++++++++++-- 2 files changed, 32 insertions(+), 3 deletions(-) diff --git a/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/NodeKernel.hs b/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/NodeKernel.hs index 8d4c60c9a3..cda468837f 100644 --- a/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/NodeKernel.hs +++ b/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/NodeKernel.hs @@ -471,6 +471,13 @@ initNodeKernel -- ChainSel's "is the closure present?" check and its insert into -- cdbPendingEBs. The sweep covers that race and any other -- missed-notification scenarios (e.g. subscription startup gap). + -- + -- An inline recheck in ChainSel.hs (the add-block path that + -- inserts into cdbPendingEBs) closes the immediate race window + -- so callers don't have to wait for the next sweep tick; this + -- sweep is the load-bearing fix and also handles cases the + -- inline recheck can't see (e.g. closure arrival just after a + -- prior pending-insert). pending <- atomically $ ChainDB.getPendingCertRBs chainDB forM_ (Map.toList pending) $ \(leiosPoint, certRBHash) -> do mayComplete <- LeiosDb.leiosDbQueryCompletedEbByPoint leiosConn leiosPoint diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/ChainSel.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/ChainSel.hs index f7c65a79b7..10270e723e 100644 --- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/ChainSel.hs +++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/ChainSel.hs @@ -441,8 +441,23 @@ chainSelSync leiosDb cdb@CDB{..} (ChainSelAddBlock BlockToAdd{blockToAdd = b, .. -- If this block certifies an EB whose closure we don't have yet, -- record it as pending and skip chain selection — it would be - -- filtered out anyway. ChainSel will be re-triggered when the - -- EB closure arrives (step 3). + -- filtered out anyway. ChainSel will be re-triggered by + -- 'ebCompletionRunner' (Background.hs) when the EB closure + -- arrives. + -- + -- After inserting into 'cdbPendingEBs', re-query the LeiosDb: + -- the closure may have completed between the first query and + -- the insert, in which case 'ebCompletionRunner' fired against + -- an empty pending set and dropped its notification. If we hit + -- that window, remove the entry and fall through to chain + -- selection in-process — the in-place removal preserves the + -- "every removal pairs with a ChainSel run" property. + -- + -- A broader sweep in 'leiosFetchLogic' (NodeKernel.hs) covers + -- this race and other missed-notification scenarios on its + -- normal iteration cadence; this inline recheck is a local + -- optimization that closes the immediate window without + -- waiting for the next sweep tick. isPending <- lift $ case certifiedEbFromHeader hdr of Nothing -> pure False Just leiosPoint -> do @@ -453,7 +468,14 @@ chainSelSync leiosDb cdb@CDB{..} (ChainSelAddBlock BlockToAdd{blockToAdd = b, .. atomically $ modifyTVar cdbPendingEBs $ Map.insert leiosPoint (headerHash hdr) - pure True + mayClosure' <- leiosDbQueryCompletedEbByPoint leiosDb leiosPoint + case mayClosure' of + Just _ -> do + atomically $ + modifyTVar cdbPendingEBs $ + Map.delete leiosPoint + pure False + Nothing -> pure True unless isPending $ chainSelectionForBlock leiosDb cdb (BlockCache.singleton b) hdr blockPunish From 497122fc95459cc0d5780f942df21bff0fb35fd0 Mon Sep 17 00:00:00 2001 From: Damian Nadales Date: Fri, 15 May 2026 15:52:19 -0300 Subject: [PATCH 10/20] Fix Werror warnings in NodeKernel after late-join changes Drop unused Data.Set and EbHash imports, and rename the shadowing 'handle' binding to 'csHandle' in the candidateCertEbs computation. --- .../Ouroboros/Consensus/NodeKernel.hs | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/NodeKernel.hs b/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/NodeKernel.hs index cda468837f..c9f0494389 100644 --- a/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/NodeKernel.hs +++ b/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/NodeKernel.hs @@ -165,7 +165,6 @@ import Control.Concurrent.Class.MonadSTM.Strict (readTChan) import qualified Data.ByteString as BS import Data.Map (Map) import qualified Data.Map as Map -import Data.Set (Set) import qualified Data.Set as Set import LeiosDemoDb ( LeiosDbConnection @@ -178,8 +177,7 @@ import LeiosDemoDb import qualified LeiosDemoDb as LeiosDb import qualified LeiosDemoLogic as Leios import LeiosDemoTypes - ( EbHash - , ForgedLeiosEb + ( ForgedLeiosEb , LeiosOutstanding , LeiosPeerVars , LeiosPoint (..) @@ -491,8 +489,8 @@ initNodeKernel candidateCertEbs <- atomically $ do handles <- cschcMap varChainSyncHandles fmap (Map.mapKeysMonotonic Leios.MkPeerId) $ - forM handles $ \handle -> do - state <- readTVar (cschState handle) + forM handles $ \csHandle -> do + state <- readTVar (cschState csHandle) pure $ Set.fromList [ pointEbHash From 181cdf644cd3efb1f9563a5d2e193e69678e1270 Mon Sep 17 00:00:00 2001 From: Damian Nadales Date: Fri, 15 May 2026 15:57:32 -0300 Subject: [PATCH 11/20] Document CertRB-candidate peer-fallback invariant --- .../Ouroboros/Consensus/NodeKernel.hs | 8 ++++++++ .../src/ouroboros-consensus/LeiosDemoLogic.hs | 17 ++++++++++++----- 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/NodeKernel.hs b/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/NodeKernel.hs index c9f0494389..c40eea23ee 100644 --- a/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/NodeKernel.hs +++ b/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/NodeKernel.hs @@ -486,6 +486,14 @@ initNodeKernel -- Per-peer certified EBs derived from ChainSync candidate fragments, -- used as a fallback peer source for fetching EB closures that no -- peer has explicitly offered. + -- + -- Load-bearing invariant: if a peer's candidate fragment contains + -- a CertRB, that peer has the EB closure. This rests on the + -- upstream's ChainSync server only emitting selected-chain + -- headers, the upstream's ChainSel filtering CertRBs with missing + -- closures (cdbPendingEBs), and LeiosDB not GC'ing closures. See + -- the fallback branches in 'choosePeerEb' / 'choosePeerTx' in + -- LeiosDemoLogic. candidateCertEbs <- atomically $ do handles <- cschcMap varChainSyncHandles fmap (Map.mapKeysMonotonic Leios.MkPeerId) $ diff --git a/ouroboros-consensus/src/ouroboros-consensus/LeiosDemoLogic.hs b/ouroboros-consensus/src/ouroboros-consensus/LeiosDemoLogic.hs index 1bbb9d5454..b931cdbff5 100644 --- a/ouroboros-consensus/src/ouroboros-consensus/LeiosDemoLogic.hs +++ b/ouroboros-consensus/src/ouroboros-consensus/LeiosDemoLogic.hs @@ -336,8 +336,14 @@ leiosFetchLogicIteration env offerings candidateCertEbs = choosePeerEb peerIds acc ebHash = case pickFrom (Map.map fst offerings) of Just peerId -> Just peerId - -- No peer has offered this EB body; fall back to peers whose ChainSync - -- candidate fragment includes the CertRB that depends on this EB. + -- Fallback: peers whose ChainSync candidate fragment contains the + -- CertRB that depends on this EB. + -- + -- Correctness depends on: (1) the upstream's ChainSync server only + -- emits MsgRollForward for selected-chain headers; (2) the upstream's + -- ChainSel filters CertRBs with missing closures via cdbPendingEBs; + -- (3) LeiosDB does not GC closures. Violations of any of these would + -- cause us to dispatch unsatisfiable fetches. Nothing -> pickFrom candidateCertEbs where pickFrom :: Map (PeerId pid) (Set EbHash) -> Maybe (PeerId pid) @@ -402,9 +408,10 @@ leiosFetchLogicIteration env offerings candidateCertEbs = choosePeerTx peerIds acc txOffsets targetTxBytesSize = case pickFrom (Map.map snd offerings) of Just hit -> Just hit - -- Same fallback rationale as 'choosePeerEb': if a peer's ChainSync - -- candidate contains a CertRB for this EB, the peer must have validated - -- the full closure locally and therefore also has the txs. + -- Fallback: peers whose ChainSync candidate fragment contains a CertRB + -- for this EB must have validated the full closure locally, so they + -- also have the txs. See 'choosePeerEb' for the load-bearing invariant + -- chain this depends on. Nothing -> pickFrom candidateCertEbs where pickFrom :: Map (PeerId pid) (Set EbHash) -> Maybe (PeerId pid, Map EbHash Int) From cdc3fad35b096155c3bbe8445371c83c3725754e Mon Sep 17 00:00:00 2001 From: Damian Nadales Date: Fri, 15 May 2026 16:01:03 -0300 Subject: [PATCH 12/20] Bracket LeiosDbConnection in ebCompletionRunner Async cancellation on shutdown was not releasing the connection. The in-memory backend's close is a no-op so tests were unaffected, but the SQLite backend leaked the database handle. --- .../Ouroboros/Consensus/Storage/ChainDB/Impl/Background.hs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Background.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Background.hs index 6681d12784..0e5b8e3c0c 100644 --- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Background.hs +++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Background.hs @@ -591,8 +591,7 @@ ebCompletionRunner :: m Void ebCompletionRunner leiosDb CDB{..} = do notifChan <- subscribeEbNotifications leiosDb - leiosConn <- open leiosDb - forever $ do + bracket (open leiosDb) close $ \leiosConn -> forever $ do notif <- atomically $ readTChan notifChan let leiosPoint = case notif of AcquiredEb point _ -> point From 8311774682f513fed7fca70f40bf5dea8dce938c Mon Sep 17 00:00:00 2001 From: Damian Nadales Date: Fri, 15 May 2026 16:24:47 -0300 Subject: [PATCH 13/20] Factor pendingEbReconciler discipline into LeiosDemoTypes Move the size-0 / offer-coexistence Map discipline out of the inline reconciler and into 'applyPendingAdded' / 'applyPendingRemoved' next to 'LeiosOutstanding'. Add unit tests for the two invariants: pending entries round-trip cleanly, and offer-supplied entries survive a pending add/remove. --- .../Ouroboros/Consensus/NodeKernel.hs | 24 +++-------- .../src/ouroboros-consensus/LeiosDemoTypes.hs | 31 ++++++++++++++ .../consensus-test/Test/LeiosDemoTypes.hs | 41 +++++++++++++++++++ 3 files changed, 77 insertions(+), 19 deletions(-) diff --git a/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/NodeKernel.hs b/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/NodeKernel.hs index c40eea23ee..fc34282b56 100644 --- a/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/NodeKernel.hs +++ b/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/NodeKernel.hs @@ -56,7 +56,7 @@ import Data.Functor ((<&>)) import Data.Hashable (Hashable) import Data.List.NonEmpty (NonEmpty) import qualified Data.List.NonEmpty as NE -import Data.Maybe (fromMaybe, isJust, mapMaybe) +import Data.Maybe (isJust, mapMaybe) import Data.Proxy import qualified Data.Text as Text import Data.Void (Void) @@ -434,24 +434,10 @@ initNodeKernel when (new == old) retry writeTVar lastAppliedPendingEBs new pure (Map.difference new old, Map.difference old new) - MVar.modifyMVar_ getLeiosOutstanding $ \outstanding -> do - let bodies0 = Leios.missingEbBodies outstanding - -- Add entries from new pending CertRBs; never overwrite an - -- existing non-zero (offer-supplied) size. - bodies1 = - Map.foldlWithKey' - (\acc point _ -> Map.alter (Just . fromMaybe 0) point acc) - bodies0 - added - -- Drop entries we no longer need; keep offer-driven entries - -- (non-zero size) untouched. - bodies2 = - Map.foldlWithKey' - (\acc point _ -> - Map.update (\sz -> if sz == 0 then Nothing else Just sz) point acc) - bodies1 - removed - pure outstanding{Leios.missingEbBodies = bodies2} + MVar.modifyMVar_ getLeiosOutstanding $ + pure + . Leios.applyPendingRemoved (Map.keys removed) + . Leios.applyPendingAdded (Map.keys added) void $ MVar.tryPutMVar getLeiosReady () void $ diff --git a/ouroboros-consensus/src/ouroboros-consensus/LeiosDemoTypes.hs b/ouroboros-consensus/src/ouroboros-consensus/LeiosDemoTypes.hs index 92238b20d7..2ef59e4dc1 100644 --- a/ouroboros-consensus/src/ouroboros-consensus/LeiosDemoTypes.hs +++ b/ouroboros-consensus/src/ouroboros-consensus/LeiosDemoTypes.hs @@ -44,6 +44,7 @@ import Data.IntMap (IntMap) import qualified Data.IntMap as IntMap import Data.Map (Map) import qualified Data.Map as Map +import Data.Maybe (fromMaybe) import Data.Sequence (Seq) import qualified Data.Sequence as Seq import Data.Set (Set) @@ -310,6 +311,36 @@ emptyLeiosOutstanding = , blockingPerEb = Map.empty } +-- | Insert pending-CertRB entries into 'missingEbBodies' with size 0, +-- without clobbering any existing (offer-supplied) non-zero size. Size 0 +-- is the sentinel for "we don't know the body size because the entry was +-- driven by ChainSel via 'cdbPendingEBs' rather than by an offer". +applyPendingAdded :: + Foldable f => f LeiosPoint -> LeiosOutstanding pid -> LeiosOutstanding pid +applyPendingAdded added outstanding = + outstanding + { missingEbBodies = + foldr + (\point acc -> Map.alter (Just . fromMaybe 0) point acc) + (missingEbBodies outstanding) + added + } + +-- | Drop pending-CertRB entries from 'missingEbBodies'. Only removes +-- entries we own (size 0); offer-supplied entries (non-zero) are +-- preserved. +applyPendingRemoved :: + Foldable f => f LeiosPoint -> LeiosOutstanding pid -> LeiosOutstanding pid +applyPendingRemoved removed outstanding = + outstanding + { missingEbBodies = + foldr + (\point acc -> + Map.update (\sz -> if sz == 0 then Nothing else Just sz) point acc) + (missingEbBodies outstanding) + removed + } + prettyLeiosOutstanding :: LeiosOutstanding pid -> String prettyLeiosOutstanding x = unlines $ diff --git a/ouroboros-consensus/test/consensus-test/Test/LeiosDemoTypes.hs b/ouroboros-consensus/test/consensus-test/Test/LeiosDemoTypes.hs index aa36873fa4..7d14e06f06 100644 --- a/ouroboros-consensus/test/consensus-test/Test/LeiosDemoTypes.hs +++ b/ouroboros-consensus/test/consensus-test/Test/LeiosDemoTypes.hs @@ -1,12 +1,20 @@ module Test.LeiosDemoTypes (tests) where import Cardano.Binary (serialize') +import Cardano.Slotting.Slot (SlotNo (SlotNo)) import qualified Data.ByteString as BS +import qualified Data.Map.Strict as Map import qualified Data.Vector as V import LeiosDemoTypes ( BytesSize + , EbHash (..) , LeiosEb (..) + , LeiosOutstanding (..) + , LeiosPoint (..) , TxHash (..) + , applyPendingAdded + , applyPendingRemoved + , emptyLeiosOutstanding , encodeLeiosEb , leiosEbBytesSize , maxTxsPerEb @@ -15,6 +23,7 @@ import Test.QuickCheck ( Gen , Property , chooseInt + , conjoin , counterexample , forAll , frequency @@ -29,8 +38,40 @@ tests = testGroup "LeiosDemoTypes" [ testProperty "leiosEbBytesSize consistent with encodeLeiosEb" prop_ebBytesSizeConsistent + , testProperty "reconciler: pending-only insert/remove round-trip" prop_reconciler_pending_only + , testProperty "reconciler: offer-supplied entry survives pending add/remove" prop_reconciler_offer_wins ] +-- | A pending CertRB whose body size is unknown gets inserted at size 0 +-- and is then dropped cleanly when it leaves the pending set. +prop_reconciler_pending_only :: Property +prop_reconciler_pending_only = + let point = MkLeiosPoint (SlotNo 42) (MkEbHash (BS.replicate 32 0)) + afterAdd = applyPendingAdded [point] (emptyLeiosOutstanding :: LeiosOutstanding ()) + afterRemove = applyPendingRemoved [point] afterAdd + in conjoin + [ Map.lookup point (missingEbBodies afterAdd) === Just 0 + , Map.lookup point (missingEbBodies afterRemove) === Nothing + ] + +-- | An offer that arrived first with a real size must not be clobbered by +-- a pending add and must survive a pending remove. This is the invariant +-- that keeps the fetch byte budget accurate. +prop_reconciler_offer_wins :: Property +prop_reconciler_offer_wins = + let point = MkLeiosPoint (SlotNo 17) (MkEbHash (BS.replicate 32 1)) + realSize = 1234 + start = + (emptyLeiosOutstanding :: LeiosOutstanding ()) + { missingEbBodies = Map.singleton point realSize + } + afterAdd = applyPendingAdded [point] start + afterRemove = applyPendingRemoved [point] afterAdd + in conjoin + [ Map.lookup point (missingEbBodies afterAdd) === Just realSize + , Map.lookup point (missingEbBodies afterRemove) === Just realSize + ] + -- | Minimum tx size as per the ASSUMPTION in 'leiosEbBytesSize'. minTxBytesSize :: Int minTxBytesSize = 55 From 20a6571d3fac76172de518beef2329914da1f609 Mon Sep 17 00:00:00 2001 From: Damian Nadales Date: Fri, 15 May 2026 16:38:51 -0300 Subject: [PATCH 14/20] Cover len-12 HeaderBody CBOR path in generators The HeaderBody generators were hardcoding hbMayCertifiedEb to Nothing, so the len-12 CBOR branch (and the (Nothing, Just) / (Just, Just) combinations of the two optional fields) was never exercised by roundtrip property tests. Add an Arbitrary LeiosPoint and let both HeaderBody generators sample the optionals. --- .../Consensus/Protocol/Serialisation/Generators.hs | 11 +++++++++-- .../Test/Ouroboros/Consensus/Protocol/Praos/Header.hs | 5 +++-- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/ouroboros-consensus-protocol/src/unstable-protocol-testlib/Test/Consensus/Protocol/Serialisation/Generators.hs b/ouroboros-consensus-protocol/src/unstable-protocol-testlib/Test/Consensus/Protocol/Serialisation/Generators.hs index 518019a8c5..272942d604 100644 --- a/ouroboros-consensus-protocol/src/unstable-protocol-testlib/Test/Consensus/Protocol/Serialisation/Generators.hs +++ b/ouroboros-consensus-protocol/src/unstable-protocol-testlib/Test/Consensus/Protocol/Serialisation/Generators.hs @@ -18,7 +18,11 @@ import Cardano.Slotting.Slot , WithOrigin (At, Origin) ) import qualified Data.ByteString as BS -import LeiosDemoTypes (EbAnnouncement (EbAnnouncement), EbHash (MkEbHash)) +import LeiosDemoTypes + ( EbAnnouncement (EbAnnouncement) + , EbHash (MkEbHash) + , LeiosPoint (MkLeiosPoint) + ) import Ouroboros.Consensus.Protocol.Praos (LeiosState (LeiosState), PraosState (PraosState)) import qualified Ouroboros.Consensus.Protocol.Praos as Praos import Ouroboros.Consensus.Protocol.Praos.Header @@ -62,7 +66,7 @@ instance Praos.PraosCrypto c => Arbitrary (HeaderBody c) where <*> ocert <*> arbitrary <*> arbitrary - <*> pure Nothing + <*> arbitrary instance Praos.PraosCrypto c => Arbitrary (Header c) where arbitrary = do @@ -90,5 +94,8 @@ instance Arbitrary PraosState where instance Arbitrary EbAnnouncement where arbitrary = EbAnnouncement <$> (MkEbHash . BS.pack <$> vector 32) <*> arbitrary +instance Arbitrary LeiosPoint where + arbitrary = MkLeiosPoint <$> arbitrary <*> (MkEbHash . BS.pack <$> vector 32) + instance Arbitrary LeiosState where arbitrary = pure $ LeiosState Nothing 0 diff --git a/ouroboros-consensus-protocol/src/unstable-protocol-testlib/Test/Ouroboros/Consensus/Protocol/Praos/Header.hs b/ouroboros-consensus-protocol/src/unstable-protocol-testlib/Test/Ouroboros/Consensus/Protocol/Praos/Header.hs index 133a755a6f..55d3109650 100644 --- a/ouroboros-consensus-protocol/src/unstable-protocol-testlib/Test/Ouroboros/Consensus/Protocol/Praos/Header.hs +++ b/ouroboros-consensus-protocol/src/unstable-protocol-testlib/Test/Ouroboros/Consensus/Protocol/Praos/Header.hs @@ -108,6 +108,7 @@ import Ouroboros.Consensus.Protocol.Praos.VRF , vrfLeaderValue ) import Ouroboros.Consensus.Protocol.TPraos (StandardCrypto) +import Test.Consensus.Protocol.Serialisation.Generators () import Test.QuickCheck ( Gen , arbitrary @@ -445,8 +446,8 @@ genHeaderBody context = do hbBodySize <- choose (1000, 90000) hbBodyHash <- genHash (hbOCert, kesPeriod) <- genCert hbSlotNo context - let hbMayEbAnnouncement = Nothing - hbMayCertifiedEb = Nothing + hbMayEbAnnouncement <- arbitrary + hbMayCertifiedEb <- arbitrary let hbProtVer = protocolVersionZero headerBody = HeaderBody{..} pure $ (headerBody, kesPeriod) From bcc908971d642c95d6924fbadefadd968dbbe970 Mon Sep 17 00:00:00 2001 From: Damian Nadales Date: Fri, 15 May 2026 17:19:16 -0300 Subject: [PATCH 15/20] Add dedicated trace events for ChainSelReprocessBlock Previously the new ChainSelReprocessBlock message reused the LoE event, conflating two unrelated reprocessing mechanisms on the operator side. Add AddedReprocessBlockToQueue / PoppedReprocessBlockFromQueue constructors, both carrying the CertRB hash so the events are correlatable across logs, and thread the tracer through addReprocessBlock. --- .../Ouroboros/Consensus/Storage/ChainDB/Impl.hs | 6 +++++- .../Consensus/Storage/ChainDB/Impl/Background.hs | 11 ++++++++--- .../Ouroboros/Consensus/Storage/ChainDB/Impl/Types.hs | 9 ++++++++- 3 files changed, 21 insertions(+), 5 deletions(-) diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl.hs index 7fc9c07038..c8b207dce9 100644 --- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl.hs +++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl.hs @@ -262,7 +262,11 @@ openDBInternal leiosDb args launchBgTasks = runWithTempRegistry $ do API.ChainDB { addBlockAsync = getEnv2 h ChainSel.addBlockAsync , chainSelAsync = getEnv h ChainSel.triggerChainSelectionAsync - , addReprocessBlock = getEnv1 h (\env' -> addReprocessBlock (cdbChainSelQueue env')) + , addReprocessBlock = + getEnv1 h $ \env' -> + addReprocessBlock + (TraceAddBlockEvent >$< cdbTracer env') + (cdbChainSelQueue env') , getCurrentChain = getEnvSTM h Query.getCurrentChain , getCurrentChainWithTime = getEnvSTM h Query.getCurrentChainWithTime , getTipBlock = getEnv h Query.getTipBlock diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Background.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Background.hs index 0e5b8e3c0c..60ae138697 100644 --- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Background.hs +++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Background.hs @@ -564,8 +564,8 @@ addBlockRunner leiosDb fuse cdb@CDB{..} = do lift $ case message of ChainSelReprocessLoEBlocks _ -> trace PoppedReprocessLoEBlocksFromQueue - ChainSelReprocessBlock _ _ -> - trace PoppedReprocessLoEBlocksFromQueue + ChainSelReprocessBlock hash _ -> + trace $ PoppedReprocessBlockFromQueue hash ChainSelAddBlock BlockToAdd{blockToAdd} -> trace $ PoppedBlockFromQueue $ @@ -603,4 +603,9 @@ ebCompletionRunner leiosDb CDB{..} = do mayComplete <- leiosDbQueryCompletedEbByPoint leiosConn leiosPoint case mayComplete of Nothing -> pure () - Just _ -> void $ addReprocessBlock cdbChainSelQueue certRBHash + Just _ -> + void $ + addReprocessBlock + (contramap TraceAddBlockEvent cdbTracer) + cdbChainSelQueue + certRBHash diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Types.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Types.hs index 74901cf6f4..80aafa164f 100644 --- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Types.hs +++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Types.hs @@ -625,12 +625,14 @@ addReprocessLoEBlocks tracer ChainSelQueue{varChainSelQueue} = do -- arrived. Modelled on 'addReprocessLoEBlocks'. addReprocessBlock :: IOLike m => + Tracer m (TraceAddBlockEvent blk) -> ChainSelQueue m blk -> HeaderHash blk -> m (ChainSelectionPromise m) -addReprocessBlock ChainSelQueue{varChainSelQueue} hash = do +addReprocessBlock tracer ChainSelQueue{varChainSelQueue} hash = do varProcessed <- newEmptyTMVarIO let waitUntilRan = atomically $ readTMVar varProcessed + traceWith tracer $ AddedReprocessBlockToQueue hash atomically $ writeTBQueue varChainSelQueue $ ChainSelReprocessBlock hash varProcessed @@ -863,6 +865,11 @@ data TraceAddBlockEvent blk AddedReprocessLoEBlocksToQueue | -- | ChainSel will reprocess blocks that were postponed by the LoE. PoppedReprocessLoEBlocksFromQueue + | -- | A reprocess-block message was added to the queue, requesting ChainSel + -- to re-run for a CertRB whose EB closure has arrived. + AddedReprocessBlockToQueue !(HeaderHash blk) + | -- | ChainSel will reprocess the given CertRB. + PoppedReprocessBlockFromQueue !(HeaderHash blk) | -- | A block was added to the Volatile DB AddedBlockToVolatileDB (RealPoint blk) BlockNo IsEBB Enclosing | -- | The block fits onto the current chain, we'll try to use it to extend From 8414ae35f4e7c5fffa8e7517780f3884b5f47e87 Mon Sep 17 00:00:00 2001 From: Damian Nadales Date: Fri, 15 May 2026 17:29:49 -0300 Subject: [PATCH 16/20] Key ChainSelReprocessBlock removal by LeiosPoint Insertion into cdbPendingEBs is keyed by LeiosPoint, so removal should be too. Carrying the point on the reprocess message replaces an O(n) Map.filter (/= hash) with an O(log n) Map.delete and removes the implicit value-equals-header-hash invariant. The header hash stays on the message because the consumer still needs it to look up the header in the VolatileDB. --- .../Ouroboros/Consensus/NodeKernel.hs | 2 +- .../Ouroboros/Consensus/Storage/ChainDB/API.hs | 5 +++-- .../Ouroboros/Consensus/Storage/ChainDB/Impl.hs | 2 +- .../Consensus/Storage/ChainDB/Impl/Background.hs | 5 +++-- .../Consensus/Storage/ChainDB/Impl/ChainSel.hs | 7 ++----- .../Consensus/Storage/ChainDB/Impl/Types.hs | 12 +++++++++--- 6 files changed, 19 insertions(+), 14 deletions(-) diff --git a/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/NodeKernel.hs b/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/NodeKernel.hs index fc34282b56..5df1e2a785 100644 --- a/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/NodeKernel.hs +++ b/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/NodeKernel.hs @@ -466,7 +466,7 @@ initNodeKernel forM_ (Map.toList pending) $ \(leiosPoint, certRBHash) -> do mayComplete <- LeiosDb.leiosDbQueryCompletedEbByPoint leiosConn leiosPoint when (isJust mayComplete) $ - void $ ChainDB.addReprocessBlock chainDB certRBHash + void $ ChainDB.addReprocessBlock chainDB leiosPoint certRBHash leiosPeersVars <- MVar.readMVar getLeiosPeersVars offerings <- mapM (MVar.readMVar . Leios.offerings) leiosPeersVars -- Per-peer certified EBs derived from ChainSync candidate fragments, diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/API.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/API.hs index 5a42c376c4..23ce20d61c 100644 --- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/API.hs +++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/API.hs @@ -160,10 +160,11 @@ data ChainDB m blk = ChainDB -- https://github.com/IntersectMBO/ouroboros-consensus/blob/main/docs/website/contents/for-developers/HandlingBlocksFromTheFuture.md#handling-blocks-from-the-future , chainSelAsync :: m (ChainSelectionPromise m) -- ^ Trigger reprocessing of blocks postponed by the LoE. - , addReprocessBlock :: HeaderHash blk -> m (ChainSelectionPromise m) + , addReprocessBlock :: LeiosPoint -> HeaderHash blk -> m (ChainSelectionPromise m) -- ^ Re-trigger chain selection for a single CertRB whose EB closure -- has just become available locally. Removes the block from - -- 'getPendingCertRBs' and re-runs ChainSel against its header. + -- 'getPendingCertRBs' (keyed by 'LeiosPoint') and re-runs ChainSel + -- against its header (looked up by 'HeaderHash'). , getCurrentChain :: STM m (AnchoredFragment (Header blk)) -- ^ Get the current chain fragment -- diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl.hs index c8b207dce9..fb61b4e09c 100644 --- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl.hs +++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl.hs @@ -263,7 +263,7 @@ openDBInternal leiosDb args launchBgTasks = runWithTempRegistry $ do { addBlockAsync = getEnv2 h ChainSel.addBlockAsync , chainSelAsync = getEnv h ChainSel.triggerChainSelectionAsync , addReprocessBlock = - getEnv1 h $ \env' -> + getEnv2 h $ \env' -> addReprocessBlock (TraceAddBlockEvent >$< cdbTracer env') (cdbChainSelQueue env') diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Background.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Background.hs index 60ae138697..274e0838f7 100644 --- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Background.hs +++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Background.hs @@ -546,7 +546,7 @@ addBlockRunner leiosDb fuse cdb@CDB{..} = do case message of ChainSelReprocessLoEBlocks varProcessed -> void $ tryPutTMVar varProcessed () - ChainSelReprocessBlock _ varProcessed -> + ChainSelReprocessBlock _ _ varProcessed -> void $ tryPutTMVar varProcessed () ChainSelAddBlock BlockToAdd{varBlockWrittenToDisk, varBlockProcessed} -> do _ <- @@ -564,7 +564,7 @@ addBlockRunner leiosDb fuse cdb@CDB{..} = do lift $ case message of ChainSelReprocessLoEBlocks _ -> trace PoppedReprocessLoEBlocksFromQueue - ChainSelReprocessBlock hash _ -> + ChainSelReprocessBlock _ hash _ -> trace $ PoppedReprocessBlockFromQueue hash ChainSelAddBlock BlockToAdd{blockToAdd} -> trace $ @@ -608,4 +608,5 @@ ebCompletionRunner leiosDb CDB{..} = do addReprocessBlock (contramap TraceAddBlockEvent cdbTracer) cdbChainSelQueue + leiosPoint certRBHash diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/ChainSel.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/ChainSel.hs index 10270e723e..d93c985725 100644 --- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/ChainSel.hs +++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/ChainSel.hs @@ -392,11 +392,8 @@ chainSelSync leiosDb cdb@CDB{..} (ChainSelReprocessLoEBlocks varProcessed) = do -- Re-trigger chain selection for a CertRB whose EB closure has arrived. -- Remove it from the pending set so 'chainSelectionForBlock' no longer -- filters it out, then run chain selection for that block. -chainSelSync leiosDb cdb@CDB{..} (ChainSelReprocessBlock hash varProcessed) = do - lift $ - atomically $ - modifyTVar cdbPendingEBs $ - Map.filter (/= hash) +chainSelSync leiosDb cdb@CDB{..} (ChainSelReprocessBlock point hash varProcessed) = do + lift $ atomically $ modifyTVar cdbPendingEBs $ Map.delete point hdr <- lift $ VolatileDB.getKnownBlockComponent cdbVolatileDB GetHeader hash chainSelectionForBlock leiosDb cdb BlockCache.empty hdr noPunishment lift $ atomically $ putTMVar varProcessed () diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Types.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Types.hs index 80aafa164f..404621366d 100644 --- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Types.hs +++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Types.hs @@ -558,7 +558,12 @@ data ChainSelMessage m blk -- Used by the late-join mechanism: when a CertRB's missing EB -- closure becomes available, this message re-triggers ChainSel -- for that specific block. + -- + -- The 'LeiosPoint' is the missing EB's point, used as the key into + -- 'cdbPendingEBs'; the 'HeaderHash' is the CertRB itself, used to + -- look the header up in the VolatileDB. ChainSelReprocessBlock + !LeiosPoint !(HeaderHash blk) -- Used for 'ChainSelectionPromise'. !(StrictTMVar m ()) @@ -627,15 +632,16 @@ addReprocessBlock :: IOLike m => Tracer m (TraceAddBlockEvent blk) -> ChainSelQueue m blk -> + LeiosPoint -> HeaderHash blk -> m (ChainSelectionPromise m) -addReprocessBlock tracer ChainSelQueue{varChainSelQueue} hash = do +addReprocessBlock tracer ChainSelQueue{varChainSelQueue} point hash = do varProcessed <- newEmptyTMVarIO let waitUntilRan = atomically $ readTMVar varProcessed traceWith tracer $ AddedReprocessBlockToQueue hash atomically $ writeTBQueue varChainSelQueue $ - ChainSelReprocessBlock hash varProcessed + ChainSelReprocessBlock point hash varProcessed return $ ChainSelectionPromise waitUntilRan -- | Get the oldest message from the 'ChainSelQueue' queue. Can block when the @@ -697,7 +703,7 @@ closeChainSelQueue ChainSelQueue{varChainSelQueue = queue} = do blockAdd = \case ChainSelAddBlock ab -> Just ab ChainSelReprocessLoEBlocks _ -> Nothing - ChainSelReprocessBlock _ _ -> Nothing + ChainSelReprocessBlock _ _ _ -> Nothing -- | To invoke when the given 'ChainSelMessage' has been processed by ChainSel. -- This is used to remove the respective point from the multiset of points in From b64f327fba54a699b1f4f87ce80f2ef509c39513 Mon Sep 17 00:00:00 2001 From: Damian Nadales Date: Fri, 15 May 2026 17:32:23 -0300 Subject: [PATCH 17/20] Strip plan-step references from prop_leios_late_join comments "Step 2" and "step 3" were private references to the late-join implementation plan. Replace them with cross-references to the mechanisms themselves (ChainSel filter, ebCompletionRunner). --- .../test/cardano-test/Test/ThreadNet/Leios.hs | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/ouroboros-consensus-cardano/test/cardano-test/Test/ThreadNet/Leios.hs b/ouroboros-consensus-cardano/test/cardano-test/Test/ThreadNet/Leios.hs index 1cfa5cc09a..04968ace32 100644 --- a/ouroboros-consensus-cardano/test/cardano-test/Test/ThreadNet/Leios.hs +++ b/ouroboros-consensus-cardano/test/cardano-test/Test/ThreadNet/Leios.hs @@ -370,14 +370,15 @@ prop_leios_late_join seed = nodeChains = Chain.toOldestFirst . nodeOutputFinalChain <$> testOutput.testOutputNodes in - -- The simulation must not throw (fixed in step 2). + -- The simulation must not throw: ChainSel skips a CertRB whose EB + -- closure is missing instead of crashing in resolveLeiosBlock. conjoin [ not (null nodeChains) & counterexample "test output was empty" - , -- All nodes should converge to the same chain. Fails after step 2 - -- because CertRBs with missing closures are permanently excluded, - -- making the late node's chain shorter. Step 3 (ChainSel re-trigger - -- on EB arrival) fixes this. + , -- All nodes should converge to the same chain. Without the + -- ChainSel re-trigger on EB closure arrival ('ebCompletionRunner'), + -- the late node's chain stays shorter because the filtered + -- CertRBs are never reconsidered. all (== head (Map.elems nodeChains)) nodeChains & counterexample "nodes have different chains" & counterexample ("chain lengths: " <> show (fmap length nodeChains)) From 34385426f5e5253f569fcff5c718ed579fe5447a Mon Sep 17 00:00:00 2001 From: Damian Nadales Date: Fri, 15 May 2026 17:36:58 -0300 Subject: [PATCH 18/20] Drop CDB{..} wildcards from late-join code paths The project is moving away from RecordWildCards. Project to explicit field accessors at the two sites this branch introduced new wildcard uses (ebCompletionRunner and the ChainSelReprocessBlock equation of chainSelSync). The pre-existing wildcards elsewhere in the file are left intact. --- .../Consensus/Storage/ChainDB/Impl/Background.hs | 8 ++++---- .../Ouroboros/Consensus/Storage/ChainDB/Impl/ChainSel.hs | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Background.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Background.hs index 274e0838f7..a94adea406 100644 --- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Background.hs +++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Background.hs @@ -589,14 +589,14 @@ ebCompletionRunner :: LeiosDbHandle m -> ChainDbEnv m blk -> m Void -ebCompletionRunner leiosDb CDB{..} = do +ebCompletionRunner leiosDb cdb = do notifChan <- subscribeEbNotifications leiosDb bracket (open leiosDb) close $ \leiosConn -> forever $ do notif <- atomically $ readTChan notifChan let leiosPoint = case notif of AcquiredEb point _ -> point AcquiredEbTxs point -> point - pending <- atomically $ readTVar cdbPendingEBs + pending <- atomically $ readTVar (cdbPendingEBs cdb) case Map.lookup leiosPoint pending of Nothing -> pure () Just certRBHash -> do @@ -606,7 +606,7 @@ ebCompletionRunner leiosDb CDB{..} = do Just _ -> void $ addReprocessBlock - (contramap TraceAddBlockEvent cdbTracer) - cdbChainSelQueue + (contramap TraceAddBlockEvent (cdbTracer cdb)) + (cdbChainSelQueue cdb) leiosPoint certRBHash diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/ChainSel.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/ChainSel.hs index d93c985725..dfdece60d6 100644 --- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/ChainSel.hs +++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/ChainSel.hs @@ -392,9 +392,9 @@ chainSelSync leiosDb cdb@CDB{..} (ChainSelReprocessLoEBlocks varProcessed) = do -- Re-trigger chain selection for a CertRB whose EB closure has arrived. -- Remove it from the pending set so 'chainSelectionForBlock' no longer -- filters it out, then run chain selection for that block. -chainSelSync leiosDb cdb@CDB{..} (ChainSelReprocessBlock point hash varProcessed) = do - lift $ atomically $ modifyTVar cdbPendingEBs $ Map.delete point - hdr <- lift $ VolatileDB.getKnownBlockComponent cdbVolatileDB GetHeader hash +chainSelSync leiosDb cdb (ChainSelReprocessBlock point hash varProcessed) = do + lift $ atomically $ modifyTVar (cdbPendingEBs cdb) $ Map.delete point + hdr <- lift $ VolatileDB.getKnownBlockComponent (cdbVolatileDB cdb) GetHeader hash chainSelectionForBlock leiosDb cdb BlockCache.empty hdr noPunishment lift $ atomically $ putTMVar varProcessed () chainSelSync leiosDb cdb@CDB{..} (ChainSelAddBlock BlockToAdd{blockToAdd = b, ..}) = do From 8fc04447fda477f829daeb8f05c9950ca2feaf56 Mon Sep 17 00:00:00 2001 From: Damian Nadales Date: Fri, 15 May 2026 17:42:34 -0300 Subject: [PATCH 19/20] Document candidateCertEbs construction in leiosFetchLogic Pull the per-peer scan into a named 'certifiedEbsFromCandidate' helper, and add inline comments explaining the mapKeysMonotonic safety justification and the singleton-list-generator pattern. No behaviour change. --- .../Ouroboros/Consensus/NodeKernel.hs | 53 ++++++++++++------- 1 file changed, 34 insertions(+), 19 deletions(-) diff --git a/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/NodeKernel.hs b/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/NodeKernel.hs index 5df1e2a785..e0ce1c78ea 100644 --- a/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/NodeKernel.hs +++ b/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/NodeKernel.hs @@ -469,29 +469,44 @@ initNodeKernel void $ ChainDB.addReprocessBlock chainDB leiosPoint certRBHash leiosPeersVars <- MVar.readMVar getLeiosPeersVars offerings <- mapM (MVar.readMVar . Leios.offerings) leiosPeersVars - -- Per-peer certified EBs derived from ChainSync candidate fragments, - -- used as a fallback peer source for fetching EB closures that no - -- peer has explicitly offered. + -- Per-peer set of EB hashes the peer has certified on its + -- selected chain. Used as a fallback peer source in + -- 'choosePeerEb' / 'choosePeerTx' when no peer has actively + -- offered the EB body or closure. -- - -- Load-bearing invariant: if a peer's candidate fragment contains - -- a CertRB, that peer has the EB closure. This rests on the - -- upstream's ChainSync server only emitting selected-chain - -- headers, the upstream's ChainSel filtering CertRBs with missing - -- closures (cdbPendingEBs), and LeiosDB not GC'ing closures. See - -- the fallback branches in 'choosePeerEb' / 'choosePeerTx' in - -- LeiosDemoLogic. + -- Load-bearing invariant: if a peer's candidate fragment + -- contains a CertRB, that peer has the EB closure. This rests + -- on the upstream's ChainSync server only emitting + -- selected-chain headers, the upstream's ChainSel filtering + -- CertRBs with missing closures (cdbPendingEBs), and LeiosDB + -- not GC'ing closures. See the fallback branches in + -- 'choosePeerEb' / 'choosePeerTx' in 'LeiosDemoLogic'. + let + -- For one peer, scan its candidate fragment and collect the + -- hashes of every EB that any header on the candidate + -- certifies. Only the EB hash matters for fetch-decision + -- keying; the slot is discarded. + certifiedEbsFromCandidate csHandle = do + state <- readTVar (cschState csHandle) + pure $ + Set.fromList + -- Singleton-list generator with a refutable pattern: + -- each header that does not certify an EB is silently + -- skipped. + [ pointEbHash + | hwt <- AF.toOldestFirst (csCandidate state) + , Just MkLeiosPoint{pointEbHash} <- + [LedgerDB.certifiedEbFromHeader (hwtHeader hwt)] + ] candidateCertEbs <- atomically $ do + -- Snapshot of all live ChainSync clients (one handle per + -- upstream peer). handles <- cschcMap varChainSyncHandles + -- Re-key from 'ConnectionId' to Leios's 'PeerId' newtype. + -- 'mapKeysMonotonic' is safe because 'MkPeerId' is just an + -- order-preserving wrapper. fmap (Map.mapKeysMonotonic Leios.MkPeerId) $ - forM handles $ \csHandle -> do - state <- readTVar (cschState csHandle) - pure $ - Set.fromList - [ pointEbHash - | hwt <- AF.toOldestFirst (csCandidate state) - , Just MkLeiosPoint{pointEbHash} <- - [LedgerDB.certifiedEbFromHeader (hwtHeader hwt)] - ] + forM handles certifiedEbsFromCandidate newDecisions <- MVar.modifyMVar getLeiosOutstanding $ \outstanding -> do -- Filter outstanding work against DB before running fetch iteration. -- This removes EBs and TXs we already have (e.g., from forging or other peers). From 1d339da376d9bf0df06ed34d8331e1a40cc73d3a Mon Sep 17 00:00:00 2001 From: Damian Nadales Date: Fri, 15 May 2026 17:47:09 -0300 Subject: [PATCH 20/20] Tolerate empty Shelley credentials in voting-key derivation The voting-key hack in protocolInfoCardano did `credssShelleyBased !! 0`, which crashes a relay node (no leader credentials) with "Prelude.!!: index too large" before diffusion starts. Use `listToMaybe` so a node without Shelley-based credentials gets `topLevelConfigVotingKey = Nothing` instead. --- .../Ouroboros/Consensus/Cardano/Node.hs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/ouroboros-consensus-cardano/src/ouroboros-consensus-cardano/Ouroboros/Consensus/Cardano/Node.hs b/ouroboros-consensus-cardano/src/ouroboros-consensus-cardano/Ouroboros/Consensus/Cardano/Node.hs index 0fc2e95a4e..630e03656d 100644 --- a/ouroboros-consensus-cardano/src/ouroboros-consensus-cardano/Ouroboros/Consensus/Cardano/Node.hs +++ b/ouroboros-consensus-cardano/src/ouroboros-consensus-cardano/Ouroboros/Consensus/Cardano/Node.hs @@ -64,6 +64,7 @@ import Control.Exception (assert) import qualified Data.ByteString.Short as Short import Data.Functor.These (These1 (..)) import qualified Data.Map.Strict as Map +import Data.Maybe (listToMaybe) import Data.SOP.BasicFunctors import Data.SOP.Counting import Data.SOP.Functors (Flip (..)) @@ -814,11 +815,12 @@ protocolInfoCardano paramsCardano (Shelley.ShelleyStorageConfig tpraosSlotsPerKESPeriod k) , topLevelConfigCheckpoints = cardanoCheckpoints , -- FIXME: REMOVE THIS. Accesses and re-uses KES signing key material. + -- For nodes without Shelley-based leader credentials (e.g. relays), + -- there is no key material to re-use, so the voting key is Nothing. topLevelConfigVotingKey = - Just - . rawSerialiseUnsoundPureSignKeyKES + rawSerialiseUnsoundPureSignKeyKES . shelleyLeaderCredentialsInitSignKey - $ credssShelleyBased !! 0 + <$> listToMaybe credssShelleyBased } -- When the initial ledger state is not in the Byron era, register various