From f0e1590dfb08bf83c819bf801886283e8d38b904 Mon Sep 17 00:00:00 2001 From: kunxian xia Date: Thu, 25 Jun 2026 16:53:26 +0800 Subject: [PATCH 1/5] add section to explain the math behind tower module --- ceno_recursion_v2/docs/tower_air_spec.md | 173 +++++++++++++++++++---- 1 file changed, 147 insertions(+), 26 deletions(-) diff --git a/ceno_recursion_v2/docs/tower_air_spec.md b/ceno_recursion_v2/docs/tower_air_spec.md index a08dc2bb2..f3325be28 100644 --- a/ceno_recursion_v2/docs/tower_air_spec.md +++ b/ceno_recursion_v2/docs/tower_air_spec.md @@ -4,6 +4,149 @@ This document captures the current behavior of each GKR-related AIR that lives i can reason about constraints or plan refactors without diving back into Rust. Update the relevant section whenever an AIR’s columns, constraints, or interactions change. +## Ground Truth: Layer Reduction Math + +This section is the semantic source of truth for the tower layer reduction. The AIR-specific sections below describe how +the trace and buses realize these identities; protocol changes must preserve this math. + +Let layer `i` be the current parent layer and layer `i + 1` the child layer. For binary fan-in, a child point is written +as `(b, t)` with `t in {0, 1}`. The sumcheck proves the value at a parent point `r` by summing over Boolean `b` with the +multilinear equality polynomial `eq(r, b)`. + +For a product spec `j`: + +```text +Prod_j^i(b) = Prod_j^{i+1}(b, 0) * Prod_j^{i+1}(b, 1) + +Prod_j^i(r) = + sum_b eq(r, b) * Prod_j^{i+1}(b, 0) * Prod_j^{i+1}(b, 1) +``` + +For a LogUp spec `k`, with numerator `P_k` and denominator `Q_k`, the relation is fraction addition: + +```text +P_k^i(b) / Q_k^i(b) = + P_k^{i+1}(b, 0) / Q_k^{i+1}(b, 0) + + P_k^{i+1}(b, 1) / Q_k^{i+1}(b, 1) +``` + +Equivalently: + +```text +P_k^i(b) = + P_k^{i+1}(b, 0) * Q_k^{i+1}(b, 1) + + P_k^{i+1}(b, 1) * Q_k^{i+1}(b, 0) + +Q_k^i(b) = + Q_k^{i+1}(b, 0) * Q_k^{i+1}(b, 1) +``` + +Product specs include both read and write specs. LogUp specs contribute two batched polynomials, `P_k` and `Q_k`. A +single transcript-derived batching challenge `alpha` is used for all specs. If there are `n_prod` product specs, the +flattened batching order is: + +```text +Prod_0, ..., Prod_{n_prod-1}, P_0, Q_0, P_1, Q_1, ... +``` + +so the weights are consecutive powers of `alpha`. The batched parent claim is: + +```text +C_i(r) = + sum_j alpha^j * Prod_j^i(r) + + sum_k alpha^{n_prod + 2k} * P_k^i(r) + + sum_k alpha^{n_prod + 2k + 1} * Q_k^i(r) +``` + +Here `alpha^0 = 1`, matching `get_challenge_pows`. + +Substituting the layer relations gives the sumcheck target: + +```text +C_i(r) = sum_b eq(r, b) * T_i(b) + +T_i(b) = + sum_j alpha^j * Prod_j^{i+1}(b, 0) * Prod_j^{i+1}(b, 1) + + + sum_k alpha^{n_prod + 2k} * ( + P_k^{i+1}(b, 0) * Q_k^{i+1}(b, 1) + + P_k^{i+1}(b, 1) * Q_k^{i+1}(b, 0) + ) + + + sum_k alpha^{n_prod + 2k + 1} * ( + Q_k^{i+1}(b, 0) * Q_k^{i+1}(b, 1) + ) +``` + +If the sumcheck samples point `rho`, its final claim is: + +```text +claim_out = eq(r, rho) * T_i(rho) +``` + +where `eq(r, rho)` is accumulated round-by-round as: + +```text +eq_next = eq_cur * (xi * rho_i + (1 - xi) * (1 - rho_i)) +``` + +Verifier view for layer `i`: + +1. Verify the sumcheck proof for the claim `C_i(r)`. The proof returns the point `rho` and a final evaluation. The child + layer claims at that point are: + + ```text + Prod_j^{i+1}(rho, 0), Prod_j^{i+1}(rho, 1) + + P_k^{i+1}(rho, 0), P_k^{i+1}(rho, 1), + Q_k^{i+1}(rho, 0), Q_k^{i+1}(rho, 1) + ``` + + From those claims, compute: + + ```text + T_i(rho) = + sum_j alpha^j * Prod_j^{i+1}(rho, 0) * Prod_j^{i+1}(rho, 1) + + + sum_k alpha^{n_prod + 2k} * ( + P_k^{i+1}(rho, 0) * Q_k^{i+1}(rho, 1) + + P_k^{i+1}(rho, 1) * Q_k^{i+1}(rho, 0) + ) + + + sum_k alpha^{n_prod + 2k + 1} * ( + Q_k^{i+1}(rho, 0) * Q_k^{i+1}(rho, 1) + ) + ``` + + The sumcheck final evaluation must equal `eq(r, rho) * T_i(rho)`. + +2. If layer `i + 1` is not terminal, derive the next layer's expected sum after sampling `mu` and a fresh batching + challenge `alpha_next`: + + ```text + r_next = (rho, mu) + + C_{i+1}(r_next) = + sum_j alpha_next^j * Prod_j^{i+1}(r_next) + + sum_k alpha_next^{n_prod + 2k} * P_k^{i+1}(r_next) + + sum_k alpha_next^{n_prod + 2k + 1} * Q_k^{i+1}(r_next) + ``` + + Each carried claim is the multilinear interpolation at `mu`: + + ```text + Prod_j^{i+1}(r_next) = + (1 - mu) * Prod_j^{i+1}(rho, 0) + + mu * Prod_j^{i+1}(rho, 1) + ``` + + with the same interpolation for each LogUp `P_k` and `Q_k`. Specs that have no remaining reduction round do not + contribute to the next expected sum. + +Both LogUp cross terms in `T_i` are part of the semantic statement. If an implementation splits or reuses accumulators, +the final sumcheck target must still include the `P0 * Q1 + P1 * Q0` numerator-cross contribution and the `Q0 * Q1` +denominator-cross contribution with their corresponding powers of `alpha`. + ## TowerInputAir (`src/tower/input/air.rs`) ### Columns @@ -176,9 +319,8 @@ AIR’s columns, constraints, or interactions change. - Transcript data `p_xi_0`, `p_xi_1`, `q_xi_0`, `q_xi_1`, interpolated `p_xi`, `q_xi`. - Challenges `lambda`, `lambda_prime`, `mu`. - Running powers `pow_lambda`, `pow_lambda_prime`. - - Accumulators: `acc_sum` for the standard `(p_xi + lambda * q_xi)` contribution, `acc_p_cross`, `acc_q_cross` for - the - log-up initialization terms that previously lived in their own AIR. + - Accumulators: `acc_sum` for the standard `(p_xi + lambda * q_xi)` contribution, plus `acc_p_cross` and + `acc_q_cross` for the LogUp numerator-cross and denominator-cross terms in the layer relation. ### Constraints @@ -196,29 +338,8 @@ AIR’s columns, constraints, or interactions change. - Receives the layer challenge message with both `lambda` and `lambda_prime` on the first row. - Final row sends `TowerLogupClaimMessage { lambda_claim = acc_sum, lambda_prime_claim = acc_q_cross }` plus - `num_logup_count`. (The `acc_p_cross` value remains internal because only the denominator-style accumulator is needed - upstream at the moment.) - -## TowerLogUpSumCheckClaimAir (`src/tower/layer/logup_claim/air.rs`) - -### Columns & Loops - -- Shares the `(proof_idx, idx, layer_idx)` nested-loop structure and reuses `index_id` to count accumulator rows. -- Columns mirror the product AIR plus the denominator evaluations: `is_enabled`, the loop counters/flags, - `(p_xi_0, p_xi_1, q_xi_0, q_xi_1)`, interpolated `(p_xi, q_xi)`, `lambda`, `mu`, `pow_lambda`, `acc_sum`, - `index_id`, and `num_logup_count`. - -### Constraints - -- Recomputes both `p_xi` and `q_xi` in every row. -- Uses the existing log-up contribution `acc_sum_next = acc_sum + (lambda * q_xi) * pow_lambda`. -- `index_id` obeys the same initialization/increment/final-row checks against `num_logup_count` as the product AIR. -- Only the final accumulator row per `(proof_idx, idx, layer_idx)` may drive `TowerLogupClaimBus`. - -### Interactions - -- Layer metadata is consumed on the row flagged by `is_first_layer`. -- Folded logup claim is emitted exactly once per triple when the accumulator row counter reaches `num_logup_count`. + `num_logup_count`. `acc_p_cross` remains in-trace in the current message shape, but the ground-truth layer relation + still requires the numerator-cross contribution to be accounted for when forming the final sumcheck target. ## TowerLayerSumcheckAir (`src/tower/sumcheck/air.rs`) From e7c97cbfa8c450a912a80a111ce8922fbbbf1e18 Mon Sep 17 00:00:00 2001 From: kunxian xia Date: Thu, 25 Jun 2026 16:58:46 +0800 Subject: [PATCH 2/5] revert --- ceno_recursion_v2/docs/tower_air_spec.md | 30 ++++++++++++++++++++---- 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/ceno_recursion_v2/docs/tower_air_spec.md b/ceno_recursion_v2/docs/tower_air_spec.md index f3325be28..3afa1ebf9 100644 --- a/ceno_recursion_v2/docs/tower_air_spec.md +++ b/ceno_recursion_v2/docs/tower_air_spec.md @@ -319,8 +319,9 @@ denominator-cross contribution with their corresponding powers of `alpha`. - Transcript data `p_xi_0`, `p_xi_1`, `q_xi_0`, `q_xi_1`, interpolated `p_xi`, `q_xi`. - Challenges `lambda`, `lambda_prime`, `mu`. - Running powers `pow_lambda`, `pow_lambda_prime`. - - Accumulators: `acc_sum` for the standard `(p_xi + lambda * q_xi)` contribution, plus `acc_p_cross` and - `acc_q_cross` for the LogUp numerator-cross and denominator-cross terms in the layer relation. + - Accumulators: `acc_sum` for the standard `(p_xi + lambda * q_xi)` contribution, `acc_p_cross`, `acc_q_cross` for + the + log-up initialization terms that previously lived in their own AIR. ### Constraints @@ -338,8 +339,29 @@ denominator-cross contribution with their corresponding powers of `alpha`. - Receives the layer challenge message with both `lambda` and `lambda_prime` on the first row. - Final row sends `TowerLogupClaimMessage { lambda_claim = acc_sum, lambda_prime_claim = acc_q_cross }` plus - `num_logup_count`. `acc_p_cross` remains in-trace in the current message shape, but the ground-truth layer relation - still requires the numerator-cross contribution to be accounted for when forming the final sumcheck target. + `num_logup_count`. (The `acc_p_cross` value remains internal because only the denominator-style accumulator is needed + upstream at the moment.) + +## TowerLogUpSumCheckClaimAir (`src/tower/layer/logup_claim/air.rs`) + +### Columns & Loops + +- Shares the `(proof_idx, idx, layer_idx)` nested-loop structure and reuses `index_id` to count accumulator rows. +- Columns mirror the product AIR plus the denominator evaluations: `is_enabled`, the loop counters/flags, + `(p_xi_0, p_xi_1, q_xi_0, q_xi_1)`, interpolated `(p_xi, q_xi)`, `lambda`, `mu`, `pow_lambda`, `acc_sum`, + `index_id`, and `num_logup_count`. + +### Constraints + +- Recomputes both `p_xi` and `q_xi` in every row. +- Uses the existing log-up contribution `acc_sum_next = acc_sum + (lambda * q_xi) * pow_lambda`. +- `index_id` obeys the same initialization/increment/final-row checks against `num_logup_count` as the product AIR. +- Only the final accumulator row per `(proof_idx, idx, layer_idx)` may drive `TowerLogupClaimBus`. + +### Interactions + +- Layer metadata is consumed on the row flagged by `is_first_layer`. +- Folded logup claim is emitted exactly once per triple when the accumulator row counter reaches `num_logup_count`. ## TowerLayerSumcheckAir (`src/tower/sumcheck/air.rs`) From 7981b504a59e288c31829cefd0037a82501b0855 Mon Sep 17 00:00:00 2001 From: kunxian xia Date: Thu, 25 Jun 2026 19:52:01 +0800 Subject: [PATCH 3/5] add design doc on tower module --- ceno_recursion_v2/docs/tower_module_design.md | 584 ++++++++++++++++++ 1 file changed, 584 insertions(+) create mode 100644 ceno_recursion_v2/docs/tower_module_design.md diff --git a/ceno_recursion_v2/docs/tower_module_design.md b/ceno_recursion_v2/docs/tower_module_design.md new file mode 100644 index 000000000..78927a5b0 --- /dev/null +++ b/ceno_recursion_v2/docs/tower_module_design.md @@ -0,0 +1,584 @@ +# Tower Module Design Notes + +This document records the intended module boundaries for the recursive tower verifier. The lower-level AIR constraints +are specified in `tower_air_spec.md`; this file focuses on how the tower module should interact with the rest of the +recursive verifier circuit. + +The tower module is verifier code. Its primary job is to verify every chip tower proof in every Ceno proof being +recursively verified: for each `(proof_idx, chip_id)`, it replays the native Ceno tower verifier for that chip's tower +proof. Transcript order, shape metadata, batching challenges, and exported claims must stay in lockstep with +`ceno_zkvm::scheme::verifier::TowerVerify`. + +## Scope + +The tower module reduces each `(proof_idx, chip_id)` tower proof to an input-layer claim. It does not own downstream +batch-constraint semantics; the next module checks what that reduced claim means. The interaction contract is captured by +the AIR sequence diagram and the per-AIR contracts below. + +The target readers are both human developers and AI agents. Prefer explicit message contracts, algebraic meanings, and +ordering rules over implicit references to current Rust struct names; the doc should be usable as a debugging guide and +as implementation context. + +## Names And Scopes + +Use these names consistently in diagrams, contracts, and AIR specs. + +| Name | Meaning | Scope / Owner | +|------|---------|---------------| +| `proof_idx` | Logical index of the Ceno proof being recursively verified. | Scopes all per-proof buses; may be represented by the typed bus key instead of a payload field. | +| `chip_id` | VK-assigned unique id for one chip. | Selected by proof-shape; together with `proof_idx`, uniquely identifies one tower proof. | +| `layer_idx` | Active tower layer currently being reduced. | Local to one `(proof_idx, chip_id)` tower proof. | +| `tidx` | Transcript cursor at which an observe/sample event occurs. | Per proof; tower AIRs must match the native verifier transcript order. | +| `num_layers` | Number of active tower layers for this chip tower proof. | VK/proof-shape metadata consumed by `TowerInputAir` and `TowerLayerAir`. | +| `num_read_specs` | Number of read product specs for this chip. | VK-derived; forwarded to read product claim folding. | +| `num_write_specs` | Number of write product specs for this chip. | VK-derived; forwarded to write product claim folding. | +| `num_logup_specs` | Number of LogUp specs for this chip. | VK-derived; forwarded to LogUp claim folding. | +| `num_prod_specs` | `num_read_specs + num_write_specs`. | Derived from VK metadata; controls product batching width. | +| `alpha` / `lambda` | Batching challenge for product and LogUp specs. The math spec uses `alpha`; current layer AIRs often use `lambda` for the active challenge. | Fresh per layer; powers are reused across specs only within that layer. | +| `lambda_prime` | Previous layer batching challenge used by claim AIRs for the companion/current claim path. Root layer pins it to one. | Propagated by `TowerLayerAir`. | +| `rho` | Sumcheck evaluation point produced by the layer sumcheck rounds. | Produced by `TowerLayerSumcheckAir` for one layer. | +| `r_i` | Current layer point in the expected claim `C_i(r_i)`. | Carried by `TowerLayerAir` from one layer to the next. | +| `mu` | Merge/interpolation challenge for deriving the next layer point and expected claim. | Sampled by `TowerLayerAir`; used as the next layer's first challenge when another layer remains. | +| `T_i(rho)` | Current-layer expression computed from child claims at `rho`. | Derived by `TowerLayerAir` from read/write/LogUp `current_claim` outputs. | +| `C_{i+1}(rho, mu)` | Next layer's expected batched claim after interpolation at `mu`. | Derived by `TowerLayerAir` from read/write/LogUp `next_claim` outputs when another layer remains. | +| `current_claim` | A claim AIR's contribution to `T_i(rho)`. | Emitted by product/LogUp claim AIRs; corresponds to current Rust `lambda_prime_claim`. | +| `next_claim` | A claim AIR's contribution to `C_{i+1}(rho, mu)`. | Emitted by product/LogUp claim AIRs; corresponds to current Rust `lambda_claim`. | +| `input_layer_claim` | Final reduced claim after the terminal/input layer. | Emitted by tower to `MainBus`; downstream modules interpret its constraint meaning. | + +## Module Interaction Diagram + +The diagram below is the high-level circuit contract. It shows which modules own shape, tower verification, and +downstream claim checking for each `(proof_idx, chip_id)` tower proof. + +```mermaid +flowchart LR + PS[ProofShapeModule] + TW[TowerModule] + MAIN[MainModule] + DOWN[Downstream batch-constraint modules] + + PS -- "TowerModuleBus\n(proof_idx, chip_id, num_layers,\nnum_read_specs, num_write_specs, num_logup_specs)" --> TW + PS -- "AirShapeBus / shape lookups\nVK-derived counts and trace shape" --> TW + + TW -- "MainBus\n(proof_idx, chip_id, final_tidx, input_layer_claim)" --> MAIN + + MAIN -- "main claim / expression claim" --> DOWN +``` + +Boundary rules: + +- `ProofShapeModule` is the source of VK-derived tower shape metadata. +- `TowerModule` verifies the tower proof and emits reduced claims; it does not interpret downstream constraint + semantics. +- `MainModule` and downstream modules check the reduced claim against the rest of the recursive verifier statement. + +## External Module Boundaries + +### Proof Shape Module + +There is one native tower proof per chip per Ceno proof. The proof-shape side owns the VK-derived shape metadata for each +`(proof_idx, chip_id)` pair and starts each tower verification by sending `TowerModuleMessage`: + +```text +( + proof_idx, + chip_id, + num_layers, + num_read_specs, + num_write_specs, + num_logup_specs +) +``` + +where: + +- `proof_idx` scopes the Ceno proof being recursively verified. In the current typed per-proof bus implementation this + may be the bus key rather than a field inside the message payload; +- `chip_id` is the VK-assigned unique id for this chip, selected by proof-shape; +- `num_layers` is the number of active tower layers for this chip's tower proof in this Ceno proof; +- `num_read_specs`, `num_write_specs`, and `num_logup_specs` are selected from the chip VK. + +The proof-shape-to-tower message is shape metadata only. It should not carry transcript cursor state such as +`tower_tidx`; transcript scheduling belongs to the transcript/tower transcript contract. It also should not introduce a +separate tower row identity: `(proof_idx, chip_id)` uniquely identifies the tower proof. + +For the selected chip VK, proof-shape derives: + +```text +num_read_specs = num_read_count +num_write_specs = num_write_count +num_logup_specs = num_logup_count +num_prod_specs = num_read_specs + num_write_specs +``` + +These are extracted from the child VK constraint system, not from the proof. Tower uses them as the trusted shape for +the corresponding chip tower proof: + +- `tower_proof.prod_specs_eval.len() == num_prod_specs`; +- `tower_proof.logup_specs_eval.len() == num_logup_specs`; +- initial and per-layer batching use `num_prod_specs + 2 * num_logup_specs` powers of `alpha`; +- read/write/LogUp claim AIRs use `num_read_specs`, `num_write_specs`, and `num_logup_specs` when folding child claims. + +Tower also relies on proof-shape/metadata checks for: + +- number of product specs; +- number of LogUp specs; +- number of active layers; +- per-layer read/write/LogUp accumulator counts; +- active evaluation-row counts for each spec; +- trace heights and padding rules for tower AIRs. + +Shape metadata is security relevant. If the recursive verifier accepts a different active-round schedule or different +product/LogUp spec counts than the native verifier, it can verify a different tower statement. + +Current implementation note: `TowerModuleMessage` only carries `(idx, tidx, n_logup)`, is sent from the proof-shape +summary row, and the tower input trace is still collapsed to one record per proof. The intended design is one +`TowerModuleBus` shape message and one tower input record per `(proof_idx, chip_id)`, carrying +`(proof_idx, chip_id, num_layers, num_read_specs, num_write_specs, num_logup_specs)`. + +### Main / Downstream Claim Module + +`TowerInputAir` emits the reduced tower claim through `MainBus`, keyed by `proof_idx`: + +```text +MainMessage { chip_id, tidx, claim } +``` + +This claim is the tower output after all active layers have been reduced to the input layer. The downstream main/batch +constraint path owns the interpretation of that claim against constraint expressions or committed evaluations. + +The tower output boundary should specify: + +- the final transcript cursor after tower processing; +- the reduced input-layer claim; +- the final layer batching challenge and merge challenge if downstream modules need them; +- the exact `chip_id` and `proof_idx` scoping rules for multi-proof aggregation. + +## Internal Tower Structure + +The current internal split is: + +```text +TowerInputAir + receives the tower task, samples the initial tower challenge, and sends/receives layer-level messages + +TowerLayerAir + orchestrates layer-to-layer reduction, count checks, layer challenges, and final layer output + +TowerLayerSumcheckAir + verifies each layer sumcheck proof round and returns (claim_out, eq_at_r_prime) + +TowerProdSumCheckClaimAir + folds read/write product child claims for each layer + +TowerLogupSumCheckClaimAir + folds LogUp child claims for each layer +``` + +The internal AIR split may change, but the module boundary should continue to expose protocol-level values: + +```text +current layer: T_i(rho) +next layer: C_{i+1}(rho, mu) +terminal layer: reduced input-layer claim +``` + +Column names such as `*_prime` are implementation details and should not define the protocol contract. + +When the current Rust bus structs use `lambda_claim` and `lambda_prime_claim`, the contract-level names are: + +```text +lambda_claim = next_claim = contribution to C_{i+1}(rho, mu) +lambda_prime_claim = current_claim = contribution to T_i(rho) +``` + +## AIR Contract Priority + +The first specification target is the contract of each AIR: what statement it receives, what statement it emits, and +which other AIR is responsible for checking the linked statement. Local row constraints are secondary; once the contract +is fixed, missing local constraints can be filled in mechanically. + +For tower, an AIR contract should state: + +- the exact bus messages received and sent; +- the transcript events the AIR owns, in native verifier order; +- the shape metadata the AIR trusts and where that metadata comes from; +- the algebraic meaning of each emitted claim; +- the `proof_idx` and `chip_id` scope of every message; +- the zero-work behavior when a chip has no tower interactions. + +The corresponding `tower_air_spec.md` should start each AIR section with this contract before listing column-level +constraints. + +## AIR Interaction Sequence + +For both human developers and AI agents, the diagram below is the first-pass debugging reference for one +`(proof_idx, chip_id)` tower proof. It shows the expected order of bus sends/receives and transcript interactions. A +constraint error should usually map to one arrow: either the producer did not send the matching message, the consumer +used the wrong scope/counter, or a transcript event was placed at the wrong `tidx`. Exact cursor arithmetic is handled by +the `tidx` columns and `tower_transcript_len`. + +```mermaid +sequenceDiagram + participant PS as ProofShapeAir + participant TI as TowerInputAir + participant TR as TranscriptAir + participant TL as TowerLayerAir + participant PR as TowerProdReadSumCheckClaimAir + participant PW as TowerProdWriteSumCheckClaimAir + participant LU as TowerLogupSumCheckClaimAir + participant SC as TowerLayerSumcheckAir + participant M as MainAir + + PS->>TI: 0. TowerModuleBus(proof_idx, chip_id, num_layers, num_read_specs, num_write_specs, num_logup_specs) + TI->>TR: 1. TranscriptBus sample alpha powers for initial batching + TI->>TR: 2. TranscriptBus sample product_sum / root point + TI->>TL: 3. TowerLayerInputBus(chip_id, layer_tidx, num_layers, spec counts, r0_claim, w0_claim, q0_claim) + + loop each active tower layer + alt layer_idx > 0 + TL->>TR: 4. TranscriptBus sample fresh alpha/lambda for layer i + TL->>SC: 5. TowerSumcheckInputBus(chip_id, layer_idx, claim = C_i(r)) + loop each sumcheck round + SC->>TR: 6a. TranscriptBus observe ev1, ev2, ev3 + SC->>TR: 6b. TranscriptBus sample rho round challenge + SC-->>SC: 6c. update sumcheck claim and eq(r, rho) + end + SC->>TL: 7. TowerSumcheckOutputBus(chip_id, layer_idx, final_eval, eq_at_r_prime) + end + + TL->>PR: 8a. TowerProdReadClaimInputBus(chip_id, layer_idx, lambda, lambda_prime, mu, claim_tidx) + TL->>PW: 8b. TowerProdWriteClaimInputBus(chip_id, layer_idx, lambda, lambda_prime, mu, claim_tidx) + TL->>LU: 8c. TowerLogupClaimInputBus(chip_id, layer_idx, lambda, lambda_prime, mu, claim_tidx) + PR->>TR: 9a. TranscriptBus observe read child claims + PW->>TR: 9b. TranscriptBus observe write child claims + LU->>TR: 9c. TranscriptBus observe LogUp child claims + PR->>TL: 10a. TowerProdReadClaimBus(chip_id, layer_idx, read_next_claim, read_current_claim, num_read_count) + PW->>TL: 10b. TowerProdWriteClaimBus(chip_id, layer_idx, write_next_claim, write_current_claim, num_write_count) + LU->>TL: 10c. TowerLogupClaimBus(chip_id, layer_idx, logup_next_claim, logup_current_claim, num_logup_count) + TL->>TL: 11. derive T_i(rho) and check final_eval = eq(r, rho) * T_i(rho) + TL->>TR: 12. TranscriptBus sample merge challenge mu + + alt non-last layer + TL->>SC: 13a. TowerSumcheckChallengeBus(chip_id, layer_idx, mu as next layer round 0 challenge) + TL->>TL: 13b. derive next expected claim C_{i+1}(rho, mu) + else last layer + TL->>TL: 13c. terminal layer produces input_layer_claim + end + end + + TL->>TI: 14. TowerLayerOutputBus(chip_id, final_tidx, input_layer_claim, lambda, mu) + TI->>M: 15. MainBus(chip_id, final_tidx, input_layer_claim) +``` + +## Per-AIR Contracts + +Read these contracts as details for each participant in the sequence diagram above. + +### ProofShapeAir + +`ProofShapeAir` is outside the tower module, but it is the source of tower shape truth. For each `(proof_idx, chip_id)` +tower proof, it selects the chip VK metadata and sends: + +```text +TowerModuleBus( + proof_idx, + chip_id, + num_layers, + num_read_specs, + num_write_specs, + num_logup_specs +) +``` + +The counts are VK-derived: + +```text +num_read_specs = num_read_count +num_write_specs = num_write_count +num_logup_specs = num_logup_count +num_prod_specs = num_read_specs + num_write_specs +``` + +`ProofShapeAir` must not derive these counts from the proof. It may also publish per-layer count metadata through +`AirShapeBus`, but that metadata must agree with the same selected VK and tower instance. + +### TowerInputAir + +`TowerInputAir` is the per-`(proof_idx, chip_id)` tower entry and exit boundary. + +Receives: + +```text +TowerModuleBus( + proof_idx, + chip_id, + num_layers, + num_read_specs, + num_write_specs, + num_logup_specs +) +TowerLayerOutputBus(chip_id, final_tidx, layer_idx_end, input_layer_claim, lambda, mu) +``` + +Sends: + +```text +TowerLayerInputBus( + chip_id, + layer_tidx, + num_layers, + num_read_specs, + num_write_specs, + num_logup_specs, + r0_claim, + w0_claim, + q0_claim +) +MainBus(chip_id, final_tidx, input_layer_claim) +TranscriptBus samples/observations for the initial tower transcript prefix +``` + +Contract: + +- one active `TowerInputAir` row corresponds to one `(proof_idx, chip_id)` tower proof; +- it anchors the VK-derived spec counts to the tower instance, but does not derive them; +- it obtains tower transcript cursor state from the transcript/tower transcript contract, not from `TowerModuleBus`; +- it forwards `num_layers`, `num_read_specs`, `num_write_specs`, and `num_logup_specs` to `TowerLayerAir`; +- it routes the final reduced input-layer claim to `MainBus` under the same `chip_id`; +- if `num_layers = 0` or all spec counts are zero, it must not create active layer work and must use the documented + zero-work output behavior. + +### TowerLayerAir + +`TowerLayerAir` is the layer orchestrator. It owns the transition from one tower layer claim to the next expected claim. + +Receives: + +```text +TowerLayerInputBus( + chip_id, + layer_tidx, + num_layers, + num_read_specs, + num_write_specs, + num_logup_specs, + r0_claim, + w0_claim, + q0_claim +) +TowerSumcheckOutputBus(chip_id, layer_idx, tidx_after_sumcheck, claim_out, eq_at_r_prime) +TowerProdReadClaimBus(chip_id, layer_idx, read_next_claim, read_current_claim, num_read_count) +TowerProdWriteClaimBus(chip_id, layer_idx, write_next_claim, write_current_claim, num_write_count) +TowerLogupClaimBus(chip_id, layer_idx, logup_next_claim, logup_current_claim, num_logup_count) +optional AirShapeBus count metadata for cross-checking the selected chip +``` + +Sends: + +```text +TowerSumcheckInputBus(chip_id, layer_idx, is_last_layer, sumcheck_tidx, claim) +TowerProdReadClaimInputBus(chip_id, layer_idx, claim_tidx, lambda, lambda_prime, mu) +TowerProdWriteClaimInputBus(chip_id, layer_idx, claim_tidx, lambda, lambda_prime, mu) +TowerLogupClaimInputBus(chip_id, layer_idx, claim_tidx, lambda, lambda_prime, mu) +TowerSumcheckChallengeBus(chip_id, layer_idx, round, challenge) +TowerLayerOutputBus(chip_id, final_tidx, layer_idx_end, input_layer_claim, lambda, mu) +TranscriptBus samples for layer batching and merge challenges +``` + +Contract: + +- for layer `i`, it supplies the claim that the layer sumcheck must verify; +- from claim AIR outputs, it derives `T_i(rho)` and checks the sumcheck final evaluation against + `eq(r_i, rho) * T_i(rho)`; +- if another layer remains, it derives the next expected claim `C_{i+1}(rho, mu)`; +- if this is the terminal/input layer, it emits the reduced `input_layer_claim`; +- it checks read/write/LogUp counts against the proof-shape metadata forwarded from `TowerInputAir`, not against + proof-provided lengths alone. + +### TowerLayerSumcheckAir + +`TowerLayerSumcheckAir` owns only the sumcheck transcript and sumcheck algebra for one layer. + +Receives: + +```text +TowerSumcheckInputBus(chip_id, layer_idx, is_last_layer, sumcheck_tidx, claim) +TowerSumcheckChallengeBus(chip_id, layer_idx, previous_round, challenge) +TranscriptBus observations for prover messages +TranscriptBus samples for rho challenges +``` + +Sends: + +```text +TowerSumcheckOutputBus(chip_id, layer_idx, tidx_after_sumcheck, final_evaluation, eq_at_r_prime) +TowerSumcheckChallengeBus(chip_id, layer_idx, round, challenge) +``` + +Contract: + +- it verifies the univariate sumcheck rounds starting from the input claim; +- it emits the final sumcheck evaluation and `eq_at_r_prime = eq(r_i, rho)`; +- it does not know product or LogUp semantics; `TowerLayerAir` interprets the final evaluation against `T_i(rho)`. + +### TowerProdReadSumCheckClaimAir and TowerProdWriteSumCheckClaimAir + +The read and write product claim AIRs have the same contract, with separate buses. + +Receives: + +```text +TowerProd{Read,Write}ClaimInputBus(chip_id, layer_idx, claim_tidx, lambda, lambda_prime, mu) +TranscriptBus observations for active child product claims +``` + +Sends: + +```text +TowerProd{Read,Write}ClaimBus(chip_id, layer_idx, next_claim, current_claim, num_prod_count) +``` + +Contract: + +- `current_claim` is this product group's contribution to `T_i(rho)`; +- `next_claim` is this product group's contribution to `C_{i+1}(rho, mu)`; +- `num_prod_count` is the number of active read or write product specs for the selected chip/layer; +- the AIR owns the transcript observation of the product child claims but not the layer-level sumcheck check. + +### TowerLogupSumCheckClaimAir + +`TowerLogupSumCheckClaimAir` folds the LogUp numerator/denominator child claims. + +Receives: + +```text +TowerLogupClaimInputBus(chip_id, layer_idx, claim_tidx, lambda, lambda_prime, mu) +TranscriptBus observations for active LogUp child claims +``` + +Sends: + +```text +TowerLogupClaimBus(chip_id, layer_idx, next_claim, current_claim, num_logup_count) +``` + +Contract: + +- `current_claim` is the LogUp contribution to `T_i(rho)` using the native numerator/denominator reduction; +- `next_claim` is the LogUp contribution to `C_{i+1}(rho, mu)` after interpolation at `mu`; +- `num_logup_count` is the number of active LogUp specs for the selected chip/layer; +- it observes LogUp child claims in the same order as the native verifier. + +## Bus Summary + +All quantities are scoped by `proof_idx`. `PermutationCheck` buses are typed per-proof permutation buses unless noted +otherwise. Tower-specific buses are further scoped by `chip_id`, so `(proof_idx, chip_id)` identifies the chip tower +proof inside the recursive verifier. The `TowerModuleBus` payload is the proof-shape metadata: +`(proof_idx, chip_id, num_layers, num_read_specs, num_write_specs, num_logup_specs)`. If `proof_idx` is already the +typed per-proof bus key, the implementation may omit it from the message struct while preserving it as part of the +logical contract. + +| Bus | Bus Type | Producer | Consumer | Payload | Scope | Meaning | +|-----|----------|----------|----------|---------|-------|---------| +| `TowerModuleBus` | `PermutationCheck` | `ProofShapeAir` | `TowerInputAir` | `(proof_idx, chip_id, num_layers, num_read_specs, num_write_specs, num_logup_specs)` | per `(proof_idx, chip_id)` tower proof | Starts one tower verification with VK-derived shape metadata. | +| `TowerLayerInputBus` | `PermutationCheck` | `TowerInputAir` | `TowerLayerAir` | `(chip_id, layer_tidx, num_layers, num_read_specs, num_write_specs, num_logup_specs, r0_claim, w0_claim, q0_claim)` | per `(proof_idx, chip_id)` tower proof | Anchors the initial batched claims and shape counts for all tower layers. | +| `TowerLayerOutputBus` | `PermutationCheck` | `TowerLayerAir` | `TowerInputAir` | `(chip_id, final_tidx, layer_idx_end, input_layer_claim, lambda, mu)` | per `(proof_idx, chip_id)` tower proof | Returns the reduced terminal/input-layer tower claim. | +| `TowerProdReadClaimInputBus` | `PermutationCheck` | `TowerLayerAir` | `TowerProdReadSumCheckClaimAir` | `(chip_id, layer_idx, claim_tidx, lambda, lambda_prime, mu)` | per `(proof_idx, chip_id, layer_idx)` | Starts read product child-claim folding for one layer. | +| `TowerProdReadClaimBus` | `PermutationCheck` | `TowerProdReadSumCheckClaimAir` | `TowerLayerAir` | `(chip_id, layer_idx, read_next_claim, read_current_claim, num_read_count)` | per `(proof_idx, chip_id, layer_idx)` | Returns read product contributions to `C_{i+1}(rho, mu)` and `T_i(rho)`. | +| `TowerProdWriteClaimInputBus` | `PermutationCheck` | `TowerLayerAir` | `TowerProdWriteSumCheckClaimAir` | `(chip_id, layer_idx, claim_tidx, lambda, lambda_prime, mu)` | per `(proof_idx, chip_id, layer_idx)` | Starts write product child-claim folding for one layer. | +| `TowerProdWriteClaimBus` | `PermutationCheck` | `TowerProdWriteSumCheckClaimAir` | `TowerLayerAir` | `(chip_id, layer_idx, write_next_claim, write_current_claim, num_write_count)` | per `(proof_idx, chip_id, layer_idx)` | Returns write product contributions to `C_{i+1}(rho, mu)` and `T_i(rho)`. | +| `TowerLogupClaimInputBus` | `PermutationCheck` | `TowerLayerAir` | `TowerLogupSumCheckClaimAir` | `(chip_id, layer_idx, claim_tidx, lambda, lambda_prime, mu)` | per `(proof_idx, chip_id, layer_idx)` | Starts LogUp numerator/denominator child-claim folding for one layer. | +| `TowerLogupClaimBus` | `PermutationCheck` | `TowerLogupSumCheckClaimAir` | `TowerLayerAir` | `(chip_id, layer_idx, logup_next_claim, logup_current_claim, num_logup_count)` | per `(proof_idx, chip_id, layer_idx)` | Returns LogUp contributions to `C_{i+1}(rho, mu)` and `T_i(rho)`. | +| `TowerSumcheckInputBus` | `PermutationCheck` | `TowerLayerAir` | `TowerLayerSumcheckAir` | `(chip_id, layer_idx, is_last_layer, sumcheck_tidx, claim)` | per `(proof_idx, chip_id, layer_idx)` | Starts the layer sumcheck from expected claim `C_i(r_i)`. | +| `TowerSumcheckOutputBus` | `PermutationCheck` | `TowerLayerSumcheckAir` | `TowerLayerAir` | `(chip_id, layer_idx, tidx_after_sumcheck, final_evaluation, eq_at_r_prime)` | per `(proof_idx, chip_id, layer_idx)` | Returns the sumcheck final evaluation and `eq(r_i, rho)`. | +| `TowerSumcheckChallengeBus` | `PermutationCheck` | `TowerLayerAir`, `TowerLayerSumcheckAir` | `TowerLayerSumcheckAir` | `(chip_id, layer_idx, round, challenge)` | per `(proof_idx, chip_id, layer_idx, round)` | Links each round challenge, including the layer-to-layer `mu` when it seeds the next layer. | +| `TranscriptBus` | `PermutationCheck` | `TranscriptAir` for samples; tower AIRs for observations | `TowerInputAir`, `TowerLayerAir`, `TowerLayerSumcheckAir`, product/logup claim AIRs for samples; `TranscriptAir` for observations | `(tidx, value, access metadata)` | per-proof | Enforces native verifier observe/sample order. | +| `AirShapeBus` | `Lookup` | `ProofShapeAir` | `TowerLayerAir` | VK-derived trace/count metadata | per-proof | Cross-checks selected tower shape against proof-shape metadata. | +| `MainBus` | `PermutationCheck` | `TowerInputAir` | `MainAir` | `(chip_id, final_tidx, input_layer_claim)` | per `(proof_idx, chip_id)` main claim | Exports the reduced tower claim for downstream constraint checking. | + +## Layer Verification Contract + +For each non-terminal layer `i`, tower verifies two related values from the same child-layer claims. + +First, it verifies the current sumcheck output: + +```text +sumcheck_final_eval = eq(r_i, rho) * T_i(rho) +``` + +where `T_i(rho)` is computed from: + +```text +Prod_j^{i+1}(rho, 0), Prod_j^{i+1}(rho, 1) +P_k^{i+1}(rho, 0), P_k^{i+1}(rho, 1) +Q_k^{i+1}(rho, 0), Q_k^{i+1}(rho, 1) +``` + +Second, if another layer remains, it derives the next layer's expected claim: + +```text +C_{i+1}(rho, mu) +``` + +using interpolation at `mu` and fresh `alpha_next` powers. Specs that have no remaining active round must not contribute +to this next expected claim. + +## Edge Cases / Zero Work Behavior + +These cases must be explicit because they affect bus multiplicities, transcript cursor movement, and whether inactive +rows are allowed to send messages. + +- **Zero tower work:** if a selected `(proof_idx, chip_id)` has `num_layers = 0` or all spec counts are zero, + `TowerInputAir` must still consume the proof-shape task, but it must not create active layer, sumcheck, product, or + LogUp work. The exported claim and transcript movement must match the native verifier's documented zero-work behavior. +- **Zero read specs:** `TowerProdReadSumCheckClaimAir` has no active child claims. It must create no positive-count read + folding work; any required zero-count boundary message must be fully scoped and masked. The read contribution to both + `T_i(rho)` and `C_{i+1}(rho, mu)` is zero. +- **Zero write specs:** `TowerProdWriteSumCheckClaimAir` has no active child claims. It must create no positive-count + write folding work; any required zero-count boundary message must be fully scoped and masked. The write contribution + to both `T_i(rho)` and `C_{i+1}(rho, mu)` is zero. +- **Zero LogUp specs:** `TowerLogupSumCheckClaimAir` has no active child claims. It must create no positive-count LogUp + folding work; any required zero-count boundary message must be fully scoped and masked. The LogUp contribution to both + `T_i(rho)` and `C_{i+1}(rho, mu)` is zero. +- **Inactive specs in later layers:** a spec that has no remaining active round must not contribute to the next expected + claim and must not consume proof-provided child evaluations for that inactive layer. +- **Terminal/input layer:** the terminal layer checks the current sumcheck output against `T_i(rho)` and then emits + `input_layer_claim`. It must not derive or send a non-terminal `C_{i+1}(rho, mu)` claim. +- **Shape mismatch:** if proof-provided lengths, active row counts, or layer counts disagree with proof-shape/VK + metadata, the recursive verifier must reject through unsatisfied constraints rather than adapting to the proof shape. +- **Padding rows:** padded rows may carry default values only when their send/receive multiplicities are fully masked. + A padded row must not create transcript, bus, or count effects. + +## Debugging Guide + +Use the sequence diagram first, then this table. Most failures should reduce to one producer/consumer pair, one scope +field, or one transcript cursor. + +| Symptom | Check first | Likely broken contract | +|---------|-------------|------------------------| +| `TowerModuleBus` receive does not match | `proof_idx`, `chip_id`, and VK-derived counts from proof-shape | Proof-shape sent the wrong tower task or tower consumed it under the wrong scope. | +| Multiple or missing tower input rows for a chip | one active `TowerInputAir` row per `(proof_idx, chip_id)` | Tower proof identity is not keyed exactly by `(proof_idx, chip_id)`. | +| Product or LogUp claim count mismatch | `num_read_specs`, `num_write_specs`, `num_logup_specs`, and per-layer active counts | Claim AIR is using proof-provided lengths or stale forwarded counts instead of proof-shape metadata. | +| Sumcheck rounds fail before final evaluation | `TowerSumcheckInputBus`, round challenges, and transcript observations | `TowerLayerSumcheckAir` is not replaying the native sumcheck transcript/algebra for this layer. | +| Sumcheck final evaluation fails | `eq_at_r_prime`, `current_claim` outputs, and construction of `T_i(rho)` | `TowerLayerAir` assembled the current-layer expression incorrectly, or the claim AIRs swapped current/next claims. | +| Next layer expected claim is wrong | `next_claim` outputs, `mu`, inactive spec masks, and fresh `alpha_next` powers | `TowerLayerAir` derived `C_{i+1}(rho, mu)` with the wrong interpolation challenge or batching weights. | +| Terminal layer still expects another layer | `layer_idx`, `num_layers`, and `is_last_layer` | Terminal/non-terminal gating is wrong, causing an invalid `C_{i+1}` transition. | +| `TranscriptBus` failure at a specific `tidx` | nearest numbered arrow in the AIR sequence diagram | The owner AIR observed or sampled at the wrong cursor, wrong order, or wrong multiplicity. | +| `MainBus` claim mismatch | `TowerLayerOutputBus` and `TowerInputAir` export row | Tower emitted the wrong `input_layer_claim` or changed `chip_id`/`final_tidx` at the boundary. | +| Cross-proof or cross-chip leakage | all bus scope fields and typed per-proof bus instances | A message omitted `chip_id`, used the wrong per-proof bus key, or reused state across tower proofs. | + +## Design Invariants + +- **Transcript lockstep:** every observe/sample event must match the native verifier order, labels, and cursor length. +- **Fresh layer batching:** each layer samples a fresh batching challenge; specs within that layer use its powers. +- **Shape agreement:** proof-shape metadata must force the same active spec/layer schedule as the native verifier. +- **No hidden semantics in column names:** protocol docs should describe `T_i(rho)` and `C_{i+1}(rho, mu)`, not rely on + `_prime` or other implementation-specific column names. +- **Downstream ownership:** tower emits the reduced claim; downstream modules check the claim's constraint semantics. + +## Open Design Questions + +- Should proof-shape metadata provide per-spec active-round lengths directly, rather than deriving them from counts in + tracegen? +- Should transcript cursor arithmetic remain closed-form in tower AIRs, or be centralized in a schedule/preflight table? From 8a0915f6f3b14be04531e296fb0b792b9b17c677 Mon Sep 17 00:00:00 2001 From: kunxian xia Date: Fri, 26 Jun 2026 22:40:00 +0800 Subject: [PATCH 4/5] docs: update tower AIR and module design specs --- ceno_recursion_v2/docs/tower_air_spec.md | 865 ++++++++++++------ ceno_recursion_v2/docs/tower_module_design.md | 634 ++++++++----- 2 files changed, 990 insertions(+), 509 deletions(-) diff --git a/ceno_recursion_v2/docs/tower_air_spec.md b/ceno_recursion_v2/docs/tower_air_spec.md index 3afa1ebf9..835764fb4 100644 --- a/ceno_recursion_v2/docs/tower_air_spec.md +++ b/ceno_recursion_v2/docs/tower_air_spec.md @@ -4,364 +4,655 @@ This document captures the current behavior of each GKR-related AIR that lives i can reason about constraints or plan refactors without diving back into Rust. Update the relevant section whenever an AIR’s columns, constraints, or interactions change. -## Ground Truth: Layer Reduction Math +## Protocol Math Reference -This section is the semantic source of truth for the tower layer reduction. The AIR-specific sections below describe how -the trace and buses realize these identities; protocol changes must preserve this math. +The tower protocol math is specified in +[tower_module_design.md](tower_module_design.md#protocol-math). This AIR spec should describe how each AIR realizes that +contract through columns, buses, row loops, and local constraints. Keep per-AIR accumulator formulas here when they are +needed to debug constraints, but do not duplicate the global layer-reduction source of truth. -Let layer `i` be the current parent layer and layer `i + 1` the child layer. For binary fan-in, a child point is written -as `(b, t)` with `t in {0, 1}`. The sumcheck proves the value at a parent point `r` by summing over Boolean `b` with the -multilinear equality polynomial `eq(r, b)`. +## TowerInputAir (`src/tower/input/air.rs`) + +### Columns + +| Field | Shape | Description | +|----------------------------|-----------------|----------------------------------------------------------------------| +| `is_enabled` | scalar | Row selector (0 = padding). | +| `proof_idx` | scalar | Outer proof loop index enforced by nested sub-AIRs. | +| `chip_id` | scalar | VK-assigned chip identity for this tower proof. | +| `n_layer` | scalar | Number of active GKR layers for the proof. | +| `is_n_layer_zero` | scalar | Flag for `n_layer == 0` (drives “no interaction” branches). | +| `is_n_layer_zero_aux` | `IsZeroAuxCols` | Witness used by `IsZeroSubAir` to enforce the zero test. | +| `tidx` | scalar | Transcript cursor at start of the proof. | +| `r0_claim` | `[D_EF]` | Product of read root out-eval pairs returned to `ProofShapeAir`. | +| `w0_claim` | `[D_EF]` | Product of write root out-eval pairs returned to `ProofShapeAir`. | +| `p0_claim` | `[D_EF]` | Root LogUp numerator claim returned to `ProofShapeAir`. | +| `q0_claim` | `[D_EF]` | Root LogUp denominator claim returned to `ProofShapeAir`. | +| `alpha_logup` | `[D_EF]` | Current Rust column name for the root batching challenge `lambda_1`. | +| `r_1` | `[D_EF]` | Root one-variable interpolation point for the initial claim. | +| `initial_tower_claim` | `[D_EF]` | Batched initial claim `C_1(r_1)` sent to `TowerLayerAir`. | +| `input_layer_claim` | `[D_EF]` | Folded claim returned from `TowerLayerAir`. | +| `layer_output_lambda_next` | `[D_EF]` | Batching challenge sampled in the final GKR layer (zeros if unused). | +| `layer_output_mu` | `[D_EF]` | Reduction point sampled in the final GKR layer (zeros if unused). | + +### Row Constraints + +- **Enablement / identity**: Constraints enforce boolean `is_enabled`, padding-after-padding, and one enabled + `TowerInputAir` row per `(proof_idx, chip_id)` tower proof. No additional local tower identifier is used; `chip_id` is + the per-chip tower identity. +- **Zero test**: `IsZeroSubAir` checks `n_layer` against `is_n_layer_zero`, unlocking the “no interaction” path. +- **Input layer defaults**: When `n_layer == 0`, the input-layer claim must be `[0, alpha_logup]` (numerator zero, denominator + equals `alpha_logup`). +- **Transcript math**: Local expressions derive the transcript offsets for batching-challenge sampling, per-layer reductions, and the + xi-sampling window directly from `n_layer`. No auxiliary `n_max` adjustment is needed. +- **Root claim export**: `TowerInputAir` returns `(r0_claim, w0_claim, p0_claim, q0_claim)` to `ProofShapeAir`. + The read/write roots are: + + ```text + r0_claim = product_k r_out_evals[k][0] * r_out_evals[k][1] + w0_claim = product_k w_out_evals[k][0] * w_out_evals[k][1] + ``` + + Proof-shape uses those chip-level values to enforce: + + ```text + prod_chip r0_claim = prod_chip w0_claim + sum_chip p0_claim / q0_claim = 0 / x + ``` +- **Initial claim assembly**: `TowerInputAir` samples the root batching challenge and root interpolation point + `(lambda_1, r_1)`, passes them to the read, write, and LogUp root fold AIRs, and receives the three contributions: + + ```text + initial_tower_claim = read_initial_claim + write_initial_claim + logup_initial_claim + ``` + + Those contributions are computed from the same root out-eval rows that produce + `(r0_claim, w0_claim, p0_claim, q0_claim)`. + +### Interactions + +- **Internal buses** + - `Tower{Read,Write}RootInputBus.send`: starts root product folding with + `(chip_id, claim_tidx, lambda_1, r_1, lambda_1_start, num_prod_count)`. + - `TowerLogupRootInputBus.send`: starts root LogUp folding with + `(chip_id, claim_tidx, lambda_1, r_1, lambda_1_start, num_logup_count)`. + - `Tower{Read,Write}RootBus.receive`: pulls `r0_claim` or `w0_claim` from the product root folds. + - `TowerLogupRootBus.receive`: pulls `(p0_claim, q0_claim)` from the LogUp root fold. + - `Tower{Read,Write}InitBus.receive`: pulls the read/write contributions to `C_1(r_1)`. + - `TowerLogupInitBus.receive`: pulls the LogUp contribution to `C_1(r_1)`. + - `TowerLayerInputBus.send`: emits + `(chip_id, layer_tidx, num_layers, num_read_specs, num_write_specs, num_logup_specs, initial_tower_claim)` + when interactions exist. + - `TowerLayerOutputBus.receive`: pulls reduced + `(chip_id, layer_idx_end, input_layer_claim, lambda_next, mu)` back. +- **External buses** + - `TowerModuleBus.receive`: proof-shape message + `(proof_idx, chip_id, num_layers, num_read_specs, num_write_specs, num_logup_specs)` per enabled row. + - `TowerRootClaimBus.send`: returns `(chip_id, r0_claim, w0_claim, p0_claim, q0_claim)` to `ProofShapeAir`. + - `MainBus.send`: forwards the final input-layer claim with the final transcript index. + - `TranscriptBus`: sample the root batching/interpolation challenges and observe root claim data only when + `has_interactions`. The root input buses may carry sampled values into child AIRs as ordinary dataflow; only the + transcript bus fixes sample/observe chronology. + +## TowerLayerAir (`src/tower/layer/air.rs`) + +### Columns + +| Field | Shape | Description | +|------------------------------------|---------------|----------------------------------------------------------------------------| +| `is_enabled` | scalar | Row selector. | +| `proof_idx` | scalar | Proof counter shared with input AIR. | +| `chip_id` | scalar | VK-assigned chip id used to scope tower claim/sumcheck messages. | +| `is_first_tower` | scalar | First row flag for each `(proof_idx, chip_id)` tower proof. | +| `is_first` | scalar | Indicates the first layer row of a proof. | +| `is_dummy` | scalar | Marks padding rows that still satisfy constraints. | +| `layer_idx` | scalar | Layer number, enforced to start at 0 and increment per transition. | +| `tidx` | scalar | Transcript cursor at the start of the layer. | +| `lambda_next` | `[D_EF]` | Fresh/outgoing batching challenge for the next-layer claim. | +| `lambda_cur` | `[D_EF]` | Current sumcheck batching challenge (root layer pins it to `1`). | +| `mu` | `[D_EF]` | Reduction point sampled from transcript. | +| `sumcheck_claim_in` | `[D_EF]` | Combined claim passed to the layer sumcheck AIR. | +| `read_claim_next` | `[D_EF]` | Folded product contribution with respect to `lambda_next`. | +| `read_claim_cur` | `[D_EF]` | Folded product contribution with respect to `lambda_cur`. | +| `write_claim_next` | `[D_EF]` | Same as above for the write accumulator. | +| `write_claim_cur` | `[D_EF]` | Companion write claim with respect to `lambda_cur`. | +| `logup_claim_next` | `[D_EF]` | LogUp folded claim with respect to `lambda_next`. | +| `logup_claim_cur` | `[D_EF]` | LogUp folded claim with respect to `lambda_cur`. | +| `num_read_count` | scalar | Declared accumulator length for the read product AIR. | +| `num_write_count` | scalar | Declared accumulator length for the write product AIR. | +| `num_logup_count` | scalar | Declared accumulator length for the LogUp AIR. | +| `read_lambda_next_end` | `[D_EF]` | `lambda_next` power after the read product group. | +| `read_lambda_cur_end` | `[D_EF]` | `lambda_cur` power after the read product group. | +| `write_lambda_next_end` | `[D_EF]` | `lambda_next` power after the write product group. | +| `write_lambda_cur_end` | `[D_EF]` | `lambda_cur` power after the write product group. | +| `eq_at_r_prime` | `[D_EF]` | Product of eq evaluations returned from sumcheck. | +| `initial_tower_claim` | `[D_EF]` | Batched initial claim `C_1(r_1)` supplied by `TowerInputAir`. | + +### Row Constraints + +- **Looping**: Loop constraints enforce boolean enablement, padding-after-padding, and grouping by + `(proof_idx, chip_id)`. `is_first_tower` scopes the tower input bus handshake to the very first active row for that + chip tower proof, while `is_first` marks the first layer row. No additional local tower identifier is used; `chip_id` + is the VK-assigned protocol identity carried on tower proof messages and remains constant across all layer rows of the + tower proof. +- **Layer counter**: `layer_idx = 0` on the `is_first` row and increments by one on every transition flagged by the loop + helper. +- **`lambda_cur` propagation**: On the first active tower row, `lambda_cur` must equal `[1, 0, …, 0]`; on each transition + the next row’s `lambda_cur` is constrained to equal the previous row’s sampled `lambda_next`. This lets downstream AIRs + reuse the same logic for both initialization and continuing layers. +- **Initial claim**: When `is_first = 1`, the layer sumcheck starts from the supplied `initial_tower_claim = C_1(r_1)`. + The `*_claim_cur` values received from downstream AIRs are used to assemble `T_i(rho)`, not to recompute + `r0_claim`/`w0_claim`. +- **Inter-layer propagation**: `next.sumcheck_claim_in = read_claim_next + write_claim_next + logup_claim_next` on transitions. The + current-claim versions feed `sumcheck_claim_out = read_claim_cur + write_claim_cur + logup_claim_cur`, which is what + the sumcheck AIR receives. +- **Count consistency**: `num_read_count`, `num_write_count`, and `num_logup_count` are individually checked against the + shape metadata forwarded from `TowerInputAir`/`TowerModuleBus` for read product specs, write product specs, and LogUp + specs. +- **Dummy rows**: Dummy rows allow reusing the same AIR width even when no layer work is pending; constraints are + guarded by `is_not_dummy` to avoid accidentally constraining padding rows. +- **Transcript timing**: Same `tidx` arithmetic as before, but now the post-sumcheck transcript window must also cover + the sample/observe operations that the product/logup AIRs perform themselves. + +### Interactions + +- **Layer buses** + - `layer_input.receive`: only on the first non-dummy row; provides + `(chip_id, layer_tidx, num_layers, num_read_specs, num_write_specs, num_logup_specs, initial_tower_claim)`. + - `layer_output.send`: on the last non-dummy row; reports + `(chip_id, tidx_end, layer_idx_end, input_layer_claim, lambda_next, mu)` + back to `TowerInputAir` so the caller can record the transcript state for downstream verifiers. +- **Sumcheck buses** + - `sumcheck_input.send`: for non-root layers, dispatches + `(chip_id, layer_idx, is_last_layer, tidx + D_EF, claim)` to the sumcheck AIR. + - `sumcheck_output.receive`: ingests `(chip_id, layer_idx, claim_out, eq_at_r_prime)` and re-encodes them into local + columns. + - `sumcheck_challenge.send`: posts `(chip_id, layer_idx, round = 0, mu)` as round 0 for the next layer’s sumcheck. +- **Transcript bus** + - Samples `lambda_next` for non-root layers and `mu` for every active layer. + - Product and LogUp child-claim observations are owned by the product/LogUp claim AIRs, not by `TowerLayerAir`. +- **Prod/logup buses** + - Sends read product input with `prod_offset = 0`, start powers equal to one, and `num_prod_count = num_read_count`. + - Receives read product claims plus `read_lambda_next_end` and `read_lambda_cur_end`, then sends write product input with + `prod_offset = num_read_count`, start powers equal to the read end powers, and `num_prod_count = num_write_count`. + - Receives write product claims plus `write_lambda_next_end` and `write_lambda_cur_end`, then sends the LogUp input + with start powers equal to the write end powers and `num_logup_count`. + - Receives back both `lambda_next_claim` and `lambda_cur_claim` from each claim AIR. + - Root init buses are outside `TowerLayerAir`; this AIR receives the already assembled + `initial_tower_claim = C_1(r_1)` from `TowerInputAir`. + +## TowerProdSumCheckClaimAir (`src/tower/layer/prod_claim/air.rs`) + +### Columns + +There are two product claim AIR instances with the same contract: + +- `TowerProdReadSumCheckClaimAir` folds read product specs; +- `TowerProdWriteSumCheckClaimAir` folds write product specs. -For a product spec `j`: +The read variant uses: ```text -Prod_j^i(b) = Prod_j^{i+1}(b, 0) * Prod_j^{i+1}(b, 1) +prod_offset = 0 +num_prod_count = num_read_specs +``` + +The write variant uses: -Prod_j^i(r) = - sum_b eq(r, b) * Prod_j^{i+1}(b, 0) * Prod_j^{i+1}(b, 1) +```text +prod_offset = num_read_specs +num_prod_count = num_write_specs ``` -For a LogUp spec `k`, with numerator `P_k` and denominator `Q_k`, the relation is fraction addition: +The trace needs columns equivalent to: + +- loop metadata: `is_enabled`, `(proof_idx, chip_id)` scope, and optional `layer_idx`; +- a mode selector for `DeriveLayerClaims` versus `DeriveOutputClaim`; +- `claim_tidx` for transcript observation of the product claim tuple; +- `num_prod_count` and the active row counter inside the selected group; +- product tuple values `P_k(0)` and `P_k(1)`; +- layer-mode challenges and powers: `lambda_next`, `lambda_cur`, `mu`, `lambda_next_start`, `lambda_cur_start`, + running next/current powers, and end powers; +- layer-mode accumulators for `next_claim` and `eval_claim`; +- output-mode challenges and powers: `lambda_1`, `r_1`, `lambda_1_start`, and running `init_pow`; +- output-mode accumulators for `output_claim` and `initial_claim`. + +So if a chip has `num_read_specs = 4` and `num_write_specs = 6`, the read product claim AIR needs 4 active rows and the +write product claim AIR needs 6 active rows. Together they fold the 10 product specs. LogUp specs are folded separately by +`TowerLogupSumCheckClaimAir`. + +### Row Constraints + +The AIR supports two modes: ```text -P_k^i(b) / Q_k^i(b) = - P_k^{i+1}(b, 0) / Q_k^{i+1}(b, 0) - + P_k^{i+1}(b, 1) / Q_k^{i+1}(b, 1) +DeriveLayerClaims: + derive (eval_claim, next_claim) for one active tower layer + +DeriveOutputClaim: + derive the chip-level root output product claim plus the C_1 contribution ``` -Equivalently: +In both modes, each active row consumes one product spec tuple: + +```text +P_k(0), P_k(1) +``` + +For row `k` inside the read or write product group, define: ```text -P_k^i(b) = - P_k^{i+1}(b, 0) * Q_k^{i+1}(b, 1) - + P_k^{i+1}(b, 1) * Q_k^{i+1}(b, 0) +A0_k = P_k(0) +A1_k = P_k(1) -Q_k^i(b) = - Q_k^{i+1}(b, 0) * Q_k^{i+1}(b, 1) +A_mu_k = (1 - mu) * A0_k + mu * A1_k +A_r1_k = (1 - r_1) * A0_k + r_1 * A1_k +A_cross_k = A0_k * A1_k ``` -Product specs include both read and write specs. LogUp specs contribute two batched polynomials, `P_k` and `Q_k`. A -single transcript-derived batching challenge `alpha` is used for all specs. If there are `n_prod` product specs, the -flattened batching order is: +In `DeriveLayerClaims` mode, `A0_k = Prod_{prod_offset + k}^{i+1}(rho, 0)` and +`A1_k = Prod_{prod_offset + k}^{i+1}(rho, 1)`. In `DeriveOutputClaim` mode, they are the root out-eval pair for the +selected read or write product spec. + +#### DeriveLayerClaims Mode + +The next-layer product claim is: ```text -Prod_0, ..., Prod_{n_prod-1}, P_0, Q_0, P_1, Q_1, ... +prod_next_claim = + sum_k lambda_next^(prod_offset + k) * A_mu_k ``` -so the weights are consecutive powers of `alpha`. The batched parent claim is: +Equivalently, with `next_pow_0 = lambda_next_start = lambda_next^prod_offset`: ```text -C_i(r) = - sum_j alpha^j * Prod_j^i(r) - + sum_k alpha^{n_prod + 2k} * P_k^i(r) - + sum_k alpha^{n_prod + 2k + 1} * Q_k^i(r) +next_acc_{k+1} = next_acc_k + next_pow_k * A_mu_k +next_pow_{k+1} = next_pow_k * lambda_next ``` -Here `alpha^0 = 1`, matching `get_challenge_pows`. +The product expected-evaluation contribution is the product contribution to `T_i(rho)`: + +```text +prod_eval_claim = + sum_k lambda_cur^(prod_offset + k) * A_cross_k +``` -Substituting the layer relations gives the sumcheck target: +Equivalently, with `current_pow_0 = lambda_cur_start = lambda_cur^prod_offset`: ```text -C_i(r) = sum_b eq(r, b) * T_i(b) +eval_acc_{k+1} = eval_acc_k + current_pow_k * A_cross_k +current_pow_{k+1} = current_pow_k * lambda_cur +``` -T_i(b) = - sum_j alpha^j * Prod_j^{i+1}(b, 0) * Prod_j^{i+1}(b, 1) +The final active row sends: - + sum_k alpha^{n_prod + 2k} * ( - P_k^{i+1}(b, 0) * Q_k^{i+1}(b, 1) - + P_k^{i+1}(b, 1) * Q_k^{i+1}(b, 0) - ) +```text +TowerProdSumClaimMessage { + chip_id, + layer_idx, + lambda_next_claim = prod_next_claim, + lambda_cur_claim = prod_eval_claim, + lambda_next_end = lambda_next_start * lambda_next^num_prod_count, + lambda_cur_end = lambda_cur_start * lambda_cur^num_prod_count +} +``` - + sum_k alpha^{n_prod + 2k + 1} * ( - Q_k^{i+1}(b, 0) * Q_k^{i+1}(b, 1) - ) +`lambda_next_start` and `lambda_cur_start` must equal the batching powers at `prod_offset`: + +```text +lambda_next_start = lambda_next^prod_offset +lambda_cur_start = lambda_cur^prod_offset ``` -If the sumcheck samples point `rho`, its final claim is: +For the read product AIR, `prod_offset = 0` and both start powers are one. For the write product AIR, +`prod_offset = num_read_specs`, and both start powers must equal the end powers exported by the read product AIR. + +#### DeriveOutputClaim Mode + +The output product claim is: ```text -claim_out = eq(r, rho) * T_i(rho) +prod_output_claim = product_k A_cross_k ``` -where `eq(r, rho)` is accumulated round-by-round as: +Equivalently: ```text -eq_next = eq_cur * (xi * rho_i + (1 - xi) * (1 - rho_i)) +output_acc_0 = 1 +output_acc_{k+1} = output_acc_k * A_cross_k ``` -Verifier view for layer `i`: +The same rows also derive the product contribution to the initial tower claim: -1. Verify the sumcheck proof for the claim `C_i(r)`. The proof returns the point `rho` and a final evaluation. The child - layer claims at that point are: +```text +prod_initial_claim = + sum_k lambda_1^(prod_offset + k) * A_r1_k +``` - ```text - Prod_j^{i+1}(rho, 0), Prod_j^{i+1}(rho, 1) +Equivalently, with `init_pow_0 = lambda_1_start = lambda_1^prod_offset`: - P_k^{i+1}(rho, 0), P_k^{i+1}(rho, 1), - Q_k^{i+1}(rho, 0), Q_k^{i+1}(rho, 1) - ``` +```text +init_acc_0 = 0 +init_acc_{k+1} = init_acc_k + init_pow_k * A_r1_k +init_pow_{k+1} = init_pow_k * lambda_1 +``` - From those claims, compute: +For the read instance, `output_claim = r0_claim` and `lambda_1_start = 1`. For the write instance, +`output_claim = w0_claim` and `lambda_1_start = lambda_1^num_read_specs`. This mode has no `mu` interpolation and no +contribution to `T_i(rho)` or `C_{i+1}`; its `lambda_1` weights contribute only to `C_1(r_1)`. - ```text - T_i(rho) = - sum_j alpha^j * Prod_j^{i+1}(rho, 0) * Prod_j^{i+1}(rho, 1) +The final active row sends `output_claim = output_acc_end` and `initial_claim = init_acc_end`. - + sum_k alpha^{n_prod + 2k} * ( - P_k^{i+1}(rho, 0) * Q_k^{i+1}(rho, 1) - + P_k^{i+1}(rho, 1) * Q_k^{i+1}(rho, 0) - ) +The common loop constraints are: - + sum_k alpha^{n_prod + 2k + 1} * ( - Q_k^{i+1}(rho, 0) * Q_k^{i+1}(rho, 1) - ) - ``` +- One active row corresponds to one product spec. +- The mode bit must be constant across the group. +- `DeriveLayerClaims` mode is grouped by `(proof_idx, chip_id, layer_idx)` inside each read/write AIR instance. +- `DeriveOutputClaim` mode is grouped by `(proof_idx, chip_id)` inside each read/write AIR instance. +- `DeriveLayerClaims` mode initializes additive accumulators to zero and power accumulators from the input message. +- `DeriveOutputClaim` mode initializes the multiplicative output accumulator to one, initializes the additive + initial-claim accumulator to zero, and initializes `init_pow` from the root input message. +- In `DeriveOutputClaim` mode, each observed root out-eval pair must update both `output_acc` and `init_acc`; this binds + the root claim and initial claim to the same data. +- The mode selector must gate bus sends/receives so a row group produces exactly the messages for its selected mode. - The sumcheck final evaluation must equal `eq(r, rho) * T_i(rho)`. +### Interactions + +In `DeriveLayerClaims` mode, the AIR receives the layer input message on the first row: -2. If layer `i + 1` is not terminal, derive the next layer's expected sum after sampling `mu` and a fresh batching - challenge `alpha_next`: +```text +TowerProdLayerInputMessage { + chip_id, + layer_idx, + tidx, + lambda_next, // next-layer batching challenge + lambda_cur, // current-layer batching challenge + mu, + prod_offset, + lambda_next_start, + lambda_cur_start, + num_prod_count +} +``` - ```text - r_next = (rho, mu) +and sends on the final active row: - C_{i+1}(r_next) = - sum_j alpha_next^j * Prod_j^{i+1}(r_next) - + sum_k alpha_next^{n_prod + 2k} * P_k^{i+1}(r_next) - + sum_k alpha_next^{n_prod + 2k + 1} * Q_k^{i+1}(r_next) - ``` +```text +TowerProdSumClaimMessage { + chip_id, + layer_idx, + lambda_next_claim = prod_next_claim, + lambda_cur_claim = prod_eval_claim, + lambda_next_end = lambda_next_start * lambda_next^num_prod_count, + lambda_cur_end = lambda_cur_start * lambda_cur^num_prod_count +} +``` - Each carried claim is the multilinear interpolation at `mu`: +In `DeriveOutputClaim` mode, the AIR receives the root input bus message on the first row: - ```text - Prod_j^{i+1}(r_next) = - (1 - mu) * Prod_j^{i+1}(rho, 0) - + mu * Prod_j^{i+1}(rho, 1) - ``` +```text +Tower{Read,Write}RootInputBus { + chip_id, + claim_tidx, + lambda_1, + r_1, + lambda_1_start, + num_prod_count +} +``` - with the same interpolation for each LogUp `P_k` and `Q_k`. Specs that have no remaining reduction round do not - contribute to the next expected sum. +and sends both root-mode messages from the same final active row: -Both LogUp cross terms in `T_i` are part of the semantic statement. If an implementation splits or reuses accumulators, -the final sumcheck target must still include the `P0 * Q1 + P1 * Q0` numerator-cross contribution and the `Q0 * Q1` -denominator-cross contribution with their corresponding powers of `alpha`. +```text +Tower{Read,Write}RootBus { + chip_id, + output_claim +} + +Tower{Read,Write}InitBus { + chip_id, + initial_claim +} +``` -## TowerInputAir (`src/tower/input/air.rs`) +The AIR owns transcript observations for the active product child claims. It does not own the layer-level sumcheck check; +`TowerLayerAir` combines the returned product, write-product, and LogUp claims. + +## TowerLogUpSumCheckClaimAir (`src/tower/layer/logup_claim/air.rs`) ### Columns -| Field | Shape | Description | -|-----------------------|-----------------|----------------------------------------------------------------------| -| `is_enabled` | scalar | Row selector (0 = padding). | -| `proof_idx` | scalar | Outer proof loop index enforced by nested sub-AIRs. | -| `idx` | scalar | Inner loop index enumerating AIR instances within a proof. | -| `n_layer` | scalar | Number of active GKR layers for the proof. | -| `is_n_layer_zero` | scalar | Flag for `n_layer == 0` (drives “no interaction” branches). | -| `is_n_layer_zero_aux` | `IsZeroAuxCols` | Witness used by `IsZeroSubAir` to enforce the zero test. | -| `tidx` | scalar | Transcript cursor at start of the proof. | -| `r0_claim` | `[D_EF]` | Root numerator commitment supplied to `TowerLayerAir`. | -| `w0_claim` | `[D_EF]` | Root witness commitment supplied to `TowerLayerAir`. | -| `q0_claim` | `[D_EF]` | Root denominator commitment supplied to `TowerLayerAir`. | -| `alpha_logup` | `[D_EF]` | Transcript challenge sampled before passing inputs to GKR layers. | -| `input_layer_claim` | `[D_EF]` | Folded claim returned from `TowerLayerAir`. | -| `layer_output_lambda` | `[D_EF]` | Batching challenge sampled in the final GKR layer (zeros if unused). | -| `layer_output_mu` | `[D_EF]` | Reduction point sampled in the final GKR layer (zeros if unused). | +This AIR folds only the LogUp specs. Product specs are folded by the read/write product claim AIRs. If a chip has +`num_prod_specs = 10` and `num_logup_specs = 20`, this AIR needs 20 active accumulator rows for that layer; the 10 +product specs are handled by the read/write product claim AIRs. + +The LogUp batching offset is: + +```text +logup_offset = num_read_specs + num_write_specs = num_prod_specs +``` + +The trace needs columns equivalent to: + +- loop metadata: `is_enabled`, `(proof_idx, chip_id)` scope, and optional `layer_idx`; +- a mode selector for `DeriveLayerClaims` versus `DeriveOutputClaim`; +- `claim_tidx` for transcript observation of the LogUp tuple; +- `num_logup_count` and the active row counter inside the selected group; +- LogUp tuple values `P_k(0)`, `P_k(1)`, `Q_k(0)`, and `Q_k(1)`; +- layer-mode challenges and powers: `lambda_next`, `lambda_cur`, `mu`, `lambda_next_start`, `lambda_cur_start`, and + running next/current powers; +- layer-mode accumulators for `next_claim` and `eval_claim`; +- output-mode challenges and powers: `lambda_1`, `r_1`, `lambda_1_start`, and running `init_pow`; +- output-mode fraction accumulators for `(p0_claim, q0_claim)` and an accumulator for `logup_initial_claim`. ### Row Constraints -- **Enablement / indexing**: A `NestedForLoopSubAir<2>` enforces boolean `is_enabled`, padding-after-padding, and - consecutive `(proof_idx, idx)` pairs for enabled rows. -- **Zero test**: `IsZeroSubAir` checks `n_logup` against `is_n_logup_zero`, unlocking the “no interaction” path. -- **Input layer defaults**: When `n_logup == 0`, the input-layer claim must be `[0, α]` (numerator zero, denominator - equals `alpha_logup`). -- **Transcript math**: Local expressions derive the transcript offsets for alpha sampling, per-layer reductions, and the - xi-sampling window directly from `n_layer`. No auxiliary `n_max` adjustment is needed. +The AIR supports two modes: -### Interactions +```text +DeriveLayerClaims: + derive (eval_claim, next_claim) for one active tower layer -- **Internal buses** - - `TowerLayerInputBus.send`: emits `(idx, tidx skip roots, r0/w0/q0_claim)` when interactions exist. - - `TowerLayerOutputBus.receive`: pulls reduced `(idx, layer_idx_end, input_layer_claim, lambda, mu)` back. -- **External buses** - - `TowerModuleBus.receive`: initial module message `(idx, tidx, n_logup)` per enabled row. - - `BatchConstraintModuleBus.send`: forwards the final input-layer claim with the final transcript index. - - `TranscriptBus`: sample `alpha_logup` and observe `q0_claim` only when `has_interactions`. +DeriveOutputClaim: + derive the chip-level root LogUp fractional pair (p0_claim, q0_claim) plus the C_1 contribution +``` -### Notes +In both modes, each active row consumes one LogUp spec tuple: -- Local booleans `has_interactions` gate all downstream activity, so future refactors must keep those semantics aligned - with the code branches. +```text +P_k(0), P_k(1), +Q_k(0), Q_k(1) +``` -## TowerLayerAir (`src/tower/layer/air.rs`) +For row `k`, define: -### Columns +```text +P0_k = P_k(0) +P1_k = P_k(1) +Q0_k = Q_k(0) +Q1_k = Q_k(1) -| Field | Shape | Description | -|------------------------------------|---------------|----------------------------------------------------------------------------| -| `is_enabled` | scalar | Row selector. | -| `proof_idx` | scalar | Proof counter shared with input AIR. | -| `idx` | scalar | AIR index within the proof (matches the input AIR). | -| `is_first_air_idx` | scalar | First row flag for each `(proof_idx, idx)` block. | -| `is_first` | scalar | Indicates the first layer row of a proof. | -| `is_dummy` | scalar | Marks padding rows that still satisfy constraints. | -| `layer_idx` | scalar | Layer number, enforced to start at 0 and increment per transition. | -| `tidx` | scalar | Transcript cursor at the start of the layer. | -| `lambda` | `[D_EF]` | Fresh batching challenge sampled for non-root layers. | -| `lambda_prime` | `[D_EF]` | Challenge inherited from the previous layer (root layer pins it to `1`). | -| `mu` | `[D_EF]` | Reduction point sampled from transcript. | -| `sumcheck_claim_in` | `[D_EF]` | Combined claim passed to the layer sumcheck AIR. | -| `read_claim` | `[D_EF]` | Folded product contribution with respect to `lambda`. | -| `read_claim_prime` | `[D_EF]` | Companion folded claim with respect to `lambda_prime` (root = r₀). | -| `write_claim` | `[D_EF]` | Same as above for the write accumulator. | -| `write_claim_prime` | `[D_EF]` | Companion write claim. | -| `logup_claim` | `[D_EF]` | LogUp folded claim w.r.t. `lambda`. | -| `logup_claim_prime` | `[D_EF]` | LogUp folded claim w.r.t. `lambda_prime` (root = q₀). | -| `num_read_count` | scalar | Declared accumulator length for the read prod AIR (must equal `n_logup`). | -| `num_write_count` | scalar | Declared accumulator length for the write prod AIR (must equal `n_logup`). | -| `num_logup_count` | scalar | Declared accumulator length for the logup AIR (must equal `n_logup`). | -| `eq_at_r_prime` | `[D_EF]` | Product of eq evaluations returned from sumcheck. | -| `r0_claim`, `w0_claim`, `q0_claim` | `[D_EF]` each | Root evaluations supplied by `TowerInputAir`. | +P_mu_k = (1 - mu) * P0_k + mu * P1_k +Q_mu_k = (1 - mu) * Q0_k + mu * Q1_k -### Row Constraints +P_r1_k = (1 - r_1) * P0_k + r_1 * P1_k +Q_r1_k = (1 - r_1) * Q0_k + r_1 * Q1_k -- **Looping**: `NestedForLoopSubAir<2>` continues to enforce boolean enablement, padding-after-padding, and - lexicographic ordering for `(proof_idx, idx)`. `is_first_air_idx` scopes the per-proof input bus handshake to the very - first active row, while `is_first` marks the first layer row. -- **Layer counter**: `layer_idx = 0` on the `is_first` row and increments by one on every transition flagged by the loop - helper. -- **`lambda_prime` propagation**: On the root row, `lambda_prime` must equal `[1, 0, …, 0]`; on each transition the next - row’s `lambda_prime` is constrained to equal the previous row’s sampled `lambda`. This lets downstream AIRs reuse the - same logic for both initialization and continuing layers. -- **Root comparisons**: When `is_first = 1`, the `_prime` claims received from downstream AIRs must match the supplied - `r0_claim`, `w0_claim`, `q0_claim`. This replaces the old local interpolation logic. -- **Inter-layer propagation**: `next.sumcheck_claim_in = read_claim + write_claim + logup_claim` on transitions. The - `_prime` versions feed `sumcheck_claim_out = read_claim_prime + write_claim_prime + logup_claim_prime`, which is what - the sumcheck AIR receives. -- **Count consistency**: `num_read_count`, `num_write_count`, and `num_logup_count` are all constrained to equal - `n_logup`, and each is individually range-checked against ProofShape metadata via `AirShapeBus`. -- **Transcript timing**: Same `tidx` arithmetic as before, but now the post-sumcheck transcript window must also cover - the sample/observe operations that the product/logup AIRs perform themselves. +P_cross_k = P0_k * Q1_k + P1_k * Q0_k +Q_cross_k = Q0_k * Q1_k +``` -### Interactions +In `DeriveLayerClaims` mode, these are the child claims at `(rho, 0)` and `(rho, 1)`. In `DeriveOutputClaim` mode, +they are the root LogUp out-eval tuple. -- **Layer buses** - - `layer_input.receive`: only on the first non-dummy row; provides `(idx, tidx, r0/w0/q0_claim)`. - - `layer_output.send`: on the last non-dummy row; reports `(idx, tidx_end, layer_idx_end, folded claim, lambda, mu)` - back to `TowerInputAir` so the caller can record the transcript state for downstream verifiers. -- **Sumcheck buses** - - `sumcheck_input.send`: for non-root layers, dispatches `(layer_idx, is_last_layer, tidx + D_EF, claim)` to the - sumcheck AIR. - - `sumcheck_output.receive`: ingests `(claim_out, eq_at_r_prime)` and re-encodes them into local columns. - - `sumcheck_challenge.send`: posts the `mu` challenge as round 0 for the next layer’s sumcheck. -- **Transcript bus** - - Samples `lambda` (non-root) and `mu`, observes all `p/q` evaluations. -- **Xi randomness bus** - - On the proof’s final layer, sends `mu` as the shared xi challenge consumed by later modules. -- **Prod/logup buses** - - Sends `(idx, layer_idx, tidx, lambda, lambda_prime, mu)` to the read/write prod AIRs every row (dummy rows are - masked out). Receives back both `lambda_claim` and `lambda_prime_claim` along with `num_read_count` / - `num_write_count`. - - Sends the same challenge payload to the logup AIR and receives its dual claims plus `num_logup_count`. - - No separate “init” buses exist anymore; setting `lambda_prime = 1` on the root row instructs the sub-AIRs to act - as - the initialization accumulators whose outputs are compared directly against `r0/w0/q0`. +#### DeriveLayerClaims Mode -### Notes +The next-layer LogUp claim is: -- Dummy rows allow reusing the same AIR width even when no layer work is pending; constraints are guarded by - `is_not_dummy` to avoid accidentally constraining padding rows. -- The transcript math (5·`D_EF` per layer after sumcheck) must stay synchronized with `TowerInputAir`’s tidx - bookkeeping. +```text +logup_next_claim = + sum_k lambda_next^(logup_offset + 2k) * P_mu_k + + sum_k lambda_next^(logup_offset + 2k + 1) * Q_mu_k +``` -## TowerProdSumCheckClaimAir (`src/tower/layer/prod_claim/air.rs`) +Equivalently, with `next_pow_0 = lambda_next_start = lambda_next^logup_offset`: -### Columns & Loops - -- `NestedForLoopSubAir<2>` enumerates `(proof_idx, idx)` and treats `layer_idx` as an inner counter controlled by - `is_first_layer`; within each `(proof_idx, idx, layer_idx)` triple an `index_id` column counts accumulator rows. -- Columns include: - - Loop/indexing flags (`is_enabled`, `is_first_layer`, `is_first`, `is_dummy`, `index_id`, `num_read_count`, - `num_write_count`). - - Metadata observed from `TowerLayerAir`: `layer_idx`, `tidx`, challenges `lambda`, `lambda_prime`, `mu`. - - Transcript observations: `p_xi_0`, `p_xi_1`, interpolated `p_xi`. - - Dual running powers/sums: `(pow_lambda, acc_sum)` for the standard sumcheck, `(pow_lambda_prime, acc_sum_prime)` - for - the root-compatible accumulator. - -### Constraints - -- Clamp `index_id` to zero on the first row of every layer triple, increment it while `stay_in_layer = 1`, and enforce - `index_id + 1 = num_read_count` / `num_write_count` on the rows that send results. -- Recompute `p_xi` via the usual linear interpolation in `mu`. -- Update both accumulators: - - `acc_sum_next = acc_sum + p_xi * pow_lambda`, with `pow_lambda_next = pow_lambda * lambda`. - - `acc_sum_prime_next = acc_sum_prime + (p_xi_0 * p_xi_1) * pow_lambda_prime`, - `pow_lambda_prime_next = pow_lambda_prime * lambda_prime`. -- The root-layer behavior falls out automatically: when `lambda_prime = 1`, the `_prime` accumulator simply sums - pairwise products, so the final row exports `r0`/`w0`-style claims. +```text +next_acc_{k+1} = next_acc_k + next_pow_k * (P_mu_k + lambda_next * Q_mu_k) +next_pow_{k+1} = next_pow_k * lambda_next^2 +``` -### Interactions +The LogUp expected-evaluation contribution is the LogUp contribution to `T_i(rho)`: -- First row per layer triple receives - `TowerProdLayerChallengeMessage { idx, layer_idx, tidx, lambda, lambda_prime, mu }`. -- Final row sends `TowerProdSumClaimMessage { lambda_claim = acc_sum, lambda_prime_claim = acc_sum_prime }` alongside - the - appropriate `num_*_count`. Read/write variants simply use different buses. +```text +logup_eval_claim = + sum_k lambda_cur^(logup_offset + 2k) * P_cross_k + + sum_k lambda_cur^(logup_offset + 2k + 1) * Q_cross_k +``` -## TowerLogUpSumCheckClaimAir (`src/tower/layer/logup_claim/air.rs`) +Equivalently, with `current_pow_0 = lambda_cur_start = lambda_cur^logup_offset`: -### Columns & Loops - -- Shares the same `(proof_idx, idx)` outer loop and `index_id` accumulator counter as the product AIR. -- Columns: - - Loop metadata plus `num_logup_count`. - - Transcript data `p_xi_0`, `p_xi_1`, `q_xi_0`, `q_xi_1`, interpolated `p_xi`, `q_xi`. - - Challenges `lambda`, `lambda_prime`, `mu`. - - Running powers `pow_lambda`, `pow_lambda_prime`. - - Accumulators: `acc_sum` for the standard `(p_xi + lambda * q_xi)` contribution, `acc_p_cross`, `acc_q_cross` for - the - log-up initialization terms that previously lived in their own AIR. - -### Constraints - -- Recompute `p_xi`, `q_xi` every row, then derive the cross terms - `p_cross = p_xi_0 * q_xi_1 + p_xi_1 * q_xi_0`, `q_cross = q_xi_0 * q_xi_1`. -- Accumulators: - - `acc_sum_next = acc_sum + pow_lambda * (p_xi + lambda * q_xi)`. - - `acc_p_cross_next = acc_p_cross + pow_lambda_prime * p_cross`. - - `acc_q_cross_next = acc_q_cross + pow_lambda_prime * lambda_prime * q_cross`. - Root-layer behavior again follows from `lambda_prime = 1`. -- `pow_lambda` and `pow_lambda_prime` follow the same multiplicative recurrence as in the product AIR. -- `index_id` bookkeeping and “final row sends” conditions mirror the product AIR. +```text +eval_acc_{k+1} = eval_acc_k + current_pow_k * (P_cross_k + lambda_cur * Q_cross_k) +current_pow_{k+1} = current_pow_k * lambda_cur^2 +``` -### Interactions +`lambda_next_start` and `lambda_cur_start` must equal the product write end powers: -- Receives the layer challenge message with both `lambda` and `lambda_prime` on the first row. -- Final row sends `TowerLogupClaimMessage { lambda_claim = acc_sum, lambda_prime_claim = acc_q_cross }` plus - `num_logup_count`. (The `acc_p_cross` value remains internal because only the denominator-style accumulator is needed - upstream at the moment.) +```text +lambda_next_start = lambda_next^logup_offset +lambda_cur_start = lambda_cur^logup_offset +``` -## TowerLogUpSumCheckClaimAir (`src/tower/layer/logup_claim/air.rs`) +Each LogUp row consumes two consecutive batching powers: + +```text +P_k next weight = lambda_next^(logup_offset + 2k) +Q_k next weight = lambda_next^(logup_offset + 2k + 1) +P_k current weight = lambda_cur^(logup_offset + 2k) +Q_k current weight = lambda_cur^(logup_offset + 2k + 1) +``` -### Columns & Loops +The final active row sends: -- Shares the `(proof_idx, idx, layer_idx)` nested-loop structure and reuses `index_id` to count accumulator rows. -- Columns mirror the product AIR plus the denominator evaluations: `is_enabled`, the loop counters/flags, - `(p_xi_0, p_xi_1, q_xi_0, q_xi_1)`, interpolated `(p_xi, q_xi)`, `lambda`, `mu`, `pow_lambda`, `acc_sum`, - `index_id`, and `num_logup_count`. +```text +TowerLogupClaimMessage { + chip_id, + layer_idx, + lambda_next_claim = logup_next_claim, + lambda_cur_claim = logup_eval_claim +} +``` -### Constraints +#### DeriveOutputClaim Mode -- Recomputes both `p_xi` and `q_xi` in every row. -- Uses the existing log-up contribution `acc_sum_next = acc_sum + (lambda * q_xi) * pow_lambda`. -- `index_id` obeys the same initialization/increment/final-row checks against `num_logup_count` as the product AIR. -- Only the final accumulator row per `(proof_idx, idx, layer_idx)` may drive `TowerLogupClaimBus`. +The output LogUp fractional pair folds by fraction addition: + +```text +p_acc_0 = 0 +q_acc_0 = 1 + +p_acc_{k+1} = p_acc_k * Q_cross_k + P_cross_k * q_acc_k +q_acc_{k+1} = q_acc_k * Q_cross_k +``` + +The same rows also derive the LogUp contribution to the initial tower claim: + +```text +logup_initial_claim = + sum_k lambda_1^(logup_offset + 2k) * P_r1_k + + sum_k lambda_1^(logup_offset + 2k + 1) * Q_r1_k +``` + +Equivalently, with `init_pow_0 = lambda_1_start = lambda_1^logup_offset`: + +```text +init_acc_0 = 0 +init_acc_{k+1} = init_acc_k + init_pow_k * (P_r1_k + lambda_1 * Q_r1_k) +init_pow_{k+1} = init_pow_k * lambda_1^2 +``` + +The final active row sends: + +```text +p0_claim = p_acc_end +q0_claim = q_acc_end +logup_initial_claim = init_acc_end +``` + +The common loop constraints are: + +- One active row corresponds to one LogUp spec. +- The mode bit must be constant across the group. +- `DeriveLayerClaims` mode is grouped by `(proof_idx, chip_id, layer_idx)`. +- `DeriveOutputClaim` mode is grouped by `(proof_idx, chip_id)`. +- In `DeriveLayerClaims` mode, recompute `P_mu_k` and `Q_mu_k` every row using interpolation at `mu`. +- Recompute `P_cross_k` and `Q_cross_k` every row. +- In `DeriveLayerClaims` mode, update the next-claim accumulator using `lambda_next`. +- In `DeriveLayerClaims` mode, update the eval-claim accumulator using `lambda_cur`. +- In `DeriveLayerClaims` mode, advance LogUp powers by the corresponding batching challenge squared. +- In `DeriveOutputClaim` mode, update the fraction-addition output accumulators and the initial-claim accumulator. +- The mode selector must gate bus sends/receives so a row group produces exactly the messages for its selected mode. ### Interactions -- Layer metadata is consumed on the row flagged by `is_first_layer`. -- Folded logup claim is emitted exactly once per triple when the accumulator row counter reaches `num_logup_count`. +In `DeriveLayerClaims` mode, the AIR receives the layer input message on the first row: + +```text +TowerLogupLayerInputMessage { + chip_id, + layer_idx, + tidx, + lambda_next, // next-layer batching challenge + lambda_cur, // current-layer batching challenge + mu, + lambda_next_start, + lambda_cur_start, + num_logup_count +} +``` + +and sends on the final active row: + +```text +TowerLogupClaimMessage { + chip_id, + layer_idx, + lambda_next_claim = logup_next_claim, + lambda_cur_claim = logup_eval_claim +} +``` + +In `DeriveOutputClaim` mode, the AIR receives the root input message on the first row: + +```text +TowerLogupRootInputBus { + chip_id, + claim_tidx, + lambda_1, + r_1, + lambda_1_start, + num_logup_count +} +``` + +and sends both root-mode messages from the same final active row: + +```text +TowerLogupRootBus { + chip_id, + p0_claim, + q0_claim +} + +TowerLogupInitBus { + chip_id, + logup_initial_claim +} +``` + +The AIR owns transcript observations for active LogUp child claims. It does not own the layer-level sumcheck check; +`TowerLayerAir` combines the returned read-product, write-product, and LogUp claims. ## TowerLayerSumcheckAir (`src/tower/sumcheck/air.rs`) @@ -369,16 +660,16 @@ denominator-cross contribution with their corresponding powers of `alpha`. | Field | Shape | Description | |-------------------------------|----------|-------------------------------------------------------------| -| `is_enabled` | scalar | Row selector. -| `proof_idx` | scalar | Proof counter. -| `idx` | scalar | Module index within the proof (mirrors `TowerLayerAir`). -| `layer_idx` | scalar | Layer whose sumcheck is being executed. -| `is_first_idx` | scalar | First sumcheck row for the current `(proof_idx, idx)` pair. | -| `is_first_layer` | scalar | First round row for the current layer. -| `is_first_round` | scalar | First round inside the layer. -| `is_dummy` | scalar | Padding flag. -| `is_last_layer` | scalar | Whether this layer is the final GKR layer. -| `round` | scalar | Sub-round index within the layer (0 .. layer_idx-1). +| `is_enabled` | scalar | Row selector. | +| `proof_idx` | scalar | Proof counter. | +| `chip_id` | scalar | VK-assigned chip id used on sumcheck buses. | +| `layer_idx` | scalar | Layer whose sumcheck is being executed. | +| `is_first_tower` | scalar | First sumcheck row for the current `(proof_idx, chip_id)`. | +| `is_first_layer` | scalar | First round row for the current layer. | +| `is_first_round` | scalar | First round inside the layer. | +| `is_dummy` | scalar | Padding flag. | +| `is_last_layer` | scalar | Whether this layer is the final GKR layer. | +| `round` | scalar | Sub-round index within the layer (0 .. layer_idx-1). | | `tidx` | scalar | Transcript cursor before reading evaluations. | `ev1`, `ev2`, `ev3` | `[D_EF]` | Polynomial evaluations at points 1,2,3 (point 0 inferred). | `claim_in`, `claim_out` | `[D_EF]` | Incoming/outgoing claims for each round. @@ -387,26 +678,26 @@ denominator-cross contribution with their corresponding powers of `alpha`. ### Row Constraints -- **Looping**: `NestedForLoopSubAir<3>` now iterates over `(proof_idx, idx, layer_idx)` with the sumcheck round serving - as the innermost loop. The `is_first_idx` flag gates reset logic when we advance to a new module instance, while - `is_first_layer` protects the per-layer bookkeeping just before the round loop begins. +- **Looping**: Loop constraints iterate over `(proof_idx, chip_id, layer_idx)` with the sumcheck round serving as the + innermost loop. The `is_first_tower` flag gates reset logic when we advance to a new chip tower proof, while + `is_first_layer` protects the per-layer bookkeeping just before the round loop begins. No additional local tower + identifier is used. - **Round counter**: `round` starts at 0 and increments each transition; final round enforces `round = layer_idx - 1`. - **Eq accumulator**: `eq_in = 1` on the first round; `eq_out = update_eq(eq_in, prev_challenge, challenge)` and propagates forward. - **Claim flow**: `claim_out` computed via `interpolate_cubic_at_0123` using `(claim_in - ev1)` as `ev0`; `next.claim_in = claim_out` across transitions. - **Transcript timing**: Each transition bumps `next.tidx = tidx + 4·D_EF` (three observations + challenge sample). +- **Dummy rows**: Dummy rows short-circuit all bus traffic; guard send/receive calls with `is_not_dummy`. +- **Arity assumption**: The layout assumes cubic polynomials (degree 3) and would need updates if the sumcheck arity + changes. ### Interactions -- `sumcheck_input.receive`: first non-dummy round pulls `(layer_idx, is_last_layer, tidx, claim)` from `TowerLayerAir`. -- `sumcheck_output.send`: last non-dummy round returns `(claim_out, eq_at_r_prime)` to the layer AIR. +- `sumcheck_input.receive`: first non-dummy round pulls `(chip_id, layer_idx, is_last_layer, tidx, claim)` from + `TowerLayerAir`. +- `sumcheck_output.send`: last non-dummy round returns `(chip_id, layer_idx, claim_out, eq_at_r_prime)` to the layer AIR. - `sumcheck_challenge.receive/send`: enforces challenge chaining between layers/rounds (`prev_challenge` from prior layer, `challenge` published for the next layer or eq export). -- All three buses now include the `idx` field so messages disambiguate distinct module instances inside the same proof. +- All three tower sumcheck buses include `chip_id` so messages disambiguate chip tower proofs inside the same proof. - `transcript_bus.observe_ext`: records `ev1/ev2/ev3`, followed by `sample_ext` of `challenge`. - -### Notes - -- Dummy rows short-circuit all bus traffic; guard send/receive calls with `is_not_dummy`. -- The layout assumes cubic polynomials (degree 3) and would need updates if the sumcheck arity changes. diff --git a/ceno_recursion_v2/docs/tower_module_design.md b/ceno_recursion_v2/docs/tower_module_design.md index 78927a5b0..afadce84a 100644 --- a/ceno_recursion_v2/docs/tower_module_design.md +++ b/ceno_recursion_v2/docs/tower_module_design.md @@ -9,6 +9,24 @@ recursively verified: for each `(proof_idx, chip_id)`, it replays the native Cen proof. Transcript order, shape metadata, batching challenges, and exported claims must stay in lockstep with `ceno_zkvm::scheme::verifier::TowerVerify`. +## Table Of Contents + +- [Scope](#scope) +- [Names And Scopes](#names-and-scopes) +- [Protocol Math](#protocol-math) +- [Module Interaction Diagram](#module-interaction-diagram) +- [External Module Boundaries](#external-module-boundaries) +- [Internal Tower Structure](#internal-tower-structure) +- [AIR Contract Priority](#air-contract-priority) +- [AIR Interaction Sequence](#air-interaction-sequence) +- [Per-AIR Contracts](#per-air-contracts) +- [Bus Summary](#bus-summary) +- [Layer Verification Contract](#layer-verification-contract) +- [Edge Cases / Zero Work Behavior](#edge-cases--zero-work-behavior) +- [Debugging Guide](#debugging-guide) +- [Design Invariants](#design-invariants) +- [Open Design Questions](#open-design-questions) + ## Scope The tower module reduces each `(proof_idx, chip_id)` tower proof to an input-layer claim. It does not own downstream @@ -23,27 +41,229 @@ as implementation context. Use these names consistently in diagrams, contracts, and AIR specs. -| Name | Meaning | Scope / Owner | -|------|---------|---------------| -| `proof_idx` | Logical index of the Ceno proof being recursively verified. | Scopes all per-proof buses; may be represented by the typed bus key instead of a payload field. | -| `chip_id` | VK-assigned unique id for one chip. | Selected by proof-shape; together with `proof_idx`, uniquely identifies one tower proof. | -| `layer_idx` | Active tower layer currently being reduced. | Local to one `(proof_idx, chip_id)` tower proof. | -| `tidx` | Transcript cursor at which an observe/sample event occurs. | Per proof; tower AIRs must match the native verifier transcript order. | -| `num_layers` | Number of active tower layers for this chip tower proof. | VK/proof-shape metadata consumed by `TowerInputAir` and `TowerLayerAir`. | -| `num_read_specs` | Number of read product specs for this chip. | VK-derived; forwarded to read product claim folding. | -| `num_write_specs` | Number of write product specs for this chip. | VK-derived; forwarded to write product claim folding. | -| `num_logup_specs` | Number of LogUp specs for this chip. | VK-derived; forwarded to LogUp claim folding. | -| `num_prod_specs` | `num_read_specs + num_write_specs`. | Derived from VK metadata; controls product batching width. | -| `alpha` / `lambda` | Batching challenge for product and LogUp specs. The math spec uses `alpha`; current layer AIRs often use `lambda` for the active challenge. | Fresh per layer; powers are reused across specs only within that layer. | -| `lambda_prime` | Previous layer batching challenge used by claim AIRs for the companion/current claim path. Root layer pins it to one. | Propagated by `TowerLayerAir`. | -| `rho` | Sumcheck evaluation point produced by the layer sumcheck rounds. | Produced by `TowerLayerSumcheckAir` for one layer. | -| `r_i` | Current layer point in the expected claim `C_i(r_i)`. | Carried by `TowerLayerAir` from one layer to the next. | -| `mu` | Merge/interpolation challenge for deriving the next layer point and expected claim. | Sampled by `TowerLayerAir`; used as the next layer's first challenge when another layer remains. | -| `T_i(rho)` | Current-layer expression computed from child claims at `rho`. | Derived by `TowerLayerAir` from read/write/LogUp `current_claim` outputs. | -| `C_{i+1}(rho, mu)` | Next layer's expected batched claim after interpolation at `mu`. | Derived by `TowerLayerAir` from read/write/LogUp `next_claim` outputs when another layer remains. | -| `current_claim` | A claim AIR's contribution to `T_i(rho)`. | Emitted by product/LogUp claim AIRs; corresponds to current Rust `lambda_prime_claim`. | -| `next_claim` | A claim AIR's contribution to `C_{i+1}(rho, mu)`. | Emitted by product/LogUp claim AIRs; corresponds to current Rust `lambda_claim`. | -| `input_layer_claim` | Final reduced claim after the terminal/input layer. | Emitted by tower to `MainBus`; downstream modules interpret its constraint meaning. | +| Name | Meaning | +|------|---------| +| `proof_idx` | Logical index of the Ceno proof being recursively verified; scopes all per-proof buses. | +| `chip_id` | VK-assigned unique id for one chip; together with `proof_idx`, uniquely identifies one tower proof. | +| `layer_idx` | Active tower layer currently being reduced inside one `(proof_idx, chip_id)` tower proof. | +| `tidx` | Per-proof transcript cursor at which an observe/sample event occurs. | +| `num_layers` | VK/proof-shape-derived number of active tower layers for this chip tower proof. | +| `num_read_specs` | VK-derived number of read product specs for this chip. | +| `num_write_specs` | VK-derived number of write product specs for this chip. | +| `num_logup_specs` | VK-derived number of LogUp specs for this chip. | +| `num_prod_specs` | `num_read_specs + num_write_specs`; controls the product batching width. | +| `r0_claim` | Product of read root out-eval pairs for one chip tower proof: `product_k r_out_evals[k][0] * r_out_evals[k][1]`. | +| `w0_claim` | Product of write root out-eval pairs for one chip tower proof: `product_k w_out_evals[k][0] * w_out_evals[k][1]`. | +| `p0_claim` | Root LogUp numerator claim for one chip tower proof. | +| `q0_claim` | Root LogUp denominator claim for one chip tower proof. | +| `C_0([])` | Zero-variable root/output claim. It is not the initial tower sumcheck claim. | +| `initial_tower_claim` | Batched initial tower claim `C_1(r_1)` derived from root out-evals. | +| `lambda_cur` | Batching challenge for the current layer's sumcheck expected-evaluation body `T_i(rho)`. | +| `lambda_next` | Batching challenge for the next layer, used in the outgoing expected claim `C_{i+1}(rho, mu)`. | +| `rho` | Sumcheck evaluation point produced by the layer sumcheck rounds. | +| `r_i` | Current layer point in the current claim `C_i(r_i)`. | +| `mu` | Merge/interpolation challenge for deriving the next layer point and expected claim. | +| `T_i(rho)` | Current layer sumcheck expected-evaluation body computed from child claims at `rho`, before multiplying by `eq(r_i, rho)`. | +| `C_{i+1}(rho, mu)` | Next layer's expected batched claim after interpolation at `mu`. | +| `eval_claim` | A claim AIR's contribution to `T_i(rho)`. | +| `next_claim` | A claim AIR's contribution to `C_{i+1}(rho, mu)`. | +| `DeriveLayerClaims` | Claim AIR mode that outputs `(eval_claim, next_claim)` for a tower layer. | +| `DeriveOutputClaim` | Claim AIR mode that outputs chip-level root claims and `C_1(r_1)` contributions from root out-evals. | +| `input_layer_claim` | Final reduced claim after the terminal/input layer, emitted by tower to `MainBus`. | + +## Protocol Math + +This section is the semantic source of truth for the tower layer reduction. The AIR-specific sections in +`tower_air_spec.md` describe how the trace and buses realize these identities; protocol changes must preserve this math. + +Layer `0` is the zero-variable root/output layer. It is not the initial tower sumcheck claim. The verifier first samples +`r_1` and derives the initial tower claim `C_1(r_1)` from `r_out_evals`, `w_out_evals`, and `lk_out_evals`. Separately, +the same root out-evals are also folded into +`r0_claim`, `w0_claim`, `p0_claim`, and `q0_claim` for proof-shape's cross-chip checks. + +For an active tower sumcheck layer `i >= 1`, `C_i(r_i)` is the current claim passed into the sumcheck. For binary fan-in, +a child point is written as `(b, t)` with `t in {0, 1}`. The sumcheck proves the value at `r_i` by summing over Boolean +`b` with the multilinear equality polynomial `eq(r_i, b)`. + +For a product spec `j`: + +```text +Prod_j^i(b) = Prod_j^{i+1}(b, 0) * Prod_j^{i+1}(b, 1) + +Prod_j^i(r) = + sum_b eq(r, b) * Prod_j^{i+1}(b, 0) * Prod_j^{i+1}(b, 1) +``` + +For a LogUp spec `k`, with numerator `P_k` and denominator `Q_k`, the relation is fraction addition: + +```text +P_k^i(b) / Q_k^i(b) = + P_k^{i+1}(b, 0) / Q_k^{i+1}(b, 0) + + P_k^{i+1}(b, 1) / Q_k^{i+1}(b, 1) +``` + +Equivalently: + +```text +P_k^i(b) = + P_k^{i+1}(b, 0) * Q_k^{i+1}(b, 1) + + P_k^{i+1}(b, 1) * Q_k^{i+1}(b, 0) + +Q_k^i(b) = + Q_k^{i+1}(b, 0) * Q_k^{i+1}(b, 1) +``` + +Product specs include both read and write specs. LogUp specs contribute two batched polynomials, `P_k` and `Q_k`. For +layer `i`, `lambda_cur = lambda_i` is the batching challenge for the current sumcheck expected-evaluation body +`T_i(rho)`, and `lambda_next = lambda_{i+1}` is the fresh batching challenge for the outgoing next-layer claim. If there +are `n_prod` product specs, the flattened batching order is: + +```text +Prod_0, ..., Prod_{n_prod-1}, P_0, Q_0, P_1, Q_1, ... +``` + +so the weights are consecutive powers of the active batching challenge. + +Root read/write claims are separate cross-chip claims, not layer-sumcheck target contributions. For one chip: + +```text +r0_claim = product_k r_out_evals[k][0] * r_out_evals[k][1] +w0_claim = product_k w_out_evals[k][0] * w_out_evals[k][1] +``` + +These products have no `lambda` weights and are not the initial tower sum. They are returned to `ProofShapeAir` so it can +check the global read/write product relation across chips. + +The tower sumcheck starts from the batched initial tower claim `C_1(r_1)`, derived from the same root out-evals. For one +chip, with `n_read` read specs, `n_write` write specs, and `n_prod = n_read + n_write`: + +```text +R_j(r_1) = (1 - r_1) * r_out_evals[j][0] + r_1 * r_out_evals[j][1] +W_j(r_1) = (1 - r_1) * w_out_evals[j][0] + r_1 * w_out_evals[j][1] + +read_initial_claim = + sum_{j=0}^{n_read-1} lambda_1^j * R_j(r_1) + +write_initial_claim = + sum_{j=0}^{n_write-1} lambda_1^(n_read + j) * W_j(r_1) +``` + +For LogUp root out-evals: + +```text +P_k(r_1) = (1 - r_1) * P_k(0) + r_1 * P_k(1) +Q_k(r_1) = (1 - r_1) * Q_k(0) + r_1 * Q_k(1) + +logup_initial_claim = + sum_k lambda_1^(n_prod + 2k) * P_k(r_1) + + sum_k lambda_1^(n_prod + 2k + 1) * Q_k(r_1) +``` + +and: + +```text +C_1(r_1) = read_initial_claim + write_initial_claim + logup_initial_claim +``` + +The root fold AIRs must use the same observed root out-eval rows for both the root claims +`(r0_claim, w0_claim, p0_claim, q0_claim)` and these initial-claim contributions. The generic batched claim at layer `i` +is: + +```text +C_i(r) = + sum_j lambda_cur^j * Prod_j^i(r) + + sum_k lambda_cur^{n_prod + 2k} * P_k^i(r) + + sum_k lambda_cur^{n_prod + 2k + 1} * Q_k^i(r) +``` + +Here `lambda_cur^0 = 1`, matching `get_challenge_pows`. + +Substituting the layer relations gives the current layer sumcheck expected-evaluation body: + +```text +C_i(r) = sum_b eq(r, b) * T_i(b) + +T_i(b) = + sum_j lambda_cur^j * Prod_j^{i+1}(b, 0) * Prod_j^{i+1}(b, 1) + + + sum_k lambda_cur^{n_prod + 2k} * ( + P_k^{i+1}(b, 0) * Q_k^{i+1}(b, 1) + + P_k^{i+1}(b, 1) * Q_k^{i+1}(b, 0) + ) + + + sum_k lambda_cur^{n_prod + 2k + 1} * ( + Q_k^{i+1}(b, 0) * Q_k^{i+1}(b, 1) + ) +``` + +If the sumcheck samples point `rho`, its final evaluation is: + +```text +claim_out = eq(r_i, rho) * T_i(rho) +``` + +where `eq(r_i, rho)` is accumulated round-by-round as: + +```text +eq_next = eq_cur * (xi * rho_i + (1 - xi) * (1 - rho_i)) +``` + +Verifier view for active tower layer `i >= 1`: + +1. Verify the sumcheck proof for the current claim `C_i(r_i)`. The proof returns the point `rho` and a final evaluation. + The child layer claims at that point are: + + ```text + Prod_j^{i+1}(rho, 0), Prod_j^{i+1}(rho, 1) + + P_k^{i+1}(rho, 0), P_k^{i+1}(rho, 1), + Q_k^{i+1}(rho, 0), Q_k^{i+1}(rho, 1) + ``` + + From those claims, compute: + + ```text + T_i(rho) = + sum_j lambda_cur^j * Prod_j^{i+1}(rho, 0) * Prod_j^{i+1}(rho, 1) + + + sum_k lambda_cur^{n_prod + 2k} * ( + P_k^{i+1}(rho, 0) * Q_k^{i+1}(rho, 1) + + P_k^{i+1}(rho, 1) * Q_k^{i+1}(rho, 0) + ) + + + sum_k lambda_cur^{n_prod + 2k + 1} * ( + Q_k^{i+1}(rho, 0) * Q_k^{i+1}(rho, 1) + ) + ``` + + The sumcheck final evaluation must equal `eq(r_i, rho) * T_i(rho)`. + +2. If layer `i + 1` is not terminal, derive the next layer's expected sum after sampling `mu` and a fresh batching + challenge `lambda_next`: + + ```text + r_next = (rho, mu) + + C_{i+1}(r_next) = + sum_j lambda_next^j * Prod_j^{i+1}(r_next) + + sum_k lambda_next^{n_prod + 2k} * P_k^{i+1}(r_next) + + sum_k lambda_next^{n_prod + 2k + 1} * Q_k^{i+1}(r_next) + ``` + + Each carried claim is the multilinear interpolation at `mu`: + + ```text + Prod_j^{i+1}(r_next) = + (1 - mu) * Prod_j^{i+1}(rho, 0) + + mu * Prod_j^{i+1}(rho, 1) + ``` + + with the same interpolation for each LogUp `P_k` and `Q_k`. Specs that have no remaining reduction round do not + contribute to the next expected sum. + +Both LogUp cross terms in `T_i` are part of the semantic statement. If an implementation splits or reuses accumulators, +the final sumcheck target must still include the `P0 * Q1 + P1 * Q0` numerator-cross contribution and the `Q0 * Q1` +denominator-cross contribution with their corresponding powers of `lambda_cur`. ## Module Interaction Diagram @@ -58,8 +278,8 @@ flowchart LR DOWN[Downstream batch-constraint modules] PS -- "TowerModuleBus\n(proof_idx, chip_id, num_layers,\nnum_read_specs, num_write_specs, num_logup_specs)" --> TW - PS -- "AirShapeBus / shape lookups\nVK-derived counts and trace shape" --> TW + TW -- "TowerRootClaimBus\n(proof_idx, chip_id, r0_claim,\nw0_claim, p0_claim, q0_claim)" --> PS TW -- "MainBus\n(proof_idx, chip_id, final_tidx, input_layer_claim)" --> MAIN MAIN -- "main claim / expression claim" --> DOWN @@ -67,7 +287,7 @@ flowchart LR Boundary rules: -- `ProofShapeModule` is the source of VK-derived tower shape metadata. +- `ProofShapeModule` is the source of VK-derived tower shape metadata and checks cross-chip root claims. - `TowerModule` verifies the tower proof and emits reduced claims; it does not interpret downstream constraint semantics. - `MainModule` and downstream modules check the reduced claim against the rest of the recursive verifier statement. @@ -116,7 +336,8 @@ the corresponding chip tower proof: - `tower_proof.prod_specs_eval.len() == num_prod_specs`; - `tower_proof.logup_specs_eval.len() == num_logup_specs`; -- initial and per-layer batching use `num_prod_specs + 2 * num_logup_specs` powers of `alpha`; +- initial and per-layer batching use `num_prod_specs + 2 * num_logup_specs` powers of `lambda_cur` for + expected-evaluation checks and `lambda_next` for outgoing next-layer claims; - read/write/LogUp claim AIRs use `num_read_specs`, `num_write_specs`, and `num_logup_specs` when folding child claims. Tower also relies on proof-shape/metadata checks for: @@ -131,11 +352,6 @@ Tower also relies on proof-shape/metadata checks for: Shape metadata is security relevant. If the recursive verifier accepts a different active-round schedule or different product/LogUp spec counts than the native verifier, it can verify a different tower statement. -Current implementation note: `TowerModuleMessage` only carries `(idx, tidx, n_logup)`, is sent from the proof-shape -summary row, and the tower input trace is still collapsed to one record per proof. The intended design is one -`TowerModuleBus` shape message and one tower input record per `(proof_idx, chip_id)`, carrying -`(proof_idx, chip_id, num_layers, num_read_specs, num_write_specs, num_logup_specs)`. - ### Main / Downstream Claim Module `TowerInputAir` emits the reduced tower claim through `MainBus`, keyed by `proof_idx`: @@ -178,20 +394,24 @@ TowerLogupSumCheckClaimAir The internal AIR split may change, but the module boundary should continue to expose protocol-level values: ```text -current layer: T_i(rho) +current claim: C_i(r_i) +expected eval: T_i(rho) next layer: C_{i+1}(rho, mu) terminal layer: reduced input-layer claim ``` -Column names such as `*_prime` are implementation details and should not define the protocol contract. +Use `lambda_cur` for the current layer expected-evaluation path `T_i(rho)` and `lambda_next` for the outgoing +`C_{i+1}(rho, mu)` path. -When the current Rust bus structs use `lambda_claim` and `lambda_prime_claim`, the contract-level names are: +When the current Rust bus structs use `lambda_next_claim` and `lambda_cur_claim`, the contract-level names are: ```text -lambda_claim = next_claim = contribution to C_{i+1}(rho, mu) -lambda_prime_claim = current_claim = contribution to T_i(rho) +lambda_next_claim = next_claim = contribution to C_{i+1}(rho, mu) +lambda_cur_claim = eval_claim = contribution to T_i(rho) ``` +`lambda_cur_claim` is not the current claim. The current claim is `C_i(r_i)`, the value passed into the sumcheck proof. + ## AIR Contract Priority The first specification target is the contract of each AIR: what statement it receives, what statement it emits, and @@ -202,7 +422,6 @@ For tower, an AIR contract should state: - the exact bus messages received and sent; - the transcript events the AIR owns, in native verifier order; -- the shape metadata the AIR trusts and where that metadata comes from; - the algebraic meaning of each emitted claim; - the `proof_idx` and `chip_id` scope of every message; - the zero-work behavior when a chip has no tower interactions. @@ -218,6 +437,12 @@ constraint error should usually map to one arrow: either the producer did not se used the wrong scope/counter, or a transcript event was placed at the wrong `tidx`. Exact cursor arithmetic is handled by the `tidx` columns and `tower_transcript_len`. +Only `TranscriptBus` arrows define transcript chronology. Other arrows are bus dependencies between AIR rows; they can +carry values whose transcript samples occur earlier or later in native verifier order. When debugging a bus failure, map +the failing bus to the nearest arrow with the same producer/consumer pair, then check scope fields, counters, and +`tidx`. When debugging transcript failures, ignore non-transcript arrows and follow only the numbered `TranscriptBus` +events. + ```mermaid sequenceDiagram participant PS as ProofShapeAir @@ -230,121 +455,105 @@ sequenceDiagram participant SC as TowerLayerSumcheckAir participant M as MainAir - PS->>TI: 0. TowerModuleBus(proof_idx, chip_id, num_layers, num_read_specs, num_write_specs, num_logup_specs) - TI->>TR: 1. TranscriptBus sample alpha powers for initial batching - TI->>TR: 2. TranscriptBus sample product_sum / root point - TI->>TL: 3. TowerLayerInputBus(chip_id, layer_tidx, num_layers, spec counts, r0_claim, w0_claim, q0_claim) - - loop each active tower layer + PS->>TI: 0. TowerModuleBus: tower shape + TI->>PR: 1a. TowerReadRootInputBus: read root/init + TI->>PW: 1b. TowerWriteRootInputBus: write root/init + TI->>LU: 1c. TowerLogupRootInputBus: LogUp root/init + PR->>TR: 2a. TranscriptBus: observe root read out-evals + PW->>TR: 2b. TranscriptBus: observe root write out-evals + LU->>TR: 2c. TranscriptBus: observe root LogUp out-evals + TI->>TR: 3. TranscriptBus: sample lambda_1, r_1 + PR->>TI: 4a. Read root+init buses: r0 + C_1 part + PW->>TI: 4b. Write root+init buses: w0 + C_1 part + LU->>TI: 4c. LogUp root+init buses: p0/q0 + C_1 part + TI->>PS: 4d. TowerRootClaimBus: root claims + TI->>TL: 5. TowerLayerInputBus: initial claim + shape + + loop each active reduction layer i alt layer_idx > 0 - TL->>TR: 4. TranscriptBus sample fresh alpha/lambda for layer i - TL->>SC: 5. TowerSumcheckInputBus(chip_id, layer_idx, claim = C_i(r)) + TL->>SC: 6. TowerSumcheckInputBus: C_i(r_i) loop each sumcheck round - SC->>TR: 6a. TranscriptBus observe ev1, ev2, ev3 - SC->>TR: 6b. TranscriptBus sample rho round challenge - SC-->>SC: 6c. update sumcheck claim and eq(r, rho) + SC->>TR: 7a. TranscriptBus: observe round evals + SC->>TR: 7b. TranscriptBus: sample rho + SC-->>SC: 7c. update claim + eq end - SC->>TL: 7. TowerSumcheckOutputBus(chip_id, layer_idx, final_eval, eq_at_r_prime) + SC->>TL: 8. TowerSumcheckOutputBus: final_eval + eq + end + + TL->>PR: 9a. TowerProdReadClaimInputBus: read layer fold + TL->>PW: 9b. TowerProdWriteClaimInputBus: write layer fold + TL->>LU: 9c. TowerLogupClaimInputBus: LogUp layer fold + PR->>TR: 10a. TranscriptBus: observe read claims + PW->>TR: 10b. TranscriptBus: observe write claims + LU->>TR: 10c. TranscriptBus: observe LogUp claims + TL->>TR: 11. TranscriptBus: sample mu_i + + alt non-last layer + TL->>TR: 12. TranscriptBus: sample lambda_next + else last layer + TL-->>TL: 12. no lambda_next sample end - TL->>PR: 8a. TowerProdReadClaimInputBus(chip_id, layer_idx, lambda, lambda_prime, mu, claim_tidx) - TL->>PW: 8b. TowerProdWriteClaimInputBus(chip_id, layer_idx, lambda, lambda_prime, mu, claim_tidx) - TL->>LU: 8c. TowerLogupClaimInputBus(chip_id, layer_idx, lambda, lambda_prime, mu, claim_tidx) - PR->>TR: 9a. TranscriptBus observe read child claims - PW->>TR: 9b. TranscriptBus observe write child claims - LU->>TR: 9c. TranscriptBus observe LogUp child claims - PR->>TL: 10a. TowerProdReadClaimBus(chip_id, layer_idx, read_next_claim, read_current_claim, num_read_count) - PW->>TL: 10b. TowerProdWriteClaimBus(chip_id, layer_idx, write_next_claim, write_current_claim, num_write_count) - LU->>TL: 10c. TowerLogupClaimBus(chip_id, layer_idx, logup_next_claim, logup_current_claim, num_logup_count) - TL->>TL: 11. derive T_i(rho) and check final_eval = eq(r, rho) * T_i(rho) - TL->>TR: 12. TranscriptBus sample merge challenge mu + PR->>TL: 13a. TowerProdReadClaimBus: read claims + end powers + PW->>TL: 13b. TowerProdWriteClaimBus: write claims + end powers + LU->>TL: 13c. TowerLogupClaimBus: LogUp claims + TL->>TL: 14. check final_eval = eq * T_i(rho) alt non-last layer - TL->>SC: 13a. TowerSumcheckChallengeBus(chip_id, layer_idx, mu as next layer round 0 challenge) - TL->>TL: 13b. derive next expected claim C_{i+1}(rho, mu) + TL->>SC: 15a. TowerSumcheckChallengeBus: seed next layer + TL->>TL: 15b. derive C_{i+1}(rho, mu_i) else last layer - TL->>TL: 13c. terminal layer produces input_layer_claim + TL->>TL: 15c. produce input_layer_claim end end - TL->>TI: 14. TowerLayerOutputBus(chip_id, final_tidx, input_layer_claim, lambda, mu) - TI->>M: 15. MainBus(chip_id, final_tidx, input_layer_claim) + TL->>TI: 16. TowerLayerOutputBus: tower result + TI->>M: 17. MainBus: reduced claim ``` +Step 3 represents two transcript samples with distinct cursor positions: `lambda_1` followed by `r_1`. +The step-1 root input buses may carry `lambda_1` and `r_1` as values even though the transcript samples occur at step 3. +Only `TranscriptBus` arrows define transcript chronology. The root AIRs must use the same observed out-eval rows for both +the step-4 root claims and the step-4 initial-claim contributions. For each root fold AIR, the root bus and init bus are +sent by the same final accumulator row. + ## Per-AIR Contracts -Read these contracts as details for each participant in the sequence diagram above. +Read this section as the semantic contract for each participant. It intentionally does not repeat full bus payloads; the +bus table below is the source of truth for producer, consumer, payload, and scope. If a bus-balance error needs exact +fields, go to the bus table first. If the bus balances but the proof is semantically wrong, use this section. ### ProofShapeAir -`ProofShapeAir` is outside the tower module, but it is the source of tower shape truth. For each `(proof_idx, chip_id)` -tower proof, it selects the chip VK metadata and sends: - -```text -TowerModuleBus( - proof_idx, - chip_id, - num_layers, - num_read_specs, - num_write_specs, - num_logup_specs -) -``` +`ProofShapeAir` is outside the tower module. It owns VK-derived shape truth and must not derive tower counts from the +proof. For each `(proof_idx, chip_id)` tower proof, it sends `num_layers`, `num_read_specs`, `num_write_specs`, and +`num_logup_specs` to `TowerInputAir`. -The counts are VK-derived: +It also consumes each chip's root claims from `TowerInputAir` and, across all chips in the same `proof_idx`, enforces: ```text -num_read_specs = num_read_count -num_write_specs = num_write_count -num_logup_specs = num_logup_count -num_prod_specs = num_read_specs + num_write_specs +prod_chip r0_claim = prod_chip w0_claim +sum_chip p0_claim / q0_claim = 0 / x ``` -`ProofShapeAir` must not derive these counts from the proof. It may also publish per-layer count metadata through -`AirShapeBus`, but that metadata must agree with the same selected VK and tower instance. +Equivalently, the product of read roots equals the product of write roots, and the sum of all chip LogUp fractional +pairs has zero numerator. These root claims are not the same as `initial_tower_claim = C_1(r_1)` or any `T_i(rho)` +expected-evaluation contribution. The denominator value `x` is unconstrained except that the fraction representation must +follow the LogUp/fraction-folding contract used by the downstream proof-shape checks. ### TowerInputAir `TowerInputAir` is the per-`(proof_idx, chip_id)` tower entry and exit boundary. -Receives: - -```text -TowerModuleBus( - proof_idx, - chip_id, - num_layers, - num_read_specs, - num_write_specs, - num_logup_specs -) -TowerLayerOutputBus(chip_id, final_tidx, layer_idx_end, input_layer_claim, lambda, mu) -``` - -Sends: - -```text -TowerLayerInputBus( - chip_id, - layer_tidx, - num_layers, - num_read_specs, - num_write_specs, - num_logup_specs, - r0_claim, - w0_claim, - q0_claim -) -MainBus(chip_id, final_tidx, input_layer_claim) -TranscriptBus samples/observations for the initial tower transcript prefix -``` - -Contract: - - one active `TowerInputAir` row corresponds to one `(proof_idx, chip_id)` tower proof; - it anchors the VK-derived spec counts to the tower instance, but does not derive them; - it obtains tower transcript cursor state from the transcript/tower transcript contract, not from `TowerModuleBus`; -- it forwards `num_layers`, `num_read_specs`, `num_write_specs`, and `num_logup_specs` to `TowerLayerAir`; +- it sends output-mode fold tasks to the product and LogUp claim AIRs; these tasks may carry `lambda_1` and `r_1` as + data even though transcript chronology is enforced only by `TranscriptBus`; +- it receives root initial-claim contributions and computes + `initial_tower_claim = read_initial_claim + write_initial_claim + logup_initial_claim`; +- it forwards `num_layers`, spec counts, and `initial_tower_claim = C_1(r_1)` to `TowerLayerAir`; +- it returns `r0_claim`, `w0_claim`, `p0_claim`, and `q0_claim` to `ProofShapeAir` for cross-chip root checks; - it routes the final reduced input-layer claim to `MainBus` under the same `chip_id`; - if `num_layers = 0` or all spec counts are zero, it must not create active layer work and must use the documented zero-work output behavior. @@ -353,121 +562,84 @@ Contract: `TowerLayerAir` is the layer orchestrator. It owns the transition from one tower layer claim to the next expected claim. -Receives: - -```text -TowerLayerInputBus( - chip_id, - layer_tidx, - num_layers, - num_read_specs, - num_write_specs, - num_logup_specs, - r0_claim, - w0_claim, - q0_claim -) -TowerSumcheckOutputBus(chip_id, layer_idx, tidx_after_sumcheck, claim_out, eq_at_r_prime) -TowerProdReadClaimBus(chip_id, layer_idx, read_next_claim, read_current_claim, num_read_count) -TowerProdWriteClaimBus(chip_id, layer_idx, write_next_claim, write_current_claim, num_write_count) -TowerLogupClaimBus(chip_id, layer_idx, logup_next_claim, logup_current_claim, num_logup_count) -optional AirShapeBus count metadata for cross-checking the selected chip -``` - -Sends: - -```text -TowerSumcheckInputBus(chip_id, layer_idx, is_last_layer, sumcheck_tidx, claim) -TowerProdReadClaimInputBus(chip_id, layer_idx, claim_tidx, lambda, lambda_prime, mu) -TowerProdWriteClaimInputBus(chip_id, layer_idx, claim_tidx, lambda, lambda_prime, mu) -TowerLogupClaimInputBus(chip_id, layer_idx, claim_tidx, lambda, lambda_prime, mu) -TowerSumcheckChallengeBus(chip_id, layer_idx, round, challenge) -TowerLayerOutputBus(chip_id, final_tidx, layer_idx_end, input_layer_claim, lambda, mu) -TranscriptBus samples for layer batching and merge challenges -``` - -Contract: - - for layer `i`, it supplies the claim that the layer sumcheck must verify; -- from claim AIR outputs, it derives `T_i(rho)` and checks the sumcheck final evaluation against +- from claim AIR outputs, it derives the expected-evaluation body `T_i(rho)` and checks the sumcheck final evaluation against `eq(r_i, rho) * T_i(rho)`; - if another layer remains, it derives the next expected claim `C_{i+1}(rho, mu)`; - if this is the terminal/input layer, it emits the reduced `input_layer_claim`; -- it checks read/write/LogUp counts against the proof-shape metadata forwarded from `TowerInputAir`, not against - proof-provided lengths alone. +- it owns `lambda_cur`/`lambda_next` power handoff between read products, write products, and LogUp specs; +- it checks read/write/LogUp counts against the shape metadata forwarded from `TowerInputAir`/`TowerModuleBus`, not + against proof-provided lengths alone. ### TowerLayerSumcheckAir `TowerLayerSumcheckAir` owns only the sumcheck transcript and sumcheck algebra for one layer. -Receives: - -```text -TowerSumcheckInputBus(chip_id, layer_idx, is_last_layer, sumcheck_tidx, claim) -TowerSumcheckChallengeBus(chip_id, layer_idx, previous_round, challenge) -TranscriptBus observations for prover messages -TranscriptBus samples for rho challenges -``` - -Sends: - -```text -TowerSumcheckOutputBus(chip_id, layer_idx, tidx_after_sumcheck, final_evaluation, eq_at_r_prime) -TowerSumcheckChallengeBus(chip_id, layer_idx, round, challenge) -``` - -Contract: - - it verifies the univariate sumcheck rounds starting from the input claim; - it emits the final sumcheck evaluation and `eq_at_r_prime = eq(r_i, rho)`; - it does not know product or LogUp semantics; `TowerLayerAir` interprets the final evaluation against `T_i(rho)`. ### TowerProdReadSumCheckClaimAir and TowerProdWriteSumCheckClaimAir -The read and write product claim AIRs have the same contract, with separate buses. - -Receives: +The read and write product claim AIRs have the same contract, with separate buses. Each AIR supports two modes: ```text -TowerProd{Read,Write}ClaimInputBus(chip_id, layer_idx, claim_tidx, lambda, lambda_prime, mu) -TranscriptBus observations for active child product claims -``` +DeriveLayerClaims: + consumes layer child claims and returns (next_claim, eval_claim) -Sends: - -```text -TowerProd{Read,Write}ClaimBus(chip_id, layer_idx, next_claim, current_claim, num_prod_count) +DeriveOutputClaim: + consumes root out-eval pairs and returns r0_claim/w0_claim plus the C_1 contribution ``` -Contract: - -- `current_claim` is this product group's contribution to `T_i(rho)`; -- `next_claim` is this product group's contribution to `C_{i+1}(rho, mu)`; -- `num_prod_count` is the number of active read or write product specs for the selected chip/layer; +- in `DeriveLayerClaims` mode, `current_claim` is the current code/bus name for this product group's `eval_claim`, i.e. + its contribution to `T_i(rho)`; +- in `DeriveLayerClaims` mode, `next_claim` is this product group's contribution to `C_{i+1}(rho, mu)`; +- in `DeriveLayerClaims` mode, `prod_offset` is the first flattened product index handled by this AIR instance: `0` for + read products and `num_read_specs` for write products; +- in `DeriveLayerClaims` mode, `lambda_next_start` and `lambda_cur_start` are the batching powers at `prod_offset`; +- in `DeriveLayerClaims` mode, `lambda_next_end` and `lambda_cur_end` are the batching powers immediately after this + product group and must be used as the next group's start powers; +- in `DeriveOutputClaim` mode, `output_claim = product_k P_k(0) * P_k(1)`; +- in `DeriveOutputClaim` mode, + `initial_claim = sum_k lambda_1^(prod_offset + k) * ((1 - r_1) * P_k(0) + r_1 * P_k(1))`; +- in `DeriveOutputClaim` mode, the read AIR returns `r0_claim` and the write AIR returns `w0_claim`; +- in `DeriveOutputClaim` mode, the read/write initial claims are contributions to `initial_tower_claim = C_1(r_1)`; +- in `DeriveOutputClaim` mode, the root bus and init bus are sent by the same final accumulator row; +- `num_prod_count` fixes the number of active read or write product accumulator rows for the selected chip/layer or root + claim group; - the AIR owns the transcript observation of the product child claims but not the layer-level sumcheck check. ### TowerLogupSumCheckClaimAir -`TowerLogupSumCheckClaimAir` folds the LogUp numerator/denominator child claims. - -Receives: +`TowerLogupSumCheckClaimAir` folds the LogUp numerator/denominator child claims. It supports two modes: ```text -TowerLogupClaimInputBus(chip_id, layer_idx, claim_tidx, lambda, lambda_prime, mu) -TranscriptBus observations for active LogUp child claims -``` +DeriveLayerClaims: + consumes layer child claims and returns (next_claim, eval_claim) -Sends: - -```text -TowerLogupClaimBus(chip_id, layer_idx, next_claim, current_claim, num_logup_count) +DeriveOutputClaim: + consumes root LogUp out-evals and returns (p0_claim, q0_claim) plus the C_1 contribution ``` -Contract: - -- `current_claim` is the LogUp contribution to `T_i(rho)` using the native numerator/denominator reduction; -- `next_claim` is the LogUp contribution to `C_{i+1}(rho, mu)` after interpolation at `mu`; -- `num_logup_count` is the number of active LogUp specs for the selected chip/layer; +- in `DeriveLayerClaims` mode, `current_claim` is the current code/bus name for this LogUp group's `eval_claim`, i.e. its + contribution to `T_i(rho)` using the native numerator/denominator reduction; +- in `DeriveLayerClaims` mode, `next_claim` is the LogUp contribution to `C_{i+1}(rho, mu)` after interpolation at `mu`; +- in `DeriveLayerClaims` mode, `lambda_next_start` and `lambda_cur_start` must equal the product write end powers, i.e. + the powers at `num_read_specs + num_write_specs`; +- in `DeriveLayerClaims` mode, each LogUp accumulator row consumes two batching powers: one for `P_k` and one for `Q_k`; +- in `DeriveOutputClaim` mode, each row computes + `P_cross = P(0) * Q(1) + P(1) * Q(0)` and `Q_cross = Q(0) * Q(1)`; +- in `DeriveOutputClaim` mode, the AIR folds `p0_claim / q0_claim = sum_k P_cross_k / Q_cross_k`; +- in `DeriveOutputClaim` mode, the AIR also folds: + + ```text + logup_initial_claim = + sum_k lambda_1^(logup_offset + 2k) * P_k(r_1) + + sum_k lambda_1^(logup_offset + 2k + 1) * Q_k(r_1) + ``` + +- in `DeriveOutputClaim` mode, `TowerLogupRootBus` and `TowerLogupInitBus` are sent by the same final accumulator row; +- `num_logup_count` fixes the number of active LogUp accumulator rows for the selected chip/layer or root claim group; - it observes LogUp child claims in the same order as the native verifier. ## Bus Summary @@ -479,29 +651,45 @@ proof inside the recursive verifier. The `TowerModuleBus` payload is the proof-s typed per-proof bus key, the implementation may omit it from the message struct while preserving it as part of the logical contract. +For bus-balance logs, a positive count is a send and a negative count is a receive. In BabyBear logs the raw value +`2013265920` is `-1`. The first debugging question is therefore: "which row should have produced the exact same payload +under the opposite sign?" + | Bus | Bus Type | Producer | Consumer | Payload | Scope | Meaning | |-----|----------|----------|----------|---------|-------|---------| | `TowerModuleBus` | `PermutationCheck` | `ProofShapeAir` | `TowerInputAir` | `(proof_idx, chip_id, num_layers, num_read_specs, num_write_specs, num_logup_specs)` | per `(proof_idx, chip_id)` tower proof | Starts one tower verification with VK-derived shape metadata. | -| `TowerLayerInputBus` | `PermutationCheck` | `TowerInputAir` | `TowerLayerAir` | `(chip_id, layer_tidx, num_layers, num_read_specs, num_write_specs, num_logup_specs, r0_claim, w0_claim, q0_claim)` | per `(proof_idx, chip_id)` tower proof | Anchors the initial batched claims and shape counts for all tower layers. | -| `TowerLayerOutputBus` | `PermutationCheck` | `TowerLayerAir` | `TowerInputAir` | `(chip_id, final_tidx, layer_idx_end, input_layer_claim, lambda, mu)` | per `(proof_idx, chip_id)` tower proof | Returns the reduced terminal/input-layer tower claim. | -| `TowerProdReadClaimInputBus` | `PermutationCheck` | `TowerLayerAir` | `TowerProdReadSumCheckClaimAir` | `(chip_id, layer_idx, claim_tidx, lambda, lambda_prime, mu)` | per `(proof_idx, chip_id, layer_idx)` | Starts read product child-claim folding for one layer. | -| `TowerProdReadClaimBus` | `PermutationCheck` | `TowerProdReadSumCheckClaimAir` | `TowerLayerAir` | `(chip_id, layer_idx, read_next_claim, read_current_claim, num_read_count)` | per `(proof_idx, chip_id, layer_idx)` | Returns read product contributions to `C_{i+1}(rho, mu)` and `T_i(rho)`. | -| `TowerProdWriteClaimInputBus` | `PermutationCheck` | `TowerLayerAir` | `TowerProdWriteSumCheckClaimAir` | `(chip_id, layer_idx, claim_tidx, lambda, lambda_prime, mu)` | per `(proof_idx, chip_id, layer_idx)` | Starts write product child-claim folding for one layer. | -| `TowerProdWriteClaimBus` | `PermutationCheck` | `TowerProdWriteSumCheckClaimAir` | `TowerLayerAir` | `(chip_id, layer_idx, write_next_claim, write_current_claim, num_write_count)` | per `(proof_idx, chip_id, layer_idx)` | Returns write product contributions to `C_{i+1}(rho, mu)` and `T_i(rho)`. | -| `TowerLogupClaimInputBus` | `PermutationCheck` | `TowerLayerAir` | `TowerLogupSumCheckClaimAir` | `(chip_id, layer_idx, claim_tidx, lambda, lambda_prime, mu)` | per `(proof_idx, chip_id, layer_idx)` | Starts LogUp numerator/denominator child-claim folding for one layer. | -| `TowerLogupClaimBus` | `PermutationCheck` | `TowerLogupSumCheckClaimAir` | `TowerLayerAir` | `(chip_id, layer_idx, logup_next_claim, logup_current_claim, num_logup_count)` | per `(proof_idx, chip_id, layer_idx)` | Returns LogUp contributions to `C_{i+1}(rho, mu)` and `T_i(rho)`. | -| `TowerSumcheckInputBus` | `PermutationCheck` | `TowerLayerAir` | `TowerLayerSumcheckAir` | `(chip_id, layer_idx, is_last_layer, sumcheck_tidx, claim)` | per `(proof_idx, chip_id, layer_idx)` | Starts the layer sumcheck from expected claim `C_i(r_i)`. | +| `TowerLayerInputBus` | `PermutationCheck` | `TowerInputAir` | `TowerLayerAir` | `(chip_id, layer_tidx, num_layers, num_read_specs, num_write_specs, num_logup_specs, initial_tower_claim)` | per `(proof_idx, chip_id)` tower proof | Anchors the initial batched tower claim and shape counts for all tower layers. | +| `TowerLayerOutputBus` | `PermutationCheck` | `TowerLayerAir` | `TowerInputAir` | `(chip_id, final_tidx, layer_idx_end, input_layer_claim, lambda_next, mu)` | per `(proof_idx, chip_id)` tower proof | Returns the reduced terminal/input-layer tower claim. | +| `TowerReadRootInputBus` | `PermutationCheck` | `TowerInputAir` | `TowerProdReadSumCheckClaimAir` | `(chip_id, claim_tidx, lambda_1, r_1, lambda_1_start, num_read_count)` | per `(proof_idx, chip_id)` root claim group | Starts read root product folding and passes the initial-claim challenges. | +| `TowerReadRootBus` | `PermutationCheck` | `TowerProdReadSumCheckClaimAir` | `TowerInputAir` | `(chip_id, r0_claim)` | per `(proof_idx, chip_id)` root claim group | Returns `r0_claim = product_k r_out_evals[k][0] * r_out_evals[k][1]`. | +| `TowerReadInitBus` | `PermutationCheck` | `TowerProdReadSumCheckClaimAir` | `TowerInputAir` | `(chip_id, read_initial_claim)` | per `(proof_idx, chip_id)` root claim group | Returns the read contribution to `C_1(r_1)` from the same rows as `r0_claim`. | +| `TowerWriteRootInputBus` | `PermutationCheck` | `TowerInputAir` | `TowerProdWriteSumCheckClaimAir` | `(chip_id, claim_tidx, lambda_1, r_1, lambda_1_start, num_write_count)` | per `(proof_idx, chip_id)` root claim group | Starts write root product folding and passes the initial-claim challenges. | +| `TowerWriteRootBus` | `PermutationCheck` | `TowerProdWriteSumCheckClaimAir` | `TowerInputAir` | `(chip_id, w0_claim)` | per `(proof_idx, chip_id)` root claim group | Returns `w0_claim = product_k w_out_evals[k][0] * w_out_evals[k][1]`. | +| `TowerWriteInitBus` | `PermutationCheck` | `TowerProdWriteSumCheckClaimAir` | `TowerInputAir` | `(chip_id, write_initial_claim)` | per `(proof_idx, chip_id)` root claim group | Returns the write contribution to `C_1(r_1)` from the same rows as `w0_claim`. | +| `TowerLogupRootInputBus` | `PermutationCheck` | `TowerInputAir` | `TowerLogupSumCheckClaimAir` | `(chip_id, claim_tidx, lambda_1, r_1, lambda_1_start, num_logup_count)` | per `(proof_idx, chip_id)` root claim group | Starts root LogUp fractional folding and passes the initial-claim challenges. | +| `TowerLogupRootBus` | `PermutationCheck` | `TowerLogupSumCheckClaimAir` | `TowerInputAir` | `(chip_id, p0_claim, q0_claim)` | per `(proof_idx, chip_id)` root claim group | Returns the chip-level root LogUp fractional pair. | +| `TowerLogupInitBus` | `PermutationCheck` | `TowerLogupSumCheckClaimAir` | `TowerInputAir` | `(chip_id, logup_initial_claim)` | per `(proof_idx, chip_id)` root claim group | Returns the LogUp contribution to `C_1(r_1)` from the same rows as `(p0_claim, q0_claim)`. | +| `TowerProdReadClaimInputBus` | `PermutationCheck` | `TowerLayerAir` | `TowerProdReadSumCheckClaimAir` | `(chip_id, layer_idx, claim_tidx, lambda_next, lambda_cur, mu, prod_offset, lambda_next_start, lambda_cur_start, num_read_count)` | per `(proof_idx, chip_id, layer_idx)` | Starts read product child-claim folding at flattened product offset `0`; start powers are one. | +| `TowerProdReadClaimBus` | `PermutationCheck` | `TowerProdReadSumCheckClaimAir` | `TowerLayerAir` | `(chip_id, layer_idx, read_next_claim, read_current_claim, lambda_next_end, lambda_cur_end)` | per `(proof_idx, chip_id, layer_idx)` | Returns read product contributions and the powers after the read product range. | +| `TowerProdWriteClaimInputBus` | `PermutationCheck` | `TowerLayerAir` | `TowerProdWriteSumCheckClaimAir` | `(chip_id, layer_idx, claim_tidx, lambda_next, lambda_cur, mu, prod_offset, lambda_next_start, lambda_cur_start, num_write_count)` | per `(proof_idx, chip_id, layer_idx)` | Starts write product child-claim folding at `num_read_specs`; start powers are the read product end powers. | +| `TowerProdWriteClaimBus` | `PermutationCheck` | `TowerProdWriteSumCheckClaimAir` | `TowerLayerAir` | `(chip_id, layer_idx, write_next_claim, write_current_claim, lambda_next_end, lambda_cur_end)` | per `(proof_idx, chip_id, layer_idx)` | Returns write product contributions and the powers after the full product range. | +| `TowerLogupClaimInputBus` | `PermutationCheck` | `TowerLayerAir` | `TowerLogupSumCheckClaimAir` | `(chip_id, layer_idx, claim_tidx, lambda_next, lambda_cur, mu, lambda_next_start, lambda_cur_start, num_logup_count)` | per `(proof_idx, chip_id, layer_idx)` | Starts LogUp numerator/denominator child-claim folding at the product write end powers. | +| `TowerLogupClaimBus` | `PermutationCheck` | `TowerLogupSumCheckClaimAir` | `TowerLayerAir` | `(chip_id, layer_idx, logup_next_claim, logup_current_claim)` | per `(proof_idx, chip_id, layer_idx)` | Returns LogUp contributions to `C_{i+1}(rho, mu)` and `T_i(rho)`. | +| `TowerSumcheckInputBus` | `PermutationCheck` | `TowerLayerAir` | `TowerLayerSumcheckAir` | `(chip_id, layer_idx, is_last_layer, sumcheck_tidx, claim)` | per `(proof_idx, chip_id, layer_idx)` | Starts the layer sumcheck from current claim `C_i(r_i)`. | | `TowerSumcheckOutputBus` | `PermutationCheck` | `TowerLayerSumcheckAir` | `TowerLayerAir` | `(chip_id, layer_idx, tidx_after_sumcheck, final_evaluation, eq_at_r_prime)` | per `(proof_idx, chip_id, layer_idx)` | Returns the sumcheck final evaluation and `eq(r_i, rho)`. | | `TowerSumcheckChallengeBus` | `PermutationCheck` | `TowerLayerAir`, `TowerLayerSumcheckAir` | `TowerLayerSumcheckAir` | `(chip_id, layer_idx, round, challenge)` | per `(proof_idx, chip_id, layer_idx, round)` | Links each round challenge, including the layer-to-layer `mu` when it seeds the next layer. | +| `TowerRootClaimBus` | `PermutationCheck` | `TowerInputAir` | `ProofShapeAir` | `(chip_id, r0_claim, w0_claim, p0_claim, q0_claim)` | per `(proof_idx, chip_id)` tower proof | Returns chip root claims so proof-shape can enforce cross-chip product and LogUp fraction checks. | | `TranscriptBus` | `PermutationCheck` | `TranscriptAir` for samples; tower AIRs for observations | `TowerInputAir`, `TowerLayerAir`, `TowerLayerSumcheckAir`, product/logup claim AIRs for samples; `TranscriptAir` for observations | `(tidx, value, access metadata)` | per-proof | Enforces native verifier observe/sample order. | -| `AirShapeBus` | `Lookup` | `ProofShapeAir` | `TowerLayerAir` | VK-derived trace/count metadata | per-proof | Cross-checks selected tower shape against proof-shape metadata. | | `MainBus` | `PermutationCheck` | `TowerInputAir` | `MainAir` | `(chip_id, final_tidx, input_layer_claim)` | per `(proof_idx, chip_id)` main claim | Exports the reduced tower claim for downstream constraint checking. | ## Layer Verification Contract -For each non-terminal layer `i`, tower verifies two related values from the same child-layer claims. +Layer `0` is a zero-variable root/output layer. It is not the initial tower sumcheck claim. The initial tower sumcheck +claim is `C_1(r_1)`. For each non-terminal active layer `i >= 1`, tower verifies two related values from the same +child-layer claims. -First, it verifies the current sumcheck output: +First, it verifies the current sumcheck output. The current claim is `C_i(r_i)`, and the final sumcheck evaluation must +match the expected-evaluation body: ```text sumcheck_final_eval = eq(r_i, rho) * T_i(rho) @@ -521,7 +709,7 @@ Second, if another layer remains, it derives the next layer's expected claim: C_{i+1}(rho, mu) ``` -using interpolation at `mu` and fresh `alpha_next` powers. Specs that have no remaining active round must not contribute +using interpolation at `mu` and fresh `lambda_next` powers. Specs that have no remaining active round must not contribute to this next expected claim. ## Edge Cases / Zero Work Behavior @@ -559,10 +747,12 @@ field, or one transcript cursor. |---------|-------------|------------------------| | `TowerModuleBus` receive does not match | `proof_idx`, `chip_id`, and VK-derived counts from proof-shape | Proof-shape sent the wrong tower task or tower consumed it under the wrong scope. | | Multiple or missing tower input rows for a chip | one active `TowerInputAir` row per `(proof_idx, chip_id)` | Tower proof identity is not keyed exactly by `(proof_idx, chip_id)`. | -| Product or LogUp claim count mismatch | `num_read_specs`, `num_write_specs`, `num_logup_specs`, and per-layer active counts | Claim AIR is using proof-provided lengths or stale forwarded counts instead of proof-shape metadata. | +| Product or LogUp claim count mismatch | `num_read_specs`, `num_write_specs`, `num_logup_specs`, and per-layer active counts | Claim AIR is using proof-provided lengths or stale forwarded counts instead of `TowerModuleBus` shape metadata. | +| Product write claim bus fails but read claim bus balances | `prod_offset`, `lambda_next_start`, `lambda_cur_start` on write input | Write product folding did not start at the read product end powers. | +| LogUp claim bus fails with paired send/receive payloads differing only in last limbs | `lambda_next_start`, `lambda_cur_start`, and two-power LogUp progression | LogUp folding did not start after all product specs or advanced by one power instead of two. | | Sumcheck rounds fail before final evaluation | `TowerSumcheckInputBus`, round challenges, and transcript observations | `TowerLayerSumcheckAir` is not replaying the native sumcheck transcript/algebra for this layer. | -| Sumcheck final evaluation fails | `eq_at_r_prime`, `current_claim` outputs, and construction of `T_i(rho)` | `TowerLayerAir` assembled the current-layer expression incorrectly, or the claim AIRs swapped current/next claims. | -| Next layer expected claim is wrong | `next_claim` outputs, `mu`, inactive spec masks, and fresh `alpha_next` powers | `TowerLayerAir` derived `C_{i+1}(rho, mu)` with the wrong interpolation challenge or batching weights. | +| Sumcheck final evaluation fails | `eq_at_r_prime`, `eval_claim` outputs (`*_current_claim` in code), and construction of `T_i(rho)` | `TowerLayerAir` assembled the expected-evaluation body incorrectly, or the claim AIRs swapped eval/next claims. | +| Next layer expected claim is wrong | `next_claim` outputs, `mu`, inactive spec masks, and fresh `lambda_next` powers | `TowerLayerAir` derived `C_{i+1}(rho, mu)` with the wrong interpolation challenge or batching weights. | | Terminal layer still expects another layer | `layer_idx`, `num_layers`, and `is_last_layer` | Terminal/non-terminal gating is wrong, causing an invalid `C_{i+1}` transition. | | `TranscriptBus` failure at a specific `tidx` | nearest numbered arrow in the AIR sequence diagram | The owner AIR observed or sampled at the wrong cursor, wrong order, or wrong multiplicity. | | `MainBus` claim mismatch | `TowerLayerOutputBus` and `TowerInputAir` export row | Tower emitted the wrong `input_layer_claim` or changed `chip_id`/`final_tidx` at the boundary. | @@ -574,7 +764,7 @@ field, or one transcript cursor. - **Fresh layer batching:** each layer samples a fresh batching challenge; specs within that layer use its powers. - **Shape agreement:** proof-shape metadata must force the same active spec/layer schedule as the native verifier. - **No hidden semantics in column names:** protocol docs should describe `T_i(rho)` and `C_{i+1}(rho, mu)`, not rely on - `_prime` or other implementation-specific column names. + implementation-specific column names. - **Downstream ownership:** tower emits the reduced claim; downstream modules check the claim's constraint semantics. ## Open Design Questions From 41ee99dbc995ad7256fa84fc0ea81b61c0b3e292 Mon Sep 17 00:00:00 2001 From: kunxian xia Date: Fri, 26 Jun 2026 22:43:25 +0800 Subject: [PATCH 5/5] scripts: add bus failure grouping helper --- .../scripts/group_bus_failures.py | 224 ++++++++++++++++++ ceno_recursion_v2/scripts/run_e2e_test.sh | 31 ++- 2 files changed, 250 insertions(+), 5 deletions(-) create mode 100644 ceno_recursion_v2/scripts/group_bus_failures.py diff --git a/ceno_recursion_v2/scripts/group_bus_failures.py b/ceno_recursion_v2/scripts/group_bus_failures.py new file mode 100644 index 000000000..39aaaa5c3 --- /dev/null +++ b/ceno_recursion_v2/scripts/group_bus_failures.py @@ -0,0 +1,224 @@ +#!/usr/bin/env python3 +"""Group Stark interaction-bus balance failures by bus id. + +Usage: + python3 scripts/group_bus_failures.py prover.log + cargo ... 2>&1 | python3 scripts/group_bus_failures.py +""" + +from __future__ import annotations + +import argparse +import re +import sys +from collections import Counter, defaultdict +from dataclasses import dataclass, field +from pathlib import Path + + +BABYBEAR_MODULUS = 2_013_265_921 + +BUS_NAMES = { + 0: "TranscriptBus", + 1: "Poseidon2PermuteBus", + 2: "Poseidon2CompressBus", + 3: "MerkleVerifyBus", + 4: "TowerModuleBus", + 5: "AirShapeBus", + 6: "HyperdimBus", + 7: "LiftedHeightsBus", + 8: "PublicValuesBus", + 9: "RangeCheckerBus", + 10: "PowerCheckerBus", + 11: "ExpressionClaimNMaxBus", + 12: "FractionFolderInputBus", + 13: "NLiftBus", + 14: "XiRandomnessBus", + 15: "ExpBitsLenBus", + 16: "RightShiftBus", + 17: "MainBus", + 18: "MainSumcheckInputBus", + 19: "MainSumcheckOutputBus", + 20: "MainExpressionClaimBus", + 21: "CachedCommitBus", + 22: "FinalTranscriptStateBus", + 23: "ForkedTranscriptBus", + 24: "LookupChallengeBus", + 25: "ProofShapePermutationBus", + 26: "StartingTidxBus", + 27: "NumPublicValuesBus", + 28: "TowerLayerInputBus", + 29: "TowerLayerOutputBus", + 30: "TowerSumcheckInputBus", + 31: "TowerSumcheckOutputBus", + 32: "TowerSumcheckChallengeBus", + 33: "TowerProdReadClaimInputBus", + 34: "TowerProdReadClaimBus", + 35: "TowerProdWriteClaimInputBus", + 36: "TowerProdWriteClaimBus", + 37: "TowerLogupClaimInputBus", + 38: "TowerLogupClaimBus", + 39: "AirPresenceBus", + 40: "ColumnClaimsBus", + 41: "SelHypercubeBus", + 42: "SelUniBus", + 43: "BatchConstraintConductorBus", + 44: "EqNOuterBus", + 45: "SymbolicExpressionBus", + 46: "ExpressionClaimBus", + 47: "InteractionsFoldingBus", + 48: "ConstraintsFoldingBus", + 49: "PvsAirConsistencyBus", +} + +FAILURE_RE = re.compile( + r"Bus\s+(?P\d+)\s+failed to balance .*?fields=(?P\[[^\]]*\])" +) +CONNECTION_RE = re.compile( + r"Air idx:\s*(?P\d+),\s*Air name:\s*(?P.*?),\s*count:\s*(?P\d+)" +) + + +@dataclass(frozen=True) +class Connection: + air_idx: int + air_name: str + raw_count: int + signed_count: int + + @property + def role(self) -> str: + if self.signed_count > 0: + return "send" + if self.signed_count < 0: + return "receive" + return "disabled" + + +@dataclass +class Failure: + bus: int + fields: str + connections: list[Connection] = field(default_factory=list) + + +def decode_field(value: int, modulus: int) -> int: + if value > modulus // 2: + return value - modulus + return value + + +def read_lines(paths: list[Path]) -> list[str]: + if not paths: + return sys.stdin.read().splitlines() + lines: list[str] = [] + for path in paths: + lines.extend(path.read_text().splitlines()) + return lines + + +def parse_failures(lines: list[str], modulus: int) -> list[Failure]: + failures: list[Failure] = [] + current: Failure | None = None + + for line in lines: + failure_match = FAILURE_RE.search(line) + if failure_match: + current = Failure( + bus=int(failure_match.group("bus")), + fields=failure_match.group("fields"), + ) + failures.append(current) + continue + + connection_match = CONNECTION_RE.search(line) + if connection_match and current is not None: + raw_count = int(connection_match.group("count")) + current.connections.append( + Connection( + air_idx=int(connection_match.group("air_idx")), + air_name=connection_match.group("air_name").strip(), + raw_count=raw_count, + signed_count=decode_field(raw_count, modulus), + ) + ) + + return failures + + +def print_summary(failures: list[Failure]) -> None: + by_bus: dict[int, list[Failure]] = defaultdict(list) + for failure in failures: + by_bus[failure.bus].append(failure) + + if not by_bus: + print("No bus balance failures found.") + return + + for bus in sorted(by_bus): + bus_failures = by_bus[bus] + bus_name = BUS_NAMES.get(bus, "unknown") + print(f"Bus {bus} ({bus_name}): {len(bus_failures)} failure(s)") + + by_fields: dict[str, list[Failure]] = defaultdict(list) + for failure in bus_failures: + by_fields[failure.fields].append(failure) + + for fields, field_failures in sorted( + by_fields.items(), key=lambda item: (-len(item[1]), item[0]) + ): + connection_counter: Counter[Connection] = Counter() + imbalance = 0 + for failure in field_failures: + for connection in failure.connections: + connection_counter[connection] += 1 + imbalance += connection.signed_count + + print(f" fields={fields}") + print(f" occurrences: {len(field_failures)}") + print(f" signed imbalance from listed connections: {imbalance}") + + if not connection_counter: + print(" connections: none listed") + continue + + print(" connections:") + for connection, occurrences in sorted( + connection_counter.items(), + key=lambda item: ( + item[0].air_idx, + item[0].air_name, + item[0].signed_count, + item[0].raw_count, + ), + ): + print( + " " + f"{connection.role:7s} " + f"air={connection.air_idx} " + f"name={connection.air_name} " + f"count={connection.signed_count} " + f"raw={connection.raw_count} " + f"occurrences={occurrences}" + ) + print() + + +def main() -> int: + parser = argparse.ArgumentParser() + parser.add_argument("logs", nargs="*", type=Path, help="Log files. Reads stdin when omitted.") + parser.add_argument( + "--modulus", + type=int, + default=BABYBEAR_MODULUS, + help="Field modulus used to decode negative counts.", + ) + args = parser.parse_args() + + failures = parse_failures(read_lines(args.logs), args.modulus) + print_summary(failures) + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/ceno_recursion_v2/scripts/run_e2e_test.sh b/ceno_recursion_v2/scripts/run_e2e_test.sh index f230d6479..76dfd2c0a 100755 --- a/ceno_recursion_v2/scripts/run_e2e_test.sh +++ b/ceno_recursion_v2/scripts/run_e2e_test.sh @@ -11,6 +11,7 @@ FORCE_REGEN="${CENO_RECURSION_V2_FORCE_REGEN:-0}" MAX_CYCLE_PER_SHARD="${CENO_RECURSION_V2_MAX_CYCLE_PER_SHARD:-16000}" HINTS="${CENO_RECURSION_V2_HINTS:-10}" PUBLIC_IO="${CENO_RECURSION_V2_PUBLIC_IO:-4191}" +BUS_FAILURE_GROUPER="$REPO_ROOT/ceno_recursion_v2/scripts/group_bus_failures.py" # --- Step 1: ensure fixtures exist --- @@ -46,8 +47,28 @@ fi echo "" echo "[test] running recursion v2 e2e tests ..." cd "$REPO_ROOT/ceno_recursion_v2" -CENO_RECURSION_V2_FIXTURE_DIR="$FIXTURE_DIR" \ -RUST_MIN_STACK=33554432 \ -cargo test --release \ - 'continuation::tests::prover_integration' \ - -- --nocapture + +TEST_LOG="${CENO_RECURSION_V2_TEST_LOG:-}" +REMOVE_TEST_LOG=0 +if [[ -z "$TEST_LOG" ]]; then + TEST_LOG="$(mktemp "${TMPDIR:-/tmp}/ceno-recursion-v2-e2e.XXXXXX")" + REMOVE_TEST_LOG=1 +fi + +if CENO_RECURSION_V2_FIXTURE_DIR="$FIXTURE_DIR" \ + RUST_MIN_STACK=33554432 \ + cargo test \ + 'continuation::tests::prover_integration::agg_prover_single_shard' \ + -- --nocapture 2>&1 | tee "$TEST_LOG"; then + if [[ "$REMOVE_TEST_LOG" == "1" ]]; then + rm -f "$TEST_LOG" + fi +else + status=$? + echo "" + echo "[test] bus balance failure summary:" + python3 "$BUS_FAILURE_GROUPER" "$TEST_LOG" || true + echo "" + echo "[test] full failing log: $TEST_LOG" + exit "$status" +fi