From 4974cf64f090b5f692e893a7254ae83d3b769c3d Mon Sep 17 00:00:00 2001
From: Youssef El Housni <youssef.elhousni@consensys.net>
Date: Wed, 6 May 2026 15:31:54 -0400
Subject: [PATCH 1/8] perf: lattice-based scalar decomposition with tighter LLL
 bounds

---
 internal/stats/latest_stats.csv               |  16 +--
 std/algebra/emulated/sw_emulated/hints.go     | 111 ++++++++++--------
 std/algebra/emulated/sw_emulated/point.go     |  16 +--
 .../emulated/sw_emulated/point_test.go        |  10 +-
 std/algebra/native/sw_bls12377/g1.go          |  12 +-
 .../native/sw_bls12377/g1_eisenstein_test.go  |   8 +-
 std/algebra/native/sw_bls12377/hints.go       | 106 +++++++++--------
 .../native/twistededwards/curve_test.go       |   9 +-
 std/algebra/native/twistededwards/hints.go    |  62 ++++++----
 std/algebra/native/twistededwards/point.go    |   6 +-
 10 files changed, 200 insertions(+), 156 deletions(-)

diff --git a/internal/stats/latest_stats.csv b/internal/stats/latest_stats.csv
index 65c88a6c86..f30dd48cae 100644
--- a/internal/stats/latest_stats.csv
+++ b/internal/stats/latest_stats.csv
@@ -95,18 +95,18 @@ pairing_bn254,bn254,groth16,506052,823961
 pairing_bn254,bn254,plonk,1646819,1573151
 pairing_bw6761,bn254,groth16,1589471,2646707
 pairing_bw6761,bn254,plonk,5318762,5097941
-scalar_mul_G1_bn254,bn254,groth16,115934,175413
-scalar_mul_G1_bn254,bn254,plonk,381171,365027
-scalar_mul_G1_bn254_incomplete,bn254,groth16,55441,87984
-scalar_mul_G1_bn254_incomplete,bn254,plonk,200004,192882
+scalar_mul_G1_bn254,bn254,groth16,108168,163915
+scalar_mul_G1_bn254,bn254,plonk,355353,340385
+scalar_mul_G1_bn254_incomplete,bn254,groth16,51579,81902
+scalar_mul_G1_bn254_incomplete,bn254,plonk,185916,179316
 scalar_mul_P256,bn254,groth16,96724,151768
 scalar_mul_P256,bn254,plonk,328895,315729
 scalar_mul_P256_incomplete,bn254,groth16,75542,121798
 scalar_mul_P256_incomplete,bn254,plonk,263160,253523
-scalar_mul_secp256k1,bn254,groth16,117264,177389
-scalar_mul_secp256k1,bn254,plonk,385623,369279
-scalar_mul_secp256k1_incomplete,bn254,groth16,56125,89066
-scalar_mul_secp256k1_incomplete,bn254,plonk,202518,195302
+scalar_mul_secp256k1,bn254,groth16,108204,163975
+scalar_mul_secp256k1,bn254,plonk,355502,340530
+scalar_mul_secp256k1_incomplete,bn254,groth16,51619,81970
+scalar_mul_secp256k1_incomplete,bn254,plonk,186082,179475
 selector/binaryMux_4,bn254,groth16,5,3
 selector/binaryMux_4,bls12_377,groth16,5,3
 selector/binaryMux_4,bls12_381,groth16,5,3
diff --git a/std/algebra/emulated/sw_emulated/hints.go b/std/algebra/emulated/sw_emulated/hints.go
index b6b42cffe3..e436f992cc 100644
--- a/std/algebra/emulated/sw_emulated/hints.go
+++ b/std/algebra/emulated/sw_emulated/hints.go
@@ -6,7 +6,7 @@ import (
 	"fmt"
 	"math/big"
 
-	"github.com/consensys/gnark-crypto/algebra/eisenstein"
+	"github.com/consensys/gnark-crypto/algebra/lattice"
 	"github.com/consensys/gnark-crypto/ecc"
 	bls12381 "github.com/consensys/gnark-crypto/ecc/bls12-381"
 	bls12381_fp "github.com/consensys/gnark-crypto/ecc/bls12-381/fp"
@@ -32,8 +32,8 @@ func GetHints() []solver.Hint {
 	return []solver.Hint{
 		decomposeScalarG1,
 		scalarMulHint,
-		halfGCD,
-		halfGCDEisenstein,
+		rationalReconstruct,
+		rationalReconstructExt,
 	}
 }
 
@@ -160,7 +160,15 @@ func scalarMulHint(field *big.Int, inputs []*big.Int, outputs []*big.Int) error
 	})
 }
 
-func halfGCD(mod *big.Int, inputs []*big.Int, outputs []*big.Int) error {
+// rationalReconstruct decomposes a scalar s ∈ Fr into (s1, |s2|, signBit) such
+// that s1 ≡ s2·s (mod r), with |s1|, |s2| < γ₂·√r ≈ 1.15·√r (proven LLL/Hermite
+// bound from gnark-crypto/algebra/lattice). Replaces the older heuristic
+// HalfGCD-based decomposition.
+//
+// In-circuit: 1 native sign bit + 2 emulated outputs (s1, |s2|). The caller
+// reconstructs the signed s2 as ±|s2| based on the sign bit and asserts
+// s1 + s·s2 ≡ 0 (mod r).
+func rationalReconstruct(mod *big.Int, inputs []*big.Int, outputs []*big.Int) error {
 	return emulated.UnwrapHintContext(mod, inputs, outputs, func(hc emulated.HintContext) error {
 		moduli := hc.EmulatedModuli()
 		if len(moduli) != 1 {
@@ -177,25 +185,38 @@ func halfGCD(mod *big.Int, inputs []*big.Int, outputs []*big.Int) error {
 		if len(emuOutputs) != 2 {
 			return fmt.Errorf("expecting two outputs, got %d", len(emuOutputs))
 		}
-		glvBasis := new(ecc.Lattice)
-		ecc.PrecomputeLattice(moduli[0], emuInputs[0], glvBasis)
-		emuOutputs[0].Set(&glvBasis.V1[0])
-		emuOutputs[1].Set(&glvBasis.V1[1])
-		// we need the absolute values for the in-circuit computations,
-		// otherwise the negative values will be reduced modulo the SNARK scalar
-		// field and not the emulated field.
-		// 		output0 = |s0| mod r
-		// 		output1 = |s1| mod r
+		// lattice.RationalReconstruct returns (x, z) with x ≡ z·s (mod r),
+		// i.e., x − z·s ≡ 0 (mod r). The circuit expects: s1 + s·_s2 ≡ 0
+		// (mod r), so s1 = x and _s2 = −z.
+		rc := lattice.NewReconstructor(moduli[0])
+		res := rc.RationalReconstruct(emuInputs[0])
+		x, z := new(big.Int).Set(res[0]), new(big.Int).Set(res[1])
+
+		// Normalise so s1 ≥ 0; flipping (x, z) preserves x ≡ z·s mod r.
+		if x.Sign() < 0 {
+			x.Neg(x)
+			z.Neg(z)
+		}
+		emuOutputs[0].Set(x)
+		emuOutputs[1].Abs(z)
+
+		// signBit = 1 iff −z < 0 iff z > 0 (so the in-circuit code negates
+		// |z| to recover s2 = −z).
 		nativeOutputs[0].SetUint64(0)
-		if emuOutputs[1].Sign() == -1 {
-			emuOutputs[1].Neg(emuOutputs[1])
-			nativeOutputs[0].SetUint64(1) // we return the sign of the second subscalar
+		if z.Sign() > 0 {
+			nativeOutputs[0].SetUint64(1)
 		}
 		return nil
 	})
 }
 
-func halfGCDEisenstein(mod *big.Int, inputs []*big.Int, outputs []*big.Int) error {
+// rationalReconstructExt is the 4-D Eisenstein-style decomposition: given a
+// scalar s and GLV eigenvalue λ, finds (u1, u2, v1, v2) such that
+// s·(v1 + λ·v2) + u1 + λ·u2 ≡ 0 (mod r), with |u_i|, |v_i| < γ₄·r^(1/4) ≈
+// 1.25·r^(1/4) (proven LLL bound). Replaces the older Eisenstein HalfGCD.
+//
+// In-circuit: 4 native sign bits + 4 emulated absolute values.
+func rationalReconstructExt(mod *big.Int, inputs []*big.Int, outputs []*big.Int) error {
 	return emulated.UnwrapHintContext(mod, inputs, outputs, func(hc emulated.HintContext) error {
 		moduli := hc.EmulatedModuli()
 		if len(moduli) != 1 {
@@ -213,47 +234,43 @@ func halfGCDEisenstein(mod *big.Int, inputs []*big.Int, outputs []*big.Int) erro
 			return fmt.Errorf("expecting four outputs, got %d", len(emuOutputs))
 		}
 
-		glvBasis := new(ecc.Lattice)
-		ecc.PrecomputeLattice(moduli[0], emuInputs[1], glvBasis)
-		r := eisenstein.ComplexNumber{
-			A0: glvBasis.V1[0],
-			A1: glvBasis.V1[1],
-		}
-		sp := ecc.SplitScalar(emuInputs[0], glvBasis)
-		// in-circuit we check that Q - [s]P = 0 or equivalently Q + [-s]P = 0
-		// so here we return -s instead of s.
-		s := eisenstein.ComplexNumber{
-			A0: sp[0],
-			A1: sp[1],
-		}
-		s.Neg(&s)
+		// Inputs: emuInputs[0] = s, emuInputs[1] = λ.
+		// In-circuit we check Q − [s]P = 0, equivalently [−s]P + Q = 0, so we
+		// negate the scalar before reconstruction (matches the previous
+		// halfGCDEisenstein convention).
+		k := new(big.Int).Neg(emuInputs[0])
+		k.Mod(k, moduli[0])
+
+		rc := lattice.NewReconstructor(moduli[0]).SetLambda(emuInputs[1])
+		res := rc.RationalReconstructExt(k)
+		// res = (x, y, z, t) with k = (x + λ·y)/(z + λ·t) mod r,
+		// i.e., (x + λ·y) − k·(z + λ·t) ≡ 0 (mod r).
+		// Mapping onto our convention u1 + λ·u2 + s·(v1 + λ·v2) ≡ 0 with k = −s:
+		// u1 = x, u2 = y, v1 = z, v2 = t.
+		u1 := new(big.Int).Set(res[0])
+		u2 := new(big.Int).Set(res[1])
+		v1 := new(big.Int).Set(res[2])
+		v2 := new(big.Int).Set(res[3])
+
+		emuOutputs[0].Abs(u1)
+		emuOutputs[1].Abs(u2)
+		emuOutputs[2].Abs(v1)
+		emuOutputs[3].Abs(v2)
 
-		res := eisenstein.HalfGCD(&r, &s)
-		// values
-		emuOutputs[0].Set(&res[0].A0)
-		emuOutputs[1].Set(&res[0].A1)
-		emuOutputs[2].Set(&res[1].A0)
-		emuOutputs[3].Set(&res[1].A1)
-		// signs
 		nativeOutputs[0].SetUint64(0)
 		nativeOutputs[1].SetUint64(0)
 		nativeOutputs[2].SetUint64(0)
 		nativeOutputs[3].SetUint64(0)
-
-		if res[0].A0.Sign() == -1 {
-			emuOutputs[0].Neg(emuOutputs[0])
+		if u1.Sign() < 0 {
 			nativeOutputs[0].SetUint64(1)
 		}
-		if res[0].A1.Sign() == -1 {
-			emuOutputs[1].Neg(emuOutputs[1])
+		if u2.Sign() < 0 {
 			nativeOutputs[1].SetUint64(1)
 		}
-		if res[1].A0.Sign() == -1 {
-			emuOutputs[2].Neg(emuOutputs[2])
+		if v1.Sign() < 0 {
 			nativeOutputs[2].SetUint64(1)
 		}
-		if res[1].A1.Sign() == -1 {
-			emuOutputs[3].Neg(emuOutputs[3])
+		if v2.Sign() < 0 {
 			nativeOutputs[3].SetUint64(1)
 		}
 		return nil
diff --git a/std/algebra/emulated/sw_emulated/point.go b/std/algebra/emulated/sw_emulated/point.go
index 27793b0826..0404d1a385 100644
--- a/std/algebra/emulated/sw_emulated/point.go
+++ b/std/algebra/emulated/sw_emulated/point.go
@@ -1372,9 +1372,9 @@ func (c *Curve[B, S]) scalarMulFakeGLV(Q *AffinePoint[B], s *emulated.Element[S]
 
 	// First we find the sub-salars s1, s2 s.t. s1 + s2*s = 0 mod r and s1, s2 < sqrt(r).
 	// we also output the sign in case s2 is negative. In that case we compute _s2 = -s2 mod r.
-	sign, sd, err := c.scalarApi.NewHintGeneric(halfGCD, 1, 2, nil, []*emulated.Element[S]{_s})
+	sign, sd, err := c.scalarApi.NewHintGeneric(rationalReconstruct, 1, 2, nil, []*emulated.Element[S]{_s})
 	if err != nil {
-		panic(fmt.Sprintf("halfGCD hint: %v", err))
+		panic(fmt.Sprintf("rationalReconstruct hint: %v", err))
 	}
 	s1, s2 := sd[0], sd[1]
 	_s2 := c.scalarApi.Select(sign[0], c.scalarApi.Neg(s2), s2)
@@ -1676,9 +1676,9 @@ func (c *Curve[B, S]) scalarMulGLVAndFakeGLV(P *AffinePoint[B], s *emulated.Elem
 	// Eisenstein integers real and imaginary parts can be negative. So we
 	// return the absolute value in the hint and negate the corresponding
 	// points here when needed.
-	signs, sd, err := c.scalarApi.NewHintGeneric(halfGCDEisenstein, 4, 4, nil, []*emulated.Element[S]{_s, c.eigenvalue})
+	signs, sd, err := c.scalarApi.NewHintGeneric(rationalReconstructExt, 4, 4, nil, []*emulated.Element[S]{_s, c.eigenvalue})
 	if err != nil {
-		panic(fmt.Sprintf("halfGCDEisenstein hint: %v", err))
+		panic(fmt.Sprintf("rationalReconstructExt hint: %v", err))
 	}
 	u1, u2, v1, v2 := sd[0], sd[1], sd[2], sd[3]
 	isNegu1, isNegu2, isNegv1, isNegv2 := signs[0], signs[1], signs[2], signs[3]
@@ -1800,10 +1800,10 @@ func (c *Curve[B, S]) scalarMulGLVAndFakeGLV(P *AffinePoint[B], s *emulated.Elem
 	g := c.Generator()
 	Acc = addFn(Acc, g)
 
-	// u1, u2, v1, v2 < r^{1/4} (up to a constant factor).
-	// We prove that the factor is log_(3/sqrt(3)))(r).
-	// so we need to add 9 bits to r^{1/4}.nbits().
-	nbits := st.Modulus().BitLen()>>2 + 9
+	// LLL Hermite bound (gnark-crypto/algebra/lattice): u1, u2, v1, v2 are
+	// bounded by γ₄·r^(1/4) ≈ 1.25·r^(1/4), which fits in (BitLen+3)/4 + 2 bits.
+	// This is tighter than the previous heuristic BitLen/4 + 9 (saves ~7 iters).
+	nbits := (st.Modulus().BitLen()+3)/4 + 2
 	u1bits := c.scalarApi.ToBits(u1)
 	u2bits := c.scalarApi.ToBits(u2)
 	v1bits := c.scalarApi.ToBits(v1)
diff --git a/std/algebra/emulated/sw_emulated/point_test.go b/std/algebra/emulated/sw_emulated/point_test.go
index 0e20db6d74..0949c86b73 100644
--- a/std/algebra/emulated/sw_emulated/point_test.go
+++ b/std/algebra/emulated/sw_emulated/point_test.go
@@ -2601,7 +2601,7 @@ func TestScalarMulGLVAndFakeGLVEdgeCasesEdgeCases2(t *testing.T) {
 }
 
 // This is a regression for the missing complete-formula handling in
-// scalarMulGLVAndFakeGLV. For secp256k1 and s=2, the halfGCDEisenstein
+// scalarMulGLVAndFakeGLV. For secp256k1 and s=2, the rationalReconstructExt
 // decomposition yields signs corresponding to
 //
 //	b1 = -P + Q + Phi(P) + Phi(Q).
@@ -2664,9 +2664,9 @@ func TestScalarMulGLVAndFakeGLVCompletePrecomputeCollisionFails(t *testing.T) {
 	assert.NoError(err)
 }
 
-// zeroHalfGCDEisenstein replaces the honest halfGCDEisenstein hint with one
+// zeroRationalReconstructExt replaces the honest rationalReconstructExt hint with one
 // returning the all-zeros decomposition. Used by the regression below.
-func zeroHalfGCDEisenstein(_ *big.Int, _, outputs []*big.Int) error {
+func zeroRationalReconstructExt(_ *big.Int, _, outputs []*big.Int) error {
 	for i := range outputs {
 		outputs[i].SetUint64(0)
 	}
@@ -2674,7 +2674,7 @@ func zeroHalfGCDEisenstein(_ *big.Int, _, outputs []*big.Int) error {
 }
 
 // TestScalarMulGLVAndFakeGLV_TrivialDecompositionRegression: regression for a
-// soundness issue in scalarMulGLVAndFakeGLV. A malicious halfGCDEisenstein
+// soundness issue in scalarMulGLVAndFakeGLV. A malicious rationalReconstructExt
 // hint returning the trivial all-zeros decomposition (u1=u2=v1=v2=0) makes
 // the relation s·(v1 + λ·v2) + u1 + λ·u2 = 0 vacuous and lets the
 // scalar-mul hint output be any point. The fix asserts NOT (v1=0 AND v2=0).
@@ -2704,7 +2704,7 @@ func TestScalarMulGLVAndFakeGLV_TrivialDecompositionRegression(t *testing.T) {
 
 	// malicious all-zeros Eisenstein decomposition must be rejected
 	err := test.IsSolved(&circuit, &witness, testCurve.ScalarField(),
-		test.WithReplacementHint(solver.GetHintID(halfGCDEisenstein), zeroHalfGCDEisenstein),
+		test.WithReplacementHint(solver.GetHintID(rationalReconstructExt), zeroRationalReconstructExt),
 	)
 	if err == nil {
 		t.Fatal("malicious all-zeros Eisenstein decomposition was accepted — soundness break")
diff --git a/std/algebra/native/sw_bls12377/g1.go b/std/algebra/native/sw_bls12377/g1.go
index 4cdb944d9d..5c4722091f 100644
--- a/std/algebra/native/sw_bls12377/g1.go
+++ b/std/algebra/native/sw_bls12377/g1.go
@@ -651,9 +651,9 @@ func (p *G1Affine) scalarMulGLVAndFakeGLV(api frontend.API, P G1Affine, s fronte
 	// Eisenstein integers real and imaginary parts can be negative. So we
 	// return the absolute value in the hint and negate the corresponding
 	// points here when needed.
-	sd, err := api.NewHint(halfGCDEisenstein, 10, _s, cc.lambda)
+	sd, err := api.NewHint(rationalReconstructExt, 10, _s, cc.lambda)
 	if err != nil {
-		panic(fmt.Sprintf("halfGCDEisenstein hint: %v", err))
+		panic(fmt.Sprintf("rationalReconstructExt hint: %v", err))
 	}
 	u1, u2, v1, v2, q := sd[0], sd[1], sd[2], sd[3], sd[4]
 	isNegu1, isNegu2, isNegv1, isNegv2, isNegq := sd[5], sd[6], sd[7], sd[8], sd[9]
@@ -775,10 +775,10 @@ func (p *G1Affine) scalarMulGLVAndFakeGLV(api frontend.API, P G1Affine, s fronte
 	H := G1Affine{X: 0, Y: 1}
 	Acc.AddAssign(api, H)
 
-	// u1, u2, v1, v2 < r^{1/4} (up to a constant factor).
-	// We prove that the factor is log_(3/sqrt(3)))(r).
-	// so we need to add 9 bits to r^{1/4}.nbits().
-	nbits := cc.lambda.BitLen()>>1 + 9 // 72
+	// LLL Hermite bound (gnark-crypto/algebra/lattice): u1, u2, v1, v2 are
+	// bounded by γ₄·r^(1/4) ≈ 1.25·r^(1/4), which fits in (r.BitLen()+3)/4 + 2
+	// bits. Tighter than the previous heuristic lambda.BitLen()/2 + 9 (saves ~6 iters).
+	nbits := (cc.fr.BitLen()+3)/4 + 2 // 66 for BLS12-377
 	u1bits := api.ToBinary(u1, nbits)
 	u2bits := api.ToBinary(u2, nbits)
 	v1bits := api.ToBinary(v1, nbits)
diff --git a/std/algebra/native/sw_bls12377/g1_eisenstein_test.go b/std/algebra/native/sw_bls12377/g1_eisenstein_test.go
index 36f7b96550..10f995af01 100644
--- a/std/algebra/native/sw_bls12377/g1_eisenstein_test.go
+++ b/std/algebra/native/sw_bls12377/g1_eisenstein_test.go
@@ -28,11 +28,11 @@ func (c *scalarMulGLVAndFakeGLVTrivialDecompCircuit) Define(api frontend.API) er
 	return nil
 }
 
-// zeroHalfGCDEisenstein replaces the honest halfGCDEisenstein hint with one
+// zeroRationalReconstructExt replaces the honest rationalReconstructExt hint with one
 // that returns the all-zeros decomposition (u1 = u2 = v1 = v2 = q = 0). The
 // signs are also zero (positive). This is the malicious-hint shape the
 // soundness fix protects against.
-func zeroHalfGCDEisenstein(_ *big.Int, inputs, outputs []*big.Int) error {
+func zeroRationalReconstructExt(_ *big.Int, inputs, outputs []*big.Int) error {
 	if len(inputs) != 2 {
 		return errors.New("expecting two inputs")
 	}
@@ -46,7 +46,7 @@ func zeroHalfGCDEisenstein(_ *big.Int, inputs, outputs []*big.Int) error {
 }
 
 // TestScalarMulGLVAndFakeGLV_TrivialDecompositionRegression: regression for a
-// soundness issue in scalarMulGLVAndFakeGLV. A malicious halfGCDEisenstein
+// soundness issue in scalarMulGLVAndFakeGLV. A malicious rationalReconstructExt
 // hint returning the trivial all-zeros decomposition (u1=u2=v1=v2=q=0) makes
 // the relation s·(v1 + λ·v2) + u1 + λ·u2 - r·q = 0 vacuous and lets the
 // scalar-mul hint output be any point. The fix asserts NOT (v1=0 AND v2=0).
@@ -73,7 +73,7 @@ func TestScalarMulGLVAndFakeGLV_TrivialDecompositionRegression(t *testing.T) {
 		&scalarMulGLVAndFakeGLVTrivialDecompCircuit{},
 		&witness,
 		ecc.BW6_761.ScalarField(),
-		test.WithReplacementHint(solver.GetHintID(halfGCDEisenstein), zeroHalfGCDEisenstein),
+		test.WithReplacementHint(solver.GetHintID(rationalReconstructExt), zeroRationalReconstructExt),
 	)
 	assert.Error(err, "trivial all-zeros Eisenstein decomposition was accepted — soundness break")
 }
diff --git a/std/algebra/native/sw_bls12377/hints.go b/std/algebra/native/sw_bls12377/hints.go
index af1acc761d..3302be200b 100644
--- a/std/algebra/native/sw_bls12377/hints.go
+++ b/std/algebra/native/sw_bls12377/hints.go
@@ -4,7 +4,7 @@ import (
 	"errors"
 	"math/big"
 
-	"github.com/consensys/gnark-crypto/algebra/eisenstein"
+	"github.com/consensys/gnark-crypto/algebra/lattice"
 	"github.com/consensys/gnark-crypto/ecc"
 	bls12377 "github.com/consensys/gnark-crypto/ecc/bls12-377"
 	"github.com/consensys/gnark/constraint/solver"
@@ -16,7 +16,7 @@ func GetHints() []solver.Hint {
 		decomposeScalarG1Simple,
 		decomposeScalarG2,
 		scalarMulGLVG1Hint,
-		halfGCDEisenstein,
+		rationalReconstructExt,
 		pairingCheckHint,
 		pairingCheckTorusHint,
 	}
@@ -306,68 +306,76 @@ func scalarMulGLVG1Hint(scalarField *big.Int, inputs []*big.Int, outputs []*big.
 	return nil
 }
 
-func halfGCDEisenstein(scalarField *big.Int, inputs []*big.Int, outputs []*big.Int) error {
+// rationalReconstructExt is the 4-D Eisenstein-style scalar decomposition for
+// BLS12-377 G1's GLV+FakeGLV scalar mul, backed by LLL-based lattice rational
+// reconstruction with proven Hermite bound |u_i|, |v_i| < γ₄·r^(1/4) ≈
+// 1.25·r^(1/4). Replaces the older Eisenstein HalfGCD.
+//
+// Inputs: [s, λ] (scalar and GLV eigenvalue, both bounded by inner curve order).
+// Outputs: [|u1|, |u2|, |v1|, |v2|, |q|, sign(u1), sign(u2), sign(v1), sign(v2), sign(q)] (10).
+//
+// The relation (in signed integers) is
+//
+//	s·(v1 + λ·v2) + u1 + λ·u2 = q·r
+//
+// where r is the inner curve order. The in-circuit check at sw_bls12377/g1.go::
+// scalarMulGLVAndFakeGLV verifies this in the outer SNARK scalar field.
+func rationalReconstructExt(scalarField *big.Int, inputs []*big.Int, outputs []*big.Int) error {
 	if len(inputs) != 2 {
-		return errors.New("expecting two input")
+		return errors.New("expecting two inputs (s, λ)")
 	}
 	if len(outputs) != 10 {
-		return errors.New("expecting ten outputs")
+		return errors.New("expecting ten outputs (4 abs values + 1 |q| + 5 sign bits)")
 	}
 	cc := getInnerCurveConfig(scalarField)
-	glvBasis := new(ecc.Lattice)
-	ecc.PrecomputeLattice(cc.fr, inputs[1], glvBasis)
-	r := eisenstein.ComplexNumber{
-		A0: glvBasis.V1[0],
-		A1: glvBasis.V1[1],
-	}
-	sp := ecc.SplitScalar(inputs[0], glvBasis)
-	// in-circuit we check that Q - [s]P = 0 or equivalently Q + [-s]P = 0
-	// so here we return -s instead of s.
-	s := eisenstein.ComplexNumber{
-		A0: sp[0],
-		A1: sp[1],
+
+	// In-circuit we check Q − [s]P = 0, equivalently Q + [−s]P = 0, so we
+	// negate the scalar before reconstruction (matches the previous convention).
+	k := new(big.Int).Neg(inputs[0])
+	k.Mod(k, cc.fr)
+
+	rc := lattice.NewReconstructor(cc.fr).SetLambda(inputs[1])
+	res := rc.RationalReconstructExt(k)
+	// res = (x, y, z, t) with k = (x + λ·y)/(z + λ·t) mod r,
+	// i.e., (x + λ·y) − k·(z + λ·t) ≡ 0 (mod r). With k = −s mod r this gives
+	// (x + λ·y) + s·(z + λ·t) ≡ 0 (mod r). Mapping: u1 = x, u2 = y, v1 = z, v2 = t.
+	u1 := new(big.Int).Set(res[0])
+	u2 := new(big.Int).Set(res[1])
+	v1 := new(big.Int).Set(res[2])
+	v2 := new(big.Int).Set(res[3])
+
+	// q = (s·(v1 + λ·v2) + u1 + λ·u2) / r computed in signed integers.
+	q := new(big.Int).Mul(v2, inputs[1])
+	q.Add(q, v1)
+	q.Mul(q, inputs[0])
+	tmp := new(big.Int).Mul(u2, inputs[1])
+	q.Add(q, tmp)
+	q.Add(q, u1)
+	q.Quo(q, cc.fr)
+
+	outputs[0].Abs(u1)
+	outputs[1].Abs(u2)
+	outputs[2].Abs(v1)
+	outputs[3].Abs(v2)
+	outputs[4].Abs(q)
+
+	for i := 5; i <= 9; i++ {
+		outputs[i].SetUint64(0)
 	}
-	s.Neg(&s)
-	res := eisenstein.HalfGCD(&r, &s)
-	outputs[0].Set(&res[0].A0)
-	outputs[1].Set(&res[0].A1)
-	outputs[2].Set(&res[1].A0)
-	outputs[3].Set(&res[1].A1)
-	outputs[4].Mul(&res[1].A1, inputs[1]).
-		Add(outputs[4], &res[1].A0).
-		Mul(outputs[4], inputs[0]).
-		Add(outputs[4], &res[0].A0)
-	s.A0.Mul(&res[0].A1, inputs[1])
-	outputs[4].Add(outputs[4], &s.A0).
-		Div(outputs[4], cc.fr)
-
-	// set the signs
-	outputs[5].SetUint64(0)
-	outputs[6].SetUint64(0)
-	outputs[7].SetUint64(0)
-	outputs[8].SetUint64(0)
-	outputs[9].SetUint64(0)
-
-	if outputs[0].Sign() == -1 {
-		outputs[0].Neg(outputs[0])
+	if u1.Sign() < 0 {
 		outputs[5].SetUint64(1)
 	}
-	if outputs[1].Sign() == -1 {
-		outputs[1].Neg(outputs[1])
+	if u2.Sign() < 0 {
 		outputs[6].SetUint64(1)
 	}
-	if outputs[2].Sign() == -1 {
-		outputs[2].Neg(outputs[2])
+	if v1.Sign() < 0 {
 		outputs[7].SetUint64(1)
 	}
-	if outputs[3].Sign() == -1 {
-		outputs[3].Neg(outputs[3])
+	if v2.Sign() < 0 {
 		outputs[8].SetUint64(1)
 	}
-	if outputs[4].Sign() == -1 {
-		outputs[4].Neg(outputs[4])
+	if q.Sign() < 0 {
 		outputs[9].SetUint64(1)
 	}
-
 	return nil
 }
diff --git a/std/algebra/native/twistededwards/curve_test.go b/std/algebra/native/twistededwards/curve_test.go
index b7715e5c95..5afcc9a727 100644
--- a/std/algebra/native/twistededwards/curve_test.go
+++ b/std/algebra/native/twistededwards/curve_test.go
@@ -626,7 +626,7 @@ func (c *scalarMulFakeGLVRegressionCircuit) Define(api frontend.API) error {
 	return nil
 }
 
-func zeroHalfGCDHint(_ *big.Int, inputs, outputs []*big.Int) error {
+func zeroRationalReconstructHint(_ *big.Int, inputs, outputs []*big.Int) error {
 	if len(inputs) != 2 {
 		return errors.New("expecting two inputs")
 	}
@@ -640,8 +640,9 @@ func zeroHalfGCDHint(_ *big.Int, inputs, outputs []*big.Int) error {
 }
 
 // This is a regression for a soundness issue in scalarMulFakeGLV. A malicious
-// halfGCD hint can return the trivial decomposition s1=s2=0, which makes the
-// internal accumulator check vacuous and lets any scalar-mul hint output pass.
+// rationalReconstruct hint can return the trivial decomposition s1=s2=0,
+// which makes the internal accumulator check vacuous and lets any scalar-mul
+// hint output pass.
 func TestScalarMulFakeGLVRegressionTrivialDecomposition(t *testing.T) {
 	assert := require.New(t)
 
@@ -654,7 +655,7 @@ func TestScalarMulFakeGLVRegressionTrivialDecomposition(t *testing.T) {
 		&scalarMulFakeGLVRegressionCircuit{},
 		&witness,
 		ecc.BN254.ScalarField(),
-		test.WithReplacementHint(solver.GetHintID(halfGCD), zeroHalfGCDHint),
+		test.WithReplacementHint(solver.GetHintID(rationalReconstruct), zeroRationalReconstructHint),
 	)
 	assert.Error(err)
 }
diff --git a/std/algebra/native/twistededwards/hints.go b/std/algebra/native/twistededwards/hints.go
index b83564f213..0a52dba254 100644
--- a/std/algebra/native/twistededwards/hints.go
+++ b/std/algebra/native/twistededwards/hints.go
@@ -5,6 +5,7 @@ import (
 	"math/big"
 	"sync"
 
+	"github.com/consensys/gnark-crypto/algebra/lattice"
 	"github.com/consensys/gnark-crypto/ecc"
 	edbls12377 "github.com/consensys/gnark-crypto/ecc/bls12-377/twistededwards"
 	"github.com/consensys/gnark-crypto/ecc/bls12-381/bandersnatch"
@@ -16,7 +17,7 @@ import (
 
 func GetHints() []solver.Hint {
 	return []solver.Hint{
-		halfGCD,
+		rationalReconstruct,
 		scalarMulHint,
 		decomposeScalar,
 	}
@@ -63,40 +64,55 @@ func decomposeScalar(scalarField *big.Int, inputs []*big.Int, res []*big.Int) er
 	return nil
 }
 
-func halfGCD(mod *big.Int, inputs, outputs []*big.Int) error {
+// rationalReconstruct decomposes a scalar s ∈ Fr into (s1, s2, signBit, k) such
+// that s1 + s2·s = k·r in the integers, with |s1|, |s2| < γ₂·√r ≈ 1.15·√r
+// (proven LLL/Hermite bound). Replaces the older heuristic-bound HalfGCD.
+//
+// The bit-decomposition convention: s1 ≥ 0 always, s2 = ±|s2| with signBit = 1
+// iff the underlying signed s2 was negative. The integer k is signed.
+func rationalReconstruct(_ *big.Int, inputs, outputs []*big.Int) error {
 	if len(inputs) != 2 {
-		return errors.New("expecting two inputs")
+		return errors.New("expecting two inputs (s, r)")
 	}
 	if len(outputs) != 4 {
-		return errors.New("expecting four outputs")
+		return errors.New("expecting four outputs (s1, |s2|, signBit, k)")
 	}
-	// using PrecomputeLattice for scalar decomposition is a hack and it doesn't
-	// work in case the scalar is zero. override it for now to avoid division by
-	// zero until a long-term solution is found.
+	// Zero scalar: trivial (s1=s2=k=0). The in-circuit IsZero(s2)=0 guard
+	// rejects this; the caller must pre-route scalar=1 (mirrors the existing
+	// scalarMulFakeGLV: checkedScalar = Select(isScalarZero, 1, scalar)).
 	if inputs[0].Sign() == 0 {
-		outputs[0].SetUint64(0)
-		outputs[1].SetUint64(0)
-		outputs[2].SetUint64(0)
-		outputs[3].SetUint64(0)
+		for i := range outputs {
+			outputs[i].SetUint64(0)
+		}
 		return nil
 	}
-	glvBasis := new(ecc.Lattice)
-	ecc.PrecomputeLattice(inputs[1], inputs[0], glvBasis)
-	outputs[0].Set(&glvBasis.V1[0])
-	outputs[1].Set(&glvBasis.V1[1])
 
-	// figure out how many times we have overflowed
-	// s2 * s + s1 = k*r
-	outputs[3].Mul(outputs[1], inputs[0]).
-		Add(outputs[3], outputs[0]).
-		Div(outputs[3], inputs[1])
+	// lattice.RationalReconstruct returns (x, z) with x ≡ z·s mod r,
+	// so x − z·s = m·r for some signed integer m, with |x|, |z| < γ₂·√r.
+	// Map onto our convention: s1 + s2·s = k·r ⇒ s1 = x, s2 = −z, k = m.
+	res := lattice.RationalReconstruct(inputs[0], inputs[1])
+	x, z := new(big.Int).Set(res[0]), new(big.Int).Set(res[1])
+
+	// Normalise so s1 ≥ 0. Flipping signs of (x, z) preserves x − z·s = m·r
+	// (with m negated).
+	if x.Sign() < 0 {
+		x.Neg(x)
+		z.Neg(z)
+	}
+	outputs[0].Set(x) // s1 = x ≥ 0
+
+	// k = (x − z·s) / r computed in signed integers.
+	k := new(big.Int).Mul(z, inputs[0])
+	k.Sub(x, k)
+	k.Quo(k, inputs[1])
+	outputs[3].Set(k)
 
+	// s2 = −z, encoded as |s2| + signBit. signBit = 1 iff −z < 0 iff z > 0.
+	outputs[1].Abs(z)
 	outputs[2].SetUint64(0)
-	if outputs[1].Sign() == -1 {
-		outputs[1].Neg(outputs[1])
+	if z.Sign() > 0 {
 		outputs[2].SetUint64(1)
 	}
-
 	return nil
 }
 
diff --git a/std/algebra/native/twistededwards/point.go b/std/algebra/native/twistededwards/point.go
index de8f1b8967..1c93fe2650 100644
--- a/std/algebra/native/twistededwards/point.go
+++ b/std/algebra/native/twistededwards/point.go
@@ -131,8 +131,10 @@ func (p *Point) scalarMulFakeGLV(api frontend.API, p1 *Point, scalar frontend.Va
 	checkedScalar := api.Select(isScalarZero, 1, scalar)
 
 	// the hints allow to decompose the scalar s into s1 and s2 such that
-	// s1 + s * s2 == 0 mod Order,
-	s, err := api.NewHint(halfGCD, 4, checkedScalar, curve.Order)
+	// s1 + s * s2 == 0 mod Order. Uses LLL-based lattice rational
+	// reconstruction with proven Hermite bound |s1|, |s2| < γ₂·√r ≈ 1.15·√r
+	// (see [EEMP25] / gnark-crypto/algebra/lattice).
+	s, err := api.NewHint(rationalReconstruct, 4, checkedScalar, curve.Order)
 	if err != nil {
 		// err is non-nil only for invalid number of inputs
 		panic(err)

From ee6aeaec68af0046fa22d8f1a6da4ddc872e8380 Mon Sep 17 00:00:00 2001
From: Youssef El Housni <youssef.elhousni@consensys.net>
Date: Wed, 6 May 2026 15:59:47 -0400
Subject: [PATCH 2/8] perf: G2 GLV+FakeGLV scalar multiplication for
 BN254/BLS12-381/BW6-761

---
 internal/stats/latest_stats.csv             |   6 +
 internal/stats/snippet.go                   |  83 ++++
 std/algebra/emulated/sw_bls12381/g2.go      | 399 ++++++++++++--------
 std/algebra/emulated/sw_bls12381/g2_test.go |   2 +-
 std/algebra/emulated/sw_bls12381/hints.go   |  94 +++++
 std/algebra/emulated/sw_bn254/g2.go         | 399 +++++++++++++++++++-
 std/algebra/emulated/sw_bn254/hints.go      |  95 +++++
 std/algebra/emulated/sw_bw6761/g2.go        | 331 +++++++++++++++-
 std/algebra/emulated/sw_bw6761/hints.go     | 136 +++++++
 9 files changed, 1377 insertions(+), 168 deletions(-)

diff --git a/internal/stats/latest_stats.csv b/internal/stats/latest_stats.csv
index f30dd48cae..8bbce37c15 100644
--- a/internal/stats/latest_stats.csv
+++ b/internal/stats/latest_stats.csv
@@ -99,6 +99,12 @@ scalar_mul_G1_bn254,bn254,groth16,108168,163915
 scalar_mul_G1_bn254,bn254,plonk,355353,340385
 scalar_mul_G1_bn254_incomplete,bn254,groth16,51579,81902
 scalar_mul_G1_bn254_incomplete,bn254,plonk,185916,179316
+scalar_mul_G2_bls12381,bn254,groth16,138129,216643
+scalar_mul_G2_bls12381,bn254,plonk,481714,463233
+scalar_mul_G2_bn254,bn254,groth16,102279,158428
+scalar_mul_G2_bn254,bn254,plonk,349799,336420
+scalar_mul_G2_bw6761,bn254,groth16,191105,296444
+scalar_mul_G2_bw6761,bn254,plonk,659865,637514
 scalar_mul_P256,bn254,groth16,96724,151768
 scalar_mul_P256,bn254,plonk,328895,315729
 scalar_mul_P256_incomplete,bn254,groth16,75542,121798
diff --git a/internal/stats/snippet.go b/internal/stats/snippet.go
index 6a4182a69d..9534fee643 100644
--- a/internal/stats/snippet.go
+++ b/internal/stats/snippet.go
@@ -412,6 +412,89 @@ func initSnippets() {
 
 	}, ecc.BN254)
 
+	// G2 scalar mul snippets — exercise the GLV+FakeGLV path to track its cost.
+	registerSnippet("scalar_mul_G2_bls12381", func(api frontend.API, newVariable func() frontend.Variable) {
+		bls12381fp, _ := emulated.NewField[emulated.BLS12381Fp](api)
+		newFp := func() *emulated.Element[emulated.BLS12381Fp] {
+			nbLimbs, _ := emulated.GetEffectiveFieldParams[emulated.BLS12381Fp](api.Compiler().Field())
+			limbs := make([]frontend.Variable, nbLimbs)
+			for i := range limbs {
+				limbs[i] = newVariable()
+			}
+			return bls12381fp.NewElement(limbs)
+		}
+		bls12381fr, _ := emulated.NewField[emulated.BLS12381Fr](api)
+		newFr := func() *emulated.Element[emulated.BLS12381Fr] {
+			nbLimbs, _ := emulated.GetEffectiveFieldParams[emulated.BLS12381Fr](api.Compiler().Field())
+			limbs := make([]frontend.Variable, nbLimbs)
+			for i := range limbs {
+				limbs[i] = newVariable()
+			}
+			return bls12381fr.NewElement(limbs)
+		}
+		g2, _ := sw_bls12381.NewG2(api)
+		var dummyQ sw_bls12381.G2Affine
+		dummyQ.P.X.A0 = *newFp()
+		dummyQ.P.X.A1 = *newFp()
+		dummyQ.P.Y.A0 = *newFp()
+		dummyQ.P.Y.A1 = *newFp()
+		_ = g2.ScalarMul(&dummyQ, newFr())
+	}, ecc.BN254)
+
+	registerSnippet("scalar_mul_G2_bn254", func(api frontend.API, newVariable func() frontend.Variable) {
+		bn254fp, _ := emulated.NewField[emulated.BN254Fp](api)
+		newFp := func() *emulated.Element[emulated.BN254Fp] {
+			nbLimbs, _ := emulated.GetEffectiveFieldParams[emulated.BN254Fp](api.Compiler().Field())
+			limbs := make([]frontend.Variable, nbLimbs)
+			for i := range limbs {
+				limbs[i] = newVariable()
+			}
+			return bn254fp.NewElement(limbs)
+		}
+		bn254fr, _ := emulated.NewField[emulated.BN254Fr](api)
+		newFr := func() *emulated.Element[emulated.BN254Fr] {
+			nbLimbs, _ := emulated.GetEffectiveFieldParams[emulated.BN254Fr](api.Compiler().Field())
+			limbs := make([]frontend.Variable, nbLimbs)
+			for i := range limbs {
+				limbs[i] = newVariable()
+			}
+			return bn254fr.NewElement(limbs)
+		}
+		g2, _ := sw_bn254.NewG2(api)
+		var dummyQ sw_bn254.G2Affine
+		dummyQ.P.X.A0 = *newFp()
+		dummyQ.P.X.A1 = *newFp()
+		dummyQ.P.Y.A0 = *newFp()
+		dummyQ.P.Y.A1 = *newFp()
+		_ = g2.ScalarMul(&dummyQ, newFr())
+	}, ecc.BN254)
+
+	registerSnippet("scalar_mul_G2_bw6761", func(api frontend.API, newVariable func() frontend.Variable) {
+		bw6fp, _ := emulated.NewField[emulated.BW6761Fp](api)
+		newFp := func() *emulated.Element[emulated.BW6761Fp] {
+			nbLimbs, _ := emulated.GetEffectiveFieldParams[emulated.BW6761Fp](api.Compiler().Field())
+			limbs := make([]frontend.Variable, nbLimbs)
+			for i := range limbs {
+				limbs[i] = newVariable()
+			}
+			return bw6fp.NewElement(limbs)
+		}
+		bw6fr, _ := emulated.NewField[emulated.BW6761Fr](api)
+		newFr := func() *emulated.Element[emulated.BW6761Fr] {
+			nbLimbs, _ := emulated.GetEffectiveFieldParams[emulated.BW6761Fr](api.Compiler().Field())
+			limbs := make([]frontend.Variable, nbLimbs)
+			for i := range limbs {
+				limbs[i] = newVariable()
+			}
+			return bw6fr.NewElement(limbs)
+		}
+		g2, _ := sw_bw6761.NewG2(api)
+		var dummyQ sw_bw6761.G2Affine
+		dummyQ.P.X = *newFp()
+		dummyQ.P.Y = *newFp()
+		_ = g2.ScalarMul(&dummyQ, newFr())
+	}, ecc.BN254)
+
 	registerSnippet("selector/mux_3", func(api frontend.API, newVariable func() frontend.Variable) {
 		selector.Mux(api, newVariable(), newVariable(), newVariable(), newVariable())
 	})
diff --git a/std/algebra/emulated/sw_bls12381/g2.go b/std/algebra/emulated/sw_bls12381/g2.go
index 1b75c41bbd..742ce085b2 100644
--- a/std/algebra/emulated/sw_bls12381/g2.go
+++ b/std/algebra/emulated/sw_bls12381/g2.go
@@ -26,6 +26,10 @@ type G2 struct {
 	// SSWU map coefficients
 	sswuCoeffA, sswuCoeffB *fields_bls12381.E2
 	sswuZ                  *fields_bls12381.E2
+
+	// Precomputed constants for the GLV+FakeGLV scalar mul ([EEMP25] §3.3).
+	g2Gen      *g2AffP // G2 generator
+	g2GenNbits *g2AffP // [2^(nbits-1)]G2 with nbits = (r.BitLen()+3)/4 + 2
 }
 
 type g2AffP struct {
@@ -82,6 +86,30 @@ func NewG2(api frontend.API) (*G2, error) {
 		A0: *fp.NewElement(sswuZ.A0),
 		A1: *fp.NewElement(sswuZ.A1),
 	}
+
+	// Precomputed G2 generator for GLV+FakeGLV.
+	g2Gen := &g2AffP{
+		X: fields_bls12381.E2{
+			A0: *fp.NewElement("352701069587466618187139116011060144890029952792775240219908644239793785735715026873347600343865175952761926303160"),
+			A1: *fp.NewElement("3059144344244213709971259814753781636986470325476647558659373206291635324768958432433509563104347017837885763365758"),
+		},
+		Y: fields_bls12381.E2{
+			A0: *fp.NewElement("1985150602287291935568054521177171638300868978215655730859378665066344726373823718423869104263333984641494340347905"),
+			A1: *fp.NewElement("927553665492332455747201965776037880757740193453592970025027978793976877002675564980949289727957565575433344219582"),
+		},
+	}
+	// [2^(nbits-1)]G2 where nbits = (255+3)/4 + 2 = 66, so this is [2^65]G2.
+	g2GenNbits := &g2AffP{
+		X: fields_bls12381.E2{
+			A0: *fp.NewElement("1307001654908388153254394944417118155033503188409787277795273489312551176370209873126740711463572657296916966732684"),
+			A1: *fp.NewElement("1066804690119577865989830850277879393407029322116864061755683314318400220056817483617033672656485029228353937929571"),
+		},
+		Y: fields_bls12381.E2{
+			A0: *fp.NewElement("1233864651366532660795929818904272589705597977637697925481983092108793193162343169655985724823869788077854535468808"),
+			A1: *fp.NewElement("2703972434797875065063829955607449483769333186572810763171217085444622779819503421195150761462859837038921185079043"),
+		},
+	}
+
 	return &G2{
 		api:        api,
 		fp:         fp,
@@ -96,6 +124,9 @@ func NewG2(api frontend.API) (*G2, error) {
 		sswuCoeffA: coeffA,
 		sswuCoeffB: coeffB,
 		sswuZ:      z,
+		// GLV+FakeGLV precomputed values
+		g2Gen:      g2Gen,
+		g2GenNbits: g2GenNbits,
 	}, nil
 }
 
@@ -607,204 +638,254 @@ func (g2 *G2) scalarMulGeneric(p *G2Affine, s *Scalar, opts ...algopts.AlgebraOp
 	return R0
 }
 
-// ScalarMul computes [s]Q using an efficient endomorphism and returns it. It doesn't modify Q nor s.
-// It implements an optimized version based on algorithm 1 of [Halo] (see Section 6.2 and appendix C).
+// ScalarMul computes [s]Q using GLV+FakeGLV with proven r^(1/4) sub-scalar
+// bounds (LLL Hermite). Routes through scalarMulGLVAndFakeGLV.
 //
 // This method is complete by default.
 // [algopts.WithIncompleteArithmetic] is deprecated here and ignored.
 // (0,0) is not on the curve but we conventionally take it as the
 // neutral/infinity point as per the [EVM].
 //
-// [Halo]: https://eprint.iacr.org/2019/1021.pdf
 // [EVM]: https://ethereum.github.io/yellowpaper/paper.pdf
+//
+// [EEMP25]: https://eprint.iacr.org/2025/933
 func (g2 *G2) ScalarMul(Q *G2Affine, s *Scalar, opts ...algopts.AlgebraOption) *G2Affine {
-	return g2.scalarMulGLV(Q, s, opts...)
+	return g2.scalarMulGLVAndFakeGLV(Q, s, opts...)
 }
 
-// scalarMulGLV computes [s]Q using an efficient endomorphism and returns it. It doesn't modify Q nor s.
-// It implements an optimized version based on algorithm 1 of [Halo] (see Section 6.2 and appendix C).
+// scalarMulGLVAndFakeGLV computes [s]Q using GLV+FakeGLV with r^(1/4) bounds.
+// It implements the "GLV + fake GLV" explained in [EEMP25] (Sec. 3.3).
+//
+// We hint the result R = [s]Q and verify the equation
+//
+//	[v1]R + [v2]Φ(R) + [u1]Q + [u2]Φ(Q) = O
+//
+// where (u1, u2, v1, v2) is the LLL-reduced 4-D Eisenstein decomposition of −s
+// against the GLV eigenvalue λ, so each sub-scalar fits in roughly r^(1/4)
+// bits — about a quarter of the iteration count of plain GLV.
 //
-// This helper always uses complete arithmetic.
+// This method is complete by default.
 // [algopts.WithIncompleteArithmetic] is deprecated here and ignored.
-// (0,0) is not on the curve but we conventionally take it as the
-// neutral/infinity point as per the [EVM].
 //
-// [Halo]: https://eprint.iacr.org/2019/1021.pdf
-// [EVM]: https://ethereum.github.io/yellowpaper/paper.pdf
-func (g2 *G2) scalarMulGLV(Q *G2Affine, s *Scalar, opts ...algopts.AlgebraOption) *G2Affine {
+// [EEMP25]: https://eprint.iacr.org/2025/933
+func (g2 *G2) scalarMulGLVAndFakeGLV(Q *G2Affine, s *Scalar, opts ...algopts.AlgebraOption) *G2Affine {
 	cfg, err := algopts.NewConfig(opts...)
 	if err != nil {
 		panic(err)
 	}
 	if cfg.IncompleteArithmetic {
 		compilelogger.LogOnce(g2.api.Compiler(), zerolog.InfoLevel,
-			"sw_bls12_381/scalarMulGLV", "WithIncompleteArithmetic is deprecated in (*sw_bls12381.G2).scalarMulGLV and complete arithmetic is always used")
-	}
-	// if Q=(0,0) we assign a dummy (1,1) to Q and continue
-	selector := g2.api.And(
-		g2.api.And(g2.fp.IsZero(&Q.P.X.A0), g2.fp.IsZero(&Q.P.X.A1)),
-		g2.api.And(g2.fp.IsZero(&Q.P.Y.A0), g2.fp.IsZero(&Q.P.Y.A1)),
-	)
-	one := g2.Ext2.One()
-	Q = g2.Select(selector, &G2Affine{P: g2AffP{X: *one, Y: *one}, Lines: nil}, Q)
-
-	// We use the endomorphism à la GLV to compute [s]Q as
-	// 		[s1]Q + [s2]Φ(Q)
-	// the sub-scalars s1, s2 can be negative (bigints) in the hint. If so,
-	// they will be reduced in-circuit modulo the SNARK scalar field and not
-	// the emulated field. So we return in the hint |s1|, |s2| and boolean
-	// flags sdBits to negate the points Q, Φ(Q) instead of the corresponding
-	// sub-scalars.
-
-	// decompose s into s1 and s2
-	sdBits, sd, err := g2.fr.NewHintGeneric(decomposeScalarG1, 2, 2, nil, []*emulated.Element[ScalarField]{s, g2.eigenvalue})
+			"sw_bls12_381/scalarMulGLVAndFakeGLV", "WithIncompleteArithmetic is deprecated in (*sw_bls12381.G2).scalarMulGLVAndFakeGLV and complete arithmetic is always used")
+	}
+
+	// Handle s = 0 by routing through s = 1 and overriding the result later.
+	one := g2.fr.One()
+	selector0 := g2.fr.IsZero(s)
+	_s := g2.fr.Select(selector0, one, s)
+
+	// Decompose s into (u1, u2, v1, v2) via LLL: s·(v1 + λ·v2) + u1 + λ·u2 ≡ 0
+	// (mod r), with each sub-scalar bounded by ~r^(1/4).
+	signs, sd, err := g2.fr.NewHintGeneric(rationalReconstructExtG2, 4, 4, nil, []*emulated.Element[ScalarField]{_s, g2.eigenvalue})
 	if err != nil {
-		panic(fmt.Sprintf("compute GLV decomposition: %v", err))
-	}
-	s1, s2 := sd[0], sd[1]
-	selector1, selector2 := sdBits[0], sdBits[1]
-	s3 := g2.fr.Select(selector1, g2.fr.Neg(s1), s1)
-	s4 := g2.fr.Select(selector2, g2.fr.Neg(s2), s2)
-	// s == s3 + [λ]s4
-	g2.fr.AssertIsEqual(
-		g2.fr.Add(s3, g2.fr.Mul(s4, g2.eigenvalue)),
-		s,
-	)
-
-	s1bits := g2.fr.ToBits(s1)
-	s2bits := g2.fr.ToBits(s2)
-
-	// precompute -Q, -Φ(Q), Φ(Q)
-	var tableQ, tablePhiQ [3]*G2Affine
-	negQY := g2.Ext2.Neg(&Q.P.Y)
+		panic(fmt.Sprintf("rationalReconstructExtG2 hint: %v", err))
+	}
+	u1, u2, v1, v2 := sd[0], sd[1], sd[2], sd[3]
+	isNegu1, isNegu2, isNegv1, isNegv2 := signs[0], signs[1], signs[2], signs[3]
+
+	// Verify s·(v1 + λ·v2) + u1 + λ·u2 ≡ 0 (mod r).
+	var st ScalarField
+	sv1 := g2.fr.Mul(_s, v1)
+	sλv2 := g2.fr.Mul(_s, g2.fr.Mul(g2.eigenvalue, v2))
+	λu2 := g2.fr.Mul(g2.eigenvalue, u2)
+	zero := g2.fr.Zero()
+
+	lhs1 := g2.fr.Select(isNegv1, zero, sv1)
+	lhs2 := g2.fr.Select(isNegv2, zero, sλv2)
+	lhs3 := g2.fr.Select(isNegu1, zero, u1)
+	lhs4 := g2.fr.Select(isNegu2, zero, λu2)
+	lhs := g2.fr.Add(g2.fr.Add(lhs1, lhs2), g2.fr.Add(lhs3, lhs4))
+
+	rhs1 := g2.fr.Select(isNegv1, sv1, zero)
+	rhs2 := g2.fr.Select(isNegv2, sλv2, zero)
+	rhs3 := g2.fr.Select(isNegu1, u1, zero)
+	rhs4 := g2.fr.Select(isNegu2, λu2, zero)
+	rhs := g2.fr.Add(g2.fr.Add(rhs1, rhs2), g2.fr.Add(rhs3, rhs4))
+
+	g2.fr.AssertIsEqual(lhs, rhs)
+
+	// Soundness: forbid the trivial all-zeros decomposition. With v1 + λ·v2 ≠ 0
+	// the relation forces (u1, u2) to the unique LLL lattice point and pins
+	// the hinted R = [s]Q.
+	g2.fr.AssertIsDifferent(g2.fr.Add(v1, g2.fr.Mul(g2.eigenvalue, v2)), g2.fr.Zero())
+
+	// Hint R = [s]Q.
+	_, point, _, err := emulated.NewVarGenericHint(g2.api, 0, 4, 0, nil,
+		[]*emulated.Element[BaseField]{&Q.P.X.A0, &Q.P.X.A1, &Q.P.Y.A0, &Q.P.Y.A1},
+		[]*emulated.Element[ScalarField]{s},
+		scalarMulG2Hint)
+	if err != nil {
+		panic(fmt.Sprintf("scalarMulG2Hint: %v", err))
+	}
+	R := &G2Affine{
+		P: g2AffP{
+			X: fields_bls12381.E2{A0: *point[0], A1: *point[1]},
+			Y: fields_bls12381.E2{A0: *point[2], A1: *point[3]},
+		},
+	}
+	originalR := R // preserve the unmodified hint output for the return value
+
+	// Edge cases: Q = (0,0), s = 0, s = ±1 force R into a relation with Q
+	// that the incomplete table precomputations can't handle. Substitute
+	// dummy points and reconstruct the canonical result at the end.
+	dummyQ := &G2Affine{P: *g2.g2Gen}
+	dummyR := &G2Affine{P: *g2.g2GenNbits}
+
+	_selector0 := g2.api.And(g2.Ext2.IsZero(&Q.P.X), g2.Ext2.IsZero(&Q.P.Y))
+	_Q := g2.Select(_selector0, dummyQ, Q)
+
+	sIsOne := g2.fr.IsZero(g2.fr.Sub(s, g2.fr.One()))
+	sIsMinusOne := g2.fr.IsZero(g2.fr.Add(s, g2.fr.One()))
+	_selector1 := g2.api.Or(sIsOne, sIsMinusOne)
+
+	selectorAny := g2.api.Or(g2.api.Or(selector0, _selector0), _selector1)
+	R = g2.Select(selectorAny, dummyR, R)
+
+	// Precompute -Q, -Φ(Q), Φ(Q).
+	var tableQ, tablePhiQ [2]*G2Affine
+	negQY := g2.Ext2.Neg(&_Q.P.Y)
 	tableQ[1] = &G2Affine{
 		P: g2AffP{
-			X: Q.P.X,
-			Y: *g2.Ext2.Select(selector1, negQY, &Q.P.Y),
+			X: _Q.P.X,
+			Y: *g2.Ext2.Select(isNegu1, negQY, &_Q.P.Y),
 		},
 	}
 	tableQ[0] = g2.neg(tableQ[1])
 	tablePhiQ[1] = &G2Affine{
 		P: g2AffP{
-			X: *g2.Ext2.MulByElement(&Q.P.X, g2.w2),
-			Y: *g2.Ext2.Select(selector2, negQY, &Q.P.Y),
+			X: *g2.Ext2.MulByElement(&_Q.P.X, g2.w2),
+			Y: *g2.Ext2.Select(isNegu2, negQY, &_Q.P.Y),
 		},
 	}
 	tablePhiQ[0] = g2.neg(tablePhiQ[1])
-	tableQ[2] = g2.triple(tableQ[1])
-	tablePhiQ[2] = &G2Affine{
+
+	// Precompute -R, -Φ(R), Φ(R).
+	var tableR, tablePhiR [2]*G2Affine
+	negRY := g2.Ext2.Neg(&R.P.Y)
+	tableR[1] = &G2Affine{
 		P: g2AffP{
-			X: *g2.Ext2.MulByElement(&tableQ[2].P.X, g2.w2),
-			Y: *g2.Ext2.Select(selector2, g2.Ext2.Neg(&tableQ[2].P.Y), &tableQ[2].P.Y),
+			X: R.P.X,
+			Y: *g2.Ext2.Select(isNegv1, negRY, &R.P.Y),
 		},
 	}
-
-	// we suppose that the first bits of the sub-scalars are 1 and set:
-	// 		Acc = Q + Φ(Q)
-	Acc := g2.add(tableQ[1], tablePhiQ[1])
-
-	// At each iteration we need to compute:
-	// 		[2]Acc ± Q ± Φ(Q).
-	// We can compute [2]Acc and look up the (precomputed) point P from:
-	// 		B1 = Q+Φ(Q)
-	// 		B2 = -Q-Φ(Q)
-	// 		B3 = Q-Φ(Q)
-	// 		B4 = -Q+Φ(Q)
-	//
-	// If we extend this by merging two iterations, we need to look up P and P'
-	// both from {B1, B2, B3, B4} and compute:
-	// 		[2]([2]Acc+P)+P' = [4]Acc + T
-	// where T = [2]P+P'. So at each (merged) iteration, we can compute [4]Acc
-	// and look up T from the precomputed list of points:
-	//
-	// T = [3](Q + Φ(Q))
-	// P = B1 and P' = B1
-	t1 := g2.add(tableQ[2], tablePhiQ[2])
-	// T = Q + Φ(Q)
-	// P = B1 and P' = B2
-	T2 := Acc
-	// T = [3]Q + Φ(Q)
-	// P = B1 and P' = B3
-	T3 := g2.add(tableQ[2], tablePhiQ[1])
-	// T = Q + [3]Φ(Q)
-	// P = B1 and P' = B4
-	t4 := g2.add(tableQ[1], tablePhiQ[2])
-	// T  = -[3](Q + Φ(Q))
-	// P = B2 and P' = B2
-	T6 := g2.neg(t1)
-	// T = -Q - [3]Φ(Q)
-	// P = B2 and P' = B3
-	T7 := g2.neg(t4)
-	// T = [3]Q - Φ(Q)
-	// P = B3 and P' = B1
-	t9 := g2.add(tableQ[2], tablePhiQ[0])
-	// T = Q - [3]Φ(Q)
-	// P = B3 and P' = B2
-	t := g2.neg(tablePhiQ[2])
-	T10 := g2.add(tableQ[1], t)
-	// T = [3](Q - Φ(Q))
-	// P = B3 and P' = B3
-	T11 := g2.add(tableQ[2], t)
-	// T = -Φ(Q) + Q
-	// P = B3 and P' = B4
-	T12 := g2.add(tablePhiQ[0], tableQ[1])
-	// T = Φ(Q) - [3]Q
-	// P = B4 and P' = B2
-	T14 := g2.neg(t9)
-	// T = Φ(Q) - Q
-	// P = B4 and P' = B3
-	T15 := g2.neg(T12)
-	// note that half the points are negatives of the other half,
-	// hence have the same X coordinates.
-
-	nbits := 130
-	for i := nbits - 2; i > 0; i -= 2 {
-		// selectorY takes values in [0,15]
+	tableR[0] = g2.neg(tableR[1])
+	tablePhiR[1] = &G2Affine{
+		P: g2AffP{
+			X: *g2.Ext2.MulByElement(&R.P.X, g2.w2),
+			Y: *g2.Ext2.Select(isNegv2, negRY, &R.P.Y),
+		},
+	}
+	tablePhiR[0] = g2.neg(tablePhiR[1])
+
+	// Combine Q, R precomputations into ±Q±R, ±Φ(Q)±Φ(R) tables.
+	var tableS [4]*G2Affine
+	tableS[0] = g2.add(tableQ[0], tableR[0])
+	tableS[1] = g2.neg(tableS[0])
+	tableS[2] = g2.add(tableQ[1], tableR[0])
+	tableS[3] = g2.neg(tableS[2])
+
+	var tablePhiS [4]*G2Affine
+	tablePhiS[0] = g2.add(tablePhiQ[0], tablePhiR[0])
+	tablePhiS[1] = g2.neg(tablePhiS[0])
+	tablePhiS[2] = g2.add(tablePhiQ[1], tablePhiR[0])
+	tablePhiS[3] = g2.neg(tablePhiS[2])
+
+	// Initial accumulator: Q + R + Φ(Q) + Φ(R) plus a fixed shift by the G2
+	// generator to avoid incomplete additions in the loop. At the end Acc
+	// will equal [2^(nbits-1)]G2 (the precomputed g2GenNbits).
+	Acc := g2.add(tableS[1], tablePhiS[1])
+	B1 := Acc
+	g2GenPoint := &G2Affine{P: *g2.g2Gen}
+	Acc = g2.add(Acc, g2GenPoint)
+
+	// LLL Hermite bound: u_i, v_i < γ₄·r^(1/4), fits in (BitLen+3)/4 + 2 bits.
+	nbits := (st.Modulus().BitLen()+3)/4 + 2
+	u1bits := g2.fr.ToBits(u1)
+	u2bits := g2.fr.ToBits(u2)
+	v1bits := g2.fr.ToBits(v1)
+	v2bits := g2.fr.ToBits(v2)
+
+	// 16-entry Bi precomputation: ±Q ± R ± Φ(Q) ± Φ(R). Half the entries are
+	// negatives of the other half (same X), so we use an 8-to-1 mux + signed Y.
+	B2 := g2.add(tableS[1], tablePhiS[2])
+	B3 := g2.add(tableS[1], tablePhiS[3])
+	B4 := g2.add(tableS[1], tablePhiS[0])
+	B5 := g2.add(tableS[2], tablePhiS[1])
+	B6 := g2.add(tableS[2], tablePhiS[2])
+	B7 := g2.add(tableS[2], tablePhiS[3])
+	B8 := g2.add(tableS[2], tablePhiS[0])
+	B10 := g2.neg(B7)
+	B12 := g2.neg(B5)
+	B14 := g2.neg(B3)
+	B16 := g2.neg(B1)
+
+	var Bi *G2Affine
+	for i := nbits - 1; i > 0; i-- {
 		selectorY := g2.api.Add(
-			s1bits[i],
-			g2.api.Mul(s2bits[i], 2),
-			g2.api.Mul(s1bits[i-1], 4),
-			g2.api.Mul(s2bits[i-1], 8),
+			u1bits[i],
+			g2.api.Mul(u2bits[i], 2),
+			g2.api.Mul(v1bits[i], 4),
+			g2.api.Mul(v2bits[i], 8),
 		)
-		// selectorX takes values in [0,7] s.t.:
-		// 		- when selectorY < 8: selectorX = selectorY
-		// 		- when selectorY >= 8: selectorX = 15 - selectorY
 		selectorX := g2.api.Add(
-			g2.api.Mul(selectorY, g2.api.Sub(1, g2.api.Mul(s2bits[i-1], 2))),
-			g2.api.Mul(s2bits[i-1], 15),
+			g2.api.Mul(selectorY, g2.api.Sub(1, g2.api.Mul(v2bits[i], 2))),
+			g2.api.Mul(v2bits[i], 15),
 		)
-		// Half of the Bi.X are distinct (8-to-1) and Y[i] = -Y[15-i],
-		// so we use 8-to-1 Mux for both X and Y, with conditional negation for Y.
-		T := &G2Affine{
+		Bi = &G2Affine{
 			P: g2AffP{
 				X: fields_bls12381.E2{
-					A0: *g2.fp.Mux(selectorX, &T6.P.X.A0, &T10.P.X.A0, &T14.P.X.A0, &T2.P.X.A0, &T7.P.X.A0, &T11.P.X.A0, &T15.P.X.A0, &T3.P.X.A0),
-					A1: *g2.fp.Mux(selectorX, &T6.P.X.A1, &T10.P.X.A1, &T14.P.X.A1, &T2.P.X.A1, &T7.P.X.A1, &T11.P.X.A1, &T15.P.X.A1, &T3.P.X.A1),
+					A0: *g2.fp.Mux(selectorX,
+						&B16.P.X.A0, &B8.P.X.A0, &B14.P.X.A0, &B6.P.X.A0, &B12.P.X.A0, &B4.P.X.A0, &B10.P.X.A0, &B2.P.X.A0,
+					),
+					A1: *g2.fp.Mux(selectorX,
+						&B16.P.X.A1, &B8.P.X.A1, &B14.P.X.A1, &B6.P.X.A1, &B12.P.X.A1, &B4.P.X.A1, &B10.P.X.A1, &B2.P.X.A1,
+					),
 				},
-				Y: *g2.muxE2Y8Signed(s2bits[i-1], selectorX,
-					[8]*emulated.Element[BaseField]{&T6.P.Y.A0, &T10.P.Y.A0, &T14.P.Y.A0, &T2.P.Y.A0, &T7.P.Y.A0, &T11.P.Y.A0, &T15.P.Y.A0, &T3.P.Y.A0},
-					[8]*emulated.Element[BaseField]{&T6.P.Y.A1, &T10.P.Y.A1, &T14.P.Y.A1, &T2.P.Y.A1, &T7.P.Y.A1, &T11.P.Y.A1, &T15.P.Y.A1, &T3.P.Y.A1},
+				Y: *g2.muxE2Y8Signed(v2bits[i], selectorX,
+					[8]*emulated.Element[BaseField]{&B16.P.Y.A0, &B8.P.Y.A0, &B14.P.Y.A0, &B6.P.Y.A0, &B12.P.Y.A0, &B4.P.Y.A0, &B10.P.Y.A0, &B2.P.Y.A0},
+					[8]*emulated.Element[BaseField]{&B16.P.Y.A1, &B8.P.Y.A1, &B14.P.Y.A1, &B6.P.Y.A1, &B12.P.Y.A1, &B4.P.Y.A1, &B10.P.Y.A1, &B2.P.Y.A1},
 				),
 			},
 		}
-		// Acc = [4]Acc + T
-		Acc = g2.double(Acc)
-		Acc = g2.doubleAndAdd(Acc, T)
-	}
-
-	// i = 0
-	// subtract the Q, Φ(Q) if the first bits are 0.
-	// We use AddUnified so that when s=0 then Acc=(0,0) because
-	// AddUnified(Q, -Q) = (0,0).
-	tableQ[0] = g2.AddUnified(tableQ[0], Acc)
-	Acc = g2.Select(s1bits[0], Acc, tableQ[0])
-	tablePhiQ[0] = g2.AddUnified(tablePhiQ[0], Acc)
-	Acc = g2.Select(s2bits[0], Acc, tablePhiQ[0])
-
-	zero := g2.Ext2.Zero()
-	Acc = g2.Select(selector, &G2Affine{P: g2AffP{X: *zero, Y: *zero}}, Acc)
-
-	return Acc
+		Acc = g2.doubleAndAdd(Acc, Bi)
+	}
+
+	// i = 0: subtract Q, Φ(Q), R, Φ(R) if the first bits are 0.
+	tableQ[0] = g2.add(tableQ[0], Acc)
+	Acc = g2.Select(u1bits[0], Acc, tableQ[0])
+	tablePhiQ[0] = g2.add(tablePhiQ[0], Acc)
+	Acc = g2.Select(u2bits[0], Acc, tablePhiQ[0])
+	tableR[0] = g2.add(tableR[0], Acc)
+	Acc = g2.Select(v1bits[0], Acc, tableR[0])
+	tablePhiR[0] = g2.add(tablePhiR[0], Acc)
+	Acc = g2.Select(v2bits[0], Acc, tablePhiR[0])
+
+	// At this point Acc must equal [2^(nbits-1)]G2 (the bias we added).
+	expected := &G2Affine{P: *g2.g2GenNbits}
+
+	// Skip the assertion on edge branches (where R is the dummy).
+	skip := g2.api.Or(g2.api.Or(selector0, _selector0), _selector1)
+	Acc = g2.Select(skip, expected, Acc)
+	g2.AssertIsEqual(Acc, expected)
+
+	// Reconstruct the canonical result: hint output for the regular path,
+	// constants on the edge branches.
+	zeroE2 := g2.Ext2.Zero()
+	zeroG2 := &G2Affine{P: g2AffP{X: *zeroE2, Y: *zeroE2}}
+	negQ := g2.neg(Q)
+	result := g2.Select(sIsMinusOne, negQ, originalR)
+	result = g2.Select(sIsOne, Q, result)
+	returnZero := g2.api.Or(selector0, _selector0)
+	return g2.Select(returnZero, zeroG2, result)
 }
 
 // MultiScalarMul computes the multi scalar multiplication of the points P and
@@ -834,9 +915,9 @@ func (g2 *G2) MultiScalarMul(p []*G2Affine, s []*Scalar, opts ...algopts.Algebra
 			return nil, fmt.Errorf("mismatching points and scalars slice lengths")
 		}
 		n := len(p)
-		res := g2.scalarMulGLV(p[0], s[0], opts...)
+		res := g2.ScalarMul(p[0], s[0], opts...)
 		for i := 1; i < n; i++ {
-			q := g2.scalarMulGLV(p[i], s[i], opts...)
+			q := g2.ScalarMul(p[i], s[i], opts...)
 			res = g2.AddUnified(res, q)
 		}
 		return res, nil
@@ -846,10 +927,10 @@ func (g2 *G2) MultiScalarMul(p []*G2Affine, s []*Scalar, opts ...algopts.Algebra
 			return nil, fmt.Errorf("need scalar for folding")
 		}
 		gamma := s[0]
-		res := g2.scalarMulGLV(p[len(p)-1], gamma, opts...)
+		res := g2.ScalarMul(p[len(p)-1], gamma, opts...)
 		for i := len(p) - 2; i > 0; i-- {
 			res = g2.AddUnified(p[i], res)
-			res = g2.scalarMulGLV(res, gamma, opts...)
+			res = g2.ScalarMul(res, gamma, opts...)
 		}
 		res = g2.AddUnified(p[0], res)
 		return res, nil
diff --git a/std/algebra/emulated/sw_bls12381/g2_test.go b/std/algebra/emulated/sw_bls12381/g2_test.go
index 0bd443a3ab..0b4a08e4e9 100644
--- a/std/algebra/emulated/sw_bls12381/g2_test.go
+++ b/std/algebra/emulated/sw_bls12381/g2_test.go
@@ -25,7 +25,7 @@ func (c *mulG2Circuit) Define(api frontend.API) error {
 	if err != nil {
 		return fmt.Errorf("new G2 struct: %w", err)
 	}
-	res1 := g2.scalarMulGLV(&c.In, &c.S)
+	res1 := g2.ScalarMul(&c.In, &c.S)
 	res2 := g2.scalarMulGeneric(&c.In, &c.S)
 	g2.AssertIsEqual(res1, &c.Res)
 	g2.AssertIsEqual(res2, &c.Res)
diff --git a/std/algebra/emulated/sw_bls12381/hints.go b/std/algebra/emulated/sw_bls12381/hints.go
index 5441243728..86141eee50 100644
--- a/std/algebra/emulated/sw_bls12381/hints.go
+++ b/std/algebra/emulated/sw_bls12381/hints.go
@@ -5,6 +5,7 @@ import (
 	"fmt"
 	"math/big"
 
+	"github.com/consensys/gnark-crypto/algebra/lattice"
 	"github.com/consensys/gnark-crypto/ecc"
 	bls12381 "github.com/consensys/gnark-crypto/ecc/bls12-381"
 	"github.com/consensys/gnark-crypto/ecc/bls12-381/fp"
@@ -24,6 +25,8 @@ func GetHints() []solver.Hint {
 		pairingCheckHint,
 		millerLoopAndCheckFinalExpHint,
 		decomposeScalarG1,
+		scalarMulG2Hint,
+		rationalReconstructExtG2,
 		g1SqrtRatioHint,
 		g2SqrtRatioHint,
 		unmarshalG1,
@@ -452,3 +455,94 @@ func unmarshalG1(mod *big.Int, nativeInputs []*big.Int, outputs []*big.Int) erro
 		return nil
 	})
 }
+
+func scalarMulG2Hint(field *big.Int, inputs []*big.Int, outputs []*big.Int) error {
+	return emulated.UnwrapHintContext(field, inputs, outputs, func(hc emulated.HintContext) error {
+		moduli := hc.EmulatedModuli()
+		if len(moduli) != 2 {
+			return fmt.Errorf("expecting two moduli, got %d", len(moduli))
+		}
+		baseModulus, scalarModulus := moduli[0], moduli[1]
+		baseInputs, baseOutputs := hc.InputsOutputs(baseModulus)
+		scalarInputs, _ := hc.InputsOutputs(scalarModulus)
+		if len(baseInputs) != 4 {
+			return fmt.Errorf("expecting four base inputs (Q.X.A0, Q.X.A1, Q.Y.A0, Q.Y.A1), got %d", len(baseInputs))
+		}
+		if len(baseOutputs) != 4 {
+			return fmt.Errorf("expecting four base outputs, got %d", len(baseOutputs))
+		}
+		if len(scalarInputs) != 1 {
+			return fmt.Errorf("expecting one scalar input, got %d", len(scalarInputs))
+		}
+
+		// compute the resulting point [s]Q on G2
+		var Q bls12381.G2Affine
+		Q.X.A0.SetBigInt(baseInputs[0])
+		Q.X.A1.SetBigInt(baseInputs[1])
+		Q.Y.A0.SetBigInt(baseInputs[2])
+		Q.Y.A1.SetBigInt(baseInputs[3])
+		Q.ScalarMultiplication(&Q, scalarInputs[0])
+		Q.X.A0.BigInt(baseOutputs[0])
+		Q.X.A1.BigInt(baseOutputs[1])
+		Q.Y.A0.BigInt(baseOutputs[2])
+		Q.Y.A1.BigInt(baseOutputs[3])
+		return nil
+	})
+}
+
+func rationalReconstructExtG2(mod *big.Int, inputs []*big.Int, outputs []*big.Int) error {
+	return emulated.UnwrapHintContext(mod, inputs, outputs, func(hc emulated.HintContext) error {
+		moduli := hc.EmulatedModuli()
+		if len(moduli) != 1 {
+			return fmt.Errorf("expecting one modulus, got %d", len(moduli))
+		}
+		_, nativeOutputs := hc.NativeInputsOutputs()
+		if len(nativeOutputs) != 4 {
+			return fmt.Errorf("expecting four outputs, got %d", len(nativeOutputs))
+		}
+		emuInputs, emuOutputs := hc.InputsOutputs(moduli[0])
+		if len(emuInputs) != 2 {
+			return fmt.Errorf("expecting two inputs, got %d", len(emuInputs))
+		}
+		if len(emuOutputs) != 4 {
+			return fmt.Errorf("expecting four outputs, got %d", len(emuOutputs))
+		}
+
+		// Use lattice reduction to find (x, y, z, t) such that
+		// k ≡ (x + λ*y) / (z + λ*t) (mod r)
+		//
+		// in-circuit we check that R - [s]Q = 0 or equivalently R + [-s]Q = 0
+		// so here we use k = -s.
+		k := new(big.Int).Neg(emuInputs[0])
+		k.Mod(k, moduli[0])
+		rc := lattice.NewReconstructor(moduli[0]).SetLambda(emuInputs[1])
+		res := rc.RationalReconstructExt(k)
+		x, y, z, t := res[0], res[1], res[2], res[3]
+
+		// u1 = x, u2 = y, v1 = z, v2 = t
+		emuOutputs[0].Abs(x)
+		emuOutputs[1].Abs(y)
+		emuOutputs[2].Abs(z)
+		emuOutputs[3].Abs(t)
+
+		// signs
+		nativeOutputs[0].SetUint64(0)
+		nativeOutputs[1].SetUint64(0)
+		nativeOutputs[2].SetUint64(0)
+		nativeOutputs[3].SetUint64(0)
+
+		if x.Sign() < 0 {
+			nativeOutputs[0].SetUint64(1)
+		}
+		if y.Sign() < 0 {
+			nativeOutputs[1].SetUint64(1)
+		}
+		if z.Sign() < 0 {
+			nativeOutputs[2].SetUint64(1)
+		}
+		if t.Sign() < 0 {
+			nativeOutputs[3].SetUint64(1)
+		}
+		return nil
+	})
+}
diff --git a/std/algebra/emulated/sw_bn254/g2.go b/std/algebra/emulated/sw_bn254/g2.go
index 04b2b0133c..e5cb5e08fa 100644
--- a/std/algebra/emulated/sw_bn254/g2.go
+++ b/std/algebra/emulated/sw_bn254/g2.go
@@ -6,6 +6,7 @@ import (
 
 	"github.com/consensys/gnark-crypto/ecc/bn254"
 	"github.com/consensys/gnark/frontend"
+	"github.com/consensys/gnark/std/algebra/algopts"
 	"github.com/consensys/gnark/std/algebra/emulated/fields_bn254"
 	"github.com/consensys/gnark/std/math/emulated"
 )
@@ -13,9 +14,16 @@ import (
 type G2 struct {
 	api frontend.API
 	fp  *emulated.Field[BaseField]
+	fr  *emulated.Field[ScalarField]
 	*fields_bn254.Ext2
 	w    *emulated.Element[BaseField]
 	u, v *fields_bn254.E2
+	// GLV eigenvalue for endomorphism
+	eigenvalue *emulated.Element[ScalarField]
+
+	// Precomputed G2 generator and its multiple for GLV+FakeGLV
+	g2Gen      *g2AffP // G2 generator
+	g2GenNbits *g2AffP // [2^(nbits-1)]G2 where nbits = (r.BitLen()+3)/4 + 2
 }
 
 type g2AffP struct {
@@ -46,7 +54,14 @@ func NewG2(api frontend.API) (*G2, error) {
 	if err != nil {
 		return nil, fmt.Errorf("new base api: %w", err)
 	}
+	fr, err := emulated.NewField[ScalarField](api)
+	if err != nil {
+		return nil, fmt.Errorf("new scalar api: %w", err)
+	}
+	// w = thirdRootOneG2 = thirdRootOneG1^2 (used for both psi2 and GLV endomorphism)
 	w := fp.NewElement("21888242871839275220042445260109153167277707414472061641714758635765020556616")
+	// GLV eigenvalue: lambda such that phi(P) = [lambda]P
+	eigenvalue := fr.NewElement("4407920970296243842393367215006156084916469457145843978461")
 	u := fields_bn254.E2{
 		A0: *fp.NewElement("21575463638280843010398324269430826099269044274347216827212613867836435027261"),
 		A1: *fp.NewElement("10307601595873709700152284273816112264069230130616436755625194854815875713954"),
@@ -55,13 +70,43 @@ func NewG2(api frontend.API) (*G2, error) {
 		A0: *fp.NewElement("2821565182194536844548159561693502659359617185244120367078079554186484126554"),
 		A1: *fp.NewElement("3505843767911556378687030309984248845540243509899259641013678093033130930403"),
 	}
+
+	// Precomputed G2 generator for GLV+FakeGLV
+	g2Gen := &g2AffP{
+		X: fields_bn254.E2{
+			A0: *fp.NewElement("10857046999023057135944570762232829481370756359578518086990519993285655852781"),
+			A1: *fp.NewElement("11559732032986387107991004021392285783925812861821192530917403151452391805634"),
+		},
+		Y: fields_bn254.E2{
+			A0: *fp.NewElement("8495653923123431417604973247489272438418190587263600148770280649306958101930"),
+			A1: *fp.NewElement("4082367875863433681332203403145435568316851327593401208105741076214120093531"),
+		},
+	}
+	// [2^(nbits-1)]G2 where nbits = (254+3)/4 + 2 = 66, so this is [2^65]G2
+	// The loop does nbits-1 doublings, so the generator accumulates to [2^(nbits-1)]G2
+	g2GenNbits := &g2AffP{
+		X: fields_bn254.E2{
+			A0: *fp.NewElement("6099622139700402640581725571890015148411145321742729577177999911575645303725"),
+			A1: *fp.NewElement("9870328428465937988383794519490899227160817120884239055108452134207619193487"),
+		},
+		Y: fields_bn254.E2{
+			A0: *fp.NewElement("16268382111792290652321980382595025991160708296314050973435867558225525677485"),
+			A1: *fp.NewElement("15377126855853471483498618408547895055706247905282062963450025729940352455943"),
+		},
+	}
+
 	return &G2{
-		api:  api,
-		fp:   fp,
-		Ext2: fields_bn254.NewExt2(api),
-		w:    w,
-		u:    &u,
-		v:    &v,
+		api:        api,
+		fp:         fp,
+		fr:         fr,
+		Ext2:       fields_bn254.NewExt2(api),
+		w:          w,
+		eigenvalue: eigenvalue,
+		u:          &u,
+		v:          &v,
+		// GLV+FakeGLV precomputed values
+		g2Gen:      g2Gen,
+		g2GenNbits: g2GenNbits,
 	}, nil
 }
 
@@ -290,3 +335,345 @@ func (g2 *G2) IsEqual(p, q *G2Affine) frontend.Variable {
 	yEqual := g2.Ext2.IsEqual(&p.P.Y, &q.P.Y)
 	return g2.api.And(xEqual, yEqual)
 }
+
+// Select selects between p and q given the selector b. If b == 1, then returns
+// p and q otherwise.
+func (g2 *G2) Select(b frontend.Variable, p, q *G2Affine) *G2Affine {
+	x := g2.Ext2.Select(b, &p.P.X, &q.P.X)
+	y := g2.Ext2.Select(b, &p.P.Y, &q.P.Y)
+	return &G2Affine{
+		P:     g2AffP{X: *x, Y: *y},
+		Lines: nil,
+	}
+}
+
+func (g2 G2) triple(p *G2Affine) *G2Affine {
+	mone := g2.fp.NewElement(-1)
+
+	// compute λ1 = (3p.x²)/2p.y
+	xx := g2.Square(&p.P.X)
+	xx = g2.MulByConstElement(xx, big.NewInt(3))
+	y2 := g2.Double(&p.P.Y)
+	λ1 := g2.DivUnchecked(xx, y2)
+
+	// x2 = λ1²-2p.x
+	x20 := g2.fp.Eval([][]*baseEl{{&λ1.A0, &λ1.A0}, {mone, &λ1.A1, &λ1.A1}, {mone, &p.P.X.A0}}, []int{1, 1, 2})
+	x21 := g2.fp.Eval([][]*baseEl{{&λ1.A0, &λ1.A1}, {mone, &p.P.X.A1}}, []int{2, 2})
+	x2 := &fields_bn254.E2{A0: *x20, A1: *x21}
+
+	// omit y2 computation, and
+	// compute λ2 = 2p.y/(x2 − p.x) − λ1.
+	x1x2 := g2.Sub(&p.P.X, x2)
+	λ2 := g2.DivUnchecked(y2, x1x2)
+	λ2 = g2.Sub(λ2, λ1)
+
+	// compute x3 =λ2²-p.x-x2
+	x30 := g2.fp.Eval([][]*baseEl{{&λ2.A0, &λ2.A0}, {mone, &λ2.A1, &λ2.A1}, {mone, &p.P.X.A0}, {mone, x20}}, []int{1, 1, 1, 1})
+	x31 := g2.fp.Eval([][]*baseEl{{&λ2.A0, &λ2.A1}, {mone, &p.P.X.A1}, {mone, x21}}, []int{2, 1, 1})
+	x3 := &fields_bn254.E2{A0: *x30, A1: *x31}
+
+	// compute y3 = λ2*(p.x - x3)-p.y
+	y3 := g2.Ext2.Sub(&p.P.X, x3)
+	y30 := g2.fp.Eval([][]*baseEl{{&λ2.A0, &y3.A0}, {mone, &λ2.A1, &y3.A1}, {mone, &p.P.Y.A0}}, []int{1, 1, 1})
+	y31 := g2.fp.Eval([][]*baseEl{{&λ2.A0, &y3.A1}, {&λ2.A1, &y3.A0}, {mone, &p.P.Y.A1}}, []int{1, 1, 1})
+	y3 = &fields_bn254.E2{A0: *y30, A1: *y31}
+
+	return &G2Affine{
+		P: g2AffP{
+			X: *x3,
+			Y: *y3,
+		},
+	}
+}
+
+// ScalarMul computes [s]Q using an efficient endomorphism and returns it. It doesn't modify Q nor s.
+// It implements the GLV+fakeGLV optimization from [EEMP25] which achieves r^(1/4) bounds
+// on the sub-scalars, reducing the number of iterations in the scalar multiplication loop.
+//
+// ⚠️  The scalar s must be nonzero and the point Q different from (0,0) unless [algopts.WithCompleteArithmetic] is set.
+// (0,0) is not on the curve but we conventionally take it as the
+// neutral/infinity point as per the [EVM].
+//
+// [EEMP25]: https://eprint.iacr.org/2025/933
+// [EVM]: https://ethereum.github.io/yellowpaper/paper.pdf
+func (g2 *G2) ScalarMul(Q *G2Affine, s *Scalar, opts ...algopts.AlgebraOption) *G2Affine {
+	return g2.scalarMulGLVAndFakeGLV(Q, s, opts...)
+}
+
+// scalarMulGLVAndFakeGLV computes [s]Q using GLV+fakeGLV with r^(1/4) bounds.
+// It implements the "GLV + fake GLV" explained in [EEMP25] (Sec. 3.3).
+//
+// ⚠️  The scalar s must be nonzero and the point Q different from (0,0) unless [algopts.WithCompleteArithmetic] is set.
+//
+// [EEMP25]: https://eprint.iacr.org/2025/933
+func (g2 *G2) scalarMulGLVAndFakeGLV(Q *G2Affine, s *Scalar, opts ...algopts.AlgebraOption) *G2Affine {
+	cfg, err := algopts.NewConfig(opts...)
+	if err != nil {
+		panic(err)
+	}
+
+	// handle 0-scalar
+	var selector0 frontend.Variable
+	_s := s
+	if cfg.CompleteArithmetic {
+		one := g2.fr.One()
+		selector0 = g2.fr.IsZero(s)
+		_s = g2.fr.Select(selector0, one, s)
+	}
+
+	// Instead of computing [s]Q=R, we check that R-[s]Q == 0.
+	// This is equivalent to [v]R + [-s*v]Q = 0 for some nonzero v.
+	//
+	// Using LLL-based lattice reduction we find small sub-scalars:
+	// 		[v1 + λ*v2]R + [u1 + λ*u2]Q = 0
+	// 		[v1]R + [v2]Φ(R) + [u1]Q + [u2]Φ(Q) = 0
+	//
+	// where u1, u2, v1, v2 < c*r^{1/4} with c ≈ 1.25 (proven bound from LLL).
+
+	// decompose s into u1, u2, v1, v2
+	signs, sd, err := g2.fr.NewHintGeneric(rationalReconstructExtG2, 4, 4, nil, []*emulated.Element[ScalarField]{_s, g2.eigenvalue})
+	if err != nil {
+		panic(fmt.Sprintf("rationalReconstructExtG2 hint: %v", err))
+	}
+	u1, u2, v1, v2 := sd[0], sd[1], sd[2], sd[3]
+	isNegu1, isNegu2, isNegv1, isNegv2 := signs[0], signs[1], signs[2], signs[3]
+
+	// Check that: s*(v1 + λ*v2) + u1 + λ*u2 = 0
+	var st ScalarField
+	sv1 := g2.fr.Mul(_s, v1)
+	sλv2 := g2.fr.Mul(_s, g2.fr.Mul(g2.eigenvalue, v2))
+	λu2 := g2.fr.Mul(g2.eigenvalue, u2)
+	zero := g2.fr.Zero()
+
+	lhs1 := g2.fr.Select(isNegv1, zero, sv1)
+	lhs2 := g2.fr.Select(isNegv2, zero, sλv2)
+	lhs3 := g2.fr.Select(isNegu1, zero, u1)
+	lhs4 := g2.fr.Select(isNegu2, zero, λu2)
+	lhs := g2.fr.Add(
+		g2.fr.Add(lhs1, lhs2),
+		g2.fr.Add(lhs3, lhs4),
+	)
+
+	rhs1 := g2.fr.Select(isNegv1, sv1, zero)
+	rhs2 := g2.fr.Select(isNegv2, sλv2, zero)
+	rhs3 := g2.fr.Select(isNegu1, u1, zero)
+	rhs4 := g2.fr.Select(isNegu2, λu2, zero)
+	rhs := g2.fr.Add(
+		g2.fr.Add(rhs1, rhs2),
+		g2.fr.Add(rhs3, rhs4),
+	)
+
+	g2.fr.AssertIsEqual(lhs, rhs)
+
+	// Ensure the denominator v1 + λ*v2 is non-zero to prevent trivial decomposition
+	den := g2.fr.Add(v1, g2.fr.Mul(g2.eigenvalue, v2))
+	g2.fr.AssertIsDifferent(den, g2.fr.Zero())
+
+	// Hint the scalar multiplication R = [s]Q
+	_, point, _, err := emulated.NewVarGenericHint(g2.api, 0, 4, 0, nil,
+		[]*emulated.Element[BaseField]{&Q.P.X.A0, &Q.P.X.A1, &Q.P.Y.A0, &Q.P.Y.A1},
+		[]*emulated.Element[ScalarField]{s},
+		scalarMulG2Hint)
+	if err != nil {
+		panic(fmt.Sprintf("scalarMulG2Hint: %v", err))
+	}
+	R := &G2Affine{
+		P: g2AffP{
+			X: fields_bn254.E2{A0: *point[0], A1: *point[1]},
+			Y: fields_bn254.E2{A0: *point[2], A1: *point[3]},
+		},
+	}
+	// Preserve the original hinted R for return value (before edge-case modifications)
+	originalR := R
+
+	// handle (0,0)-point and edge cases
+	var _selector0, _selector1 frontend.Variable
+	var sIsOne, sIsMinusOne frontend.Variable
+	_Q := Q
+	if cfg.CompleteArithmetic {
+		// Use different dummy points for _Q and R to avoid _Q == ±R
+		// which would cause incomplete addition failures in the table precomputations.
+		dummyQ := &G2Affine{P: *g2.g2Gen}
+		dummyR := &G2Affine{P: *g2.g2GenNbits}
+		// if Q=(0,0) we assign a dummy point
+		_selector0 = g2.api.And(g2.Ext2.IsZero(&Q.P.X), g2.Ext2.IsZero(&Q.P.Y))
+		_Q = g2.Select(_selector0, dummyQ, Q)
+		// if s=±1, R=±Q and incomplete addition fails (R.X == Q.X).
+		// We detect this from the constrained scalar, not from the unconstrained R.
+		sIsOne = g2.fr.IsZero(g2.fr.Sub(s, g2.fr.One()))
+		sIsMinusOne = g2.fr.IsZero(g2.fr.Add(s, g2.fr.One()))
+		_selector1 = g2.api.Or(sIsOne, sIsMinusOne)
+		// if s=0 (selector0), Q=(0,0) (_selector0), or s=±1 (_selector1),
+		// we assign a dummy point to R
+		selectorAny := g2.api.Or(g2.api.Or(selector0, _selector0), _selector1)
+		R = g2.Select(selectorAny, dummyR, R)
+	}
+
+	// precompute -Q, -Φ(Q), Φ(Q)
+	var tableQ, tablePhiQ [2]*G2Affine
+	negQY := g2.Ext2.Neg(&_Q.P.Y)
+	tableQ[1] = &G2Affine{
+		P: g2AffP{
+			X: _Q.P.X,
+			Y: *g2.Ext2.Select(isNegu1, negQY, &_Q.P.Y),
+		},
+	}
+	tableQ[0] = g2.neg(tableQ[1])
+	// For BN254 G2, glvPhi(Q) = (w * Q.X, Q.Y)
+	tablePhiQ[1] = &G2Affine{
+		P: g2AffP{
+			X: *g2.Ext2.MulByElement(&_Q.P.X, g2.w),
+			Y: *g2.Ext2.Select(isNegu2, negQY, &_Q.P.Y),
+		},
+	}
+	tablePhiQ[0] = g2.neg(tablePhiQ[1])
+
+	// precompute -R, -Φ(R), Φ(R)
+	var tableR, tablePhiR [2]*G2Affine
+	negRY := g2.Ext2.Neg(&R.P.Y)
+	tableR[1] = &G2Affine{
+		P: g2AffP{
+			X: R.P.X,
+			Y: *g2.Ext2.Select(isNegv1, negRY, &R.P.Y),
+		},
+	}
+	tableR[0] = g2.neg(tableR[1])
+	tablePhiR[1] = &G2Affine{
+		P: g2AffP{
+			X: *g2.Ext2.MulByElement(&R.P.X, g2.w),
+			Y: *g2.Ext2.Select(isNegv2, negRY, &R.P.Y),
+		},
+	}
+	tablePhiR[0] = g2.neg(tablePhiR[1])
+
+	// precompute -Q-R, Q+R, Q-R, -Q+R (combining the two points Q and R)
+	var tableS [4]*G2Affine
+	tableS[0] = g2.add(tableQ[0], tableR[0]) // -Q - R
+	tableS[1] = g2.neg(tableS[0])            // Q + R
+	tableS[2] = g2.add(tableQ[1], tableR[0]) // Q - R
+	tableS[3] = g2.neg(tableS[2])            // -Q + R
+
+	// precompute -Φ(Q)-Φ(R), Φ(Q)+Φ(R), Φ(Q)-Φ(R), -Φ(Q)+Φ(R) (combining endomorphisms)
+	var tablePhiS [4]*G2Affine
+	tablePhiS[0] = g2.add(tablePhiQ[0], tablePhiR[0]) // -Φ(Q) - Φ(R)
+	tablePhiS[1] = g2.neg(tablePhiS[0])               // Φ(Q) + Φ(R)
+	tablePhiS[2] = g2.add(tablePhiQ[1], tablePhiR[0]) // Φ(Q) - Φ(R)
+	tablePhiS[3] = g2.neg(tablePhiS[2])               // -Φ(Q) + Φ(R)
+
+	// Acc = Q + Φ(Q) + R + Φ(R)
+	Acc := g2.add(tableS[1], tablePhiS[1])
+	B1 := Acc
+
+	// Add G2 generator to Acc to avoid incomplete additions in the loop.
+	// At the end, since [u1]Q + [u2]Φ(Q) + [v1]R + [v2]Φ(R) = 0,
+	// Acc will equal [2^nbits]G2 (precomputed).
+	g2GenPoint := &G2Affine{P: *g2.g2Gen}
+	Acc = g2.add(Acc, g2GenPoint)
+
+	// u1, u2, v1, v2 < c*r^{1/4} where c ≈ 1.25
+	nbits := (st.Modulus().BitLen()+3)/4 + 2
+	u1bits := g2.fr.ToBits(u1)
+	u2bits := g2.fr.ToBits(u2)
+	v1bits := g2.fr.ToBits(v1)
+	v2bits := g2.fr.ToBits(v2)
+
+	// Precompute all 16 combinations: ±Q ± Φ(Q) ± R ± Φ(R)
+	// Using tableS (Q±R) and tablePhiS (Φ(Q)±Φ(R)) to match G1 pattern
+	// B1 = (Q+R) + (Φ(Q)+Φ(R)) = Q + R + Φ(Q) + Φ(R)
+	B2 := g2.add(tableS[1], tablePhiS[2]) // (Q+R) + (Φ(Q)-Φ(R)) = Q + R + Φ(Q) - Φ(R)
+	B3 := g2.add(tableS[1], tablePhiS[3]) // (Q+R) + (-Φ(Q)+Φ(R)) = Q + R - Φ(Q) + Φ(R)
+	B4 := g2.add(tableS[1], tablePhiS[0]) // (Q+R) + (-Φ(Q)-Φ(R)) = Q + R - Φ(Q) - Φ(R)
+	B5 := g2.add(tableS[2], tablePhiS[1]) // (Q-R) + (Φ(Q)+Φ(R)) = Q - R + Φ(Q) + Φ(R)
+	B6 := g2.add(tableS[2], tablePhiS[2]) // (Q-R) + (Φ(Q)-Φ(R)) = Q - R + Φ(Q) - Φ(R)
+	B7 := g2.add(tableS[2], tablePhiS[3]) // (Q-R) + (-Φ(Q)+Φ(R)) = Q - R - Φ(Q) + Φ(R)
+	B8 := g2.add(tableS[2], tablePhiS[0]) // (Q-R) + (-Φ(Q)-Φ(R)) = Q - R - Φ(Q) - Φ(R)
+	B9 := g2.neg(B8)                      // -Q + R + Φ(Q) + Φ(R)
+	B10 := g2.neg(B7)                     // -Q + R + Φ(Q) - Φ(R)
+	B11 := g2.neg(B6)                     // -Q + R - Φ(Q) + Φ(R)
+	B12 := g2.neg(B5)                     // -Q + R - Φ(Q) - Φ(R)
+	B13 := g2.neg(B4)                     // -Q - R + Φ(Q) + Φ(R)
+	B14 := g2.neg(B3)                     // -Q - R + Φ(Q) - Φ(R)
+	B15 := g2.neg(B2)                     // -Q - R - Φ(Q) + Φ(R)
+	B16 := g2.neg(B1)                     // -Q - R - Φ(Q) - Φ(R)
+
+	var Bi *G2Affine
+	for i := nbits - 1; i > 0; i-- {
+		// selectorY takes values in [0,15]
+		selectorY := g2.api.Add(
+			u1bits[i],
+			g2.api.Mul(u2bits[i], 2),
+			g2.api.Mul(v1bits[i], 4),
+			g2.api.Mul(v2bits[i], 8),
+		)
+		// selectorX takes values in [0,7] s.t.:
+		// 		- when selectorY < 8: selectorX = selectorY
+		// 		- when selectorY >= 8: selectorX = 15 - selectorY
+		selectorX := g2.api.Add(
+			g2.api.Mul(selectorY, g2.api.Sub(1, g2.api.Mul(v2bits[i], 2))),
+			g2.api.Mul(v2bits[i], 15),
+		)
+
+		// Bi.Y are distinct so we need a 16-to-1 multiplexer,
+		// but only half of the Bi.X are distinct so we need an 8-to-1.
+		Bi = &G2Affine{
+			P: g2AffP{
+				X: fields_bn254.E2{
+					A0: *g2.fp.Mux(selectorX,
+						&B16.P.X.A0, &B8.P.X.A0, &B14.P.X.A0, &B6.P.X.A0, &B12.P.X.A0, &B4.P.X.A0, &B10.P.X.A0, &B2.P.X.A0,
+					),
+					A1: *g2.fp.Mux(selectorX,
+						&B16.P.X.A1, &B8.P.X.A1, &B14.P.X.A1, &B6.P.X.A1, &B12.P.X.A1, &B4.P.X.A1, &B10.P.X.A1, &B2.P.X.A1,
+					),
+				},
+				Y: fields_bn254.E2{
+					A0: *g2.fp.Mux(selectorY,
+						&B16.P.Y.A0, &B8.P.Y.A0, &B14.P.Y.A0, &B6.P.Y.A0, &B12.P.Y.A0, &B4.P.Y.A0, &B10.P.Y.A0, &B2.P.Y.A0,
+						&B15.P.Y.A0, &B7.P.Y.A0, &B13.P.Y.A0, &B5.P.Y.A0, &B11.P.Y.A0, &B3.P.Y.A0, &B9.P.Y.A0, &B1.P.Y.A0,
+					),
+					A1: *g2.fp.Mux(selectorY,
+						&B16.P.Y.A1, &B8.P.Y.A1, &B14.P.Y.A1, &B6.P.Y.A1, &B12.P.Y.A1, &B4.P.Y.A1, &B10.P.Y.A1, &B2.P.Y.A1,
+						&B15.P.Y.A1, &B7.P.Y.A1, &B13.P.Y.A1, &B5.P.Y.A1, &B11.P.Y.A1, &B3.P.Y.A1, &B9.P.Y.A1, &B1.P.Y.A1,
+					),
+				},
+			},
+		}
+		// Acc = [2]Acc + Bi
+		Acc = g2.doubleAndAdd(Acc, Bi)
+	}
+
+	// i = 0: subtract Q, Φ(Q), R, Φ(R) if the first bits are 0
+	tableQ[0] = g2.add(tableQ[0], Acc)
+	Acc = g2.Select(u1bits[0], Acc, tableQ[0])
+	tablePhiQ[0] = g2.add(tablePhiQ[0], Acc)
+	Acc = g2.Select(u2bits[0], Acc, tablePhiQ[0])
+	tableR[0] = g2.add(tableR[0], Acc)
+	Acc = g2.Select(v1bits[0], Acc, tableR[0])
+	tablePhiR[0] = g2.add(tablePhiR[0], Acc)
+	Acc = g2.Select(v2bits[0], Acc, tablePhiR[0])
+
+	// Acc should now be [2^(nbits-1)]G2 since [u1]Q + [u2]Φ(Q) + [v1]R + [v2]Φ(R) = 0
+	// and we added G2 to the initial accumulator.
+	expected := &G2Affine{P: *g2.g2GenNbits}
+
+	if cfg.CompleteArithmetic {
+		// if Q=(0,0), s=0, or R.X==Q.X, skip the check
+		skip := g2.api.Or(g2.api.Or(selector0, _selector0), _selector1)
+		Acc = g2.Select(skip, expected, Acc)
+	}
+	g2.AssertIsEqual(Acc, expected)
+
+	if cfg.CompleteArithmetic {
+		zeroE2 := g2.Ext2.Zero()
+		zeroG2 := &G2Affine{P: g2AffP{X: *zeroE2, Y: *zeroE2}}
+		negQ := g2.neg(Q)
+		// s=-1 → -Q, else → R (constrained by MSM check above)
+		result := g2.Select(sIsMinusOne, negQ, originalR)
+		// s=1 → Q
+		result = g2.Select(sIsOne, Q, result)
+		// s=0 or Q=(0,0) → (0,0)
+		returnZero := g2.api.Or(selector0, _selector0)
+		return g2.Select(returnZero, zeroG2, result)
+	}
+
+	return R
+}
diff --git a/std/algebra/emulated/sw_bn254/hints.go b/std/algebra/emulated/sw_bn254/hints.go
index 7d675e2cf6..329f2b908c 100644
--- a/std/algebra/emulated/sw_bn254/hints.go
+++ b/std/algebra/emulated/sw_bn254/hints.go
@@ -2,8 +2,10 @@ package sw_bn254
 
 import (
 	"errors"
+	"fmt"
 	"math/big"
 
+	"github.com/consensys/gnark-crypto/algebra/lattice"
 	"github.com/consensys/gnark-crypto/ecc/bn254"
 	"github.com/consensys/gnark/constraint/solver"
 	"github.com/consensys/gnark/std/math/emulated"
@@ -19,6 +21,8 @@ func GetHints() []solver.Hint {
 		finalExpHint,
 		pairingCheckHint,
 		millerLoopAndCheckFinalExpHint,
+		scalarMulG2Hint,
+		rationalReconstructExtG2,
 	}
 }
 
@@ -278,3 +282,94 @@ func finalExpWitness(millerLoop *bn254.E12) (residueWitness, cubicNonResiduePowe
 
 	return residueWitness, cubicNonResiduePower
 }
+
+func scalarMulG2Hint(field *big.Int, inputs []*big.Int, outputs []*big.Int) error {
+	return emulated.UnwrapHintContext(field, inputs, outputs, func(hc emulated.HintContext) error {
+		moduli := hc.EmulatedModuli()
+		if len(moduli) != 2 {
+			return fmt.Errorf("expecting two moduli, got %d", len(moduli))
+		}
+		baseModulus, scalarModulus := moduli[0], moduli[1]
+		baseInputs, baseOutputs := hc.InputsOutputs(baseModulus)
+		scalarInputs, _ := hc.InputsOutputs(scalarModulus)
+		if len(baseInputs) != 4 {
+			return fmt.Errorf("expecting four base inputs (Q.X.A0, Q.X.A1, Q.Y.A0, Q.Y.A1), got %d", len(baseInputs))
+		}
+		if len(baseOutputs) != 4 {
+			return fmt.Errorf("expecting four base outputs, got %d", len(baseOutputs))
+		}
+		if len(scalarInputs) != 1 {
+			return fmt.Errorf("expecting one scalar input, got %d", len(scalarInputs))
+		}
+
+		// compute the resulting point [s]Q on G2
+		var Q bn254.G2Affine
+		Q.X.A0.SetBigInt(baseInputs[0])
+		Q.X.A1.SetBigInt(baseInputs[1])
+		Q.Y.A0.SetBigInt(baseInputs[2])
+		Q.Y.A1.SetBigInt(baseInputs[3])
+		Q.ScalarMultiplication(&Q, scalarInputs[0])
+		Q.X.A0.BigInt(baseOutputs[0])
+		Q.X.A1.BigInt(baseOutputs[1])
+		Q.Y.A0.BigInt(baseOutputs[2])
+		Q.Y.A1.BigInt(baseOutputs[3])
+		return nil
+	})
+}
+
+func rationalReconstructExtG2(mod *big.Int, inputs []*big.Int, outputs []*big.Int) error {
+	return emulated.UnwrapHintContext(mod, inputs, outputs, func(hc emulated.HintContext) error {
+		moduli := hc.EmulatedModuli()
+		if len(moduli) != 1 {
+			return fmt.Errorf("expecting one modulus, got %d", len(moduli))
+		}
+		_, nativeOutputs := hc.NativeInputsOutputs()
+		if len(nativeOutputs) != 4 {
+			return fmt.Errorf("expecting four outputs, got %d", len(nativeOutputs))
+		}
+		emuInputs, emuOutputs := hc.InputsOutputs(moduli[0])
+		if len(emuInputs) != 2 {
+			return fmt.Errorf("expecting two inputs, got %d", len(emuInputs))
+		}
+		if len(emuOutputs) != 4 {
+			return fmt.Errorf("expecting four outputs, got %d", len(emuOutputs))
+		}
+
+		// Use lattice reduction to find (x, y, z, t) such that
+		// k ≡ (x + λ*y) / (z + λ*t) (mod r)
+		//
+		// in-circuit we check that R - [s]Q = 0 or equivalently R + [-s]Q = 0
+		// so here we use k = -s.
+		k := new(big.Int).Neg(emuInputs[0])
+		k.Mod(k, moduli[0])
+		rc := lattice.NewReconstructor(moduli[0]).SetLambda(emuInputs[1])
+		res := rc.RationalReconstructExt(k)
+		x, y, z, t := res[0], res[1], res[2], res[3]
+
+		// u1 = x, u2 = y, v1 = z, v2 = t
+		emuOutputs[0].Abs(x)
+		emuOutputs[1].Abs(y)
+		emuOutputs[2].Abs(z)
+		emuOutputs[3].Abs(t)
+
+		// signs
+		nativeOutputs[0].SetUint64(0)
+		nativeOutputs[1].SetUint64(0)
+		nativeOutputs[2].SetUint64(0)
+		nativeOutputs[3].SetUint64(0)
+
+		if x.Sign() < 0 {
+			nativeOutputs[0].SetUint64(1)
+		}
+		if y.Sign() < 0 {
+			nativeOutputs[1].SetUint64(1)
+		}
+		if z.Sign() < 0 {
+			nativeOutputs[2].SetUint64(1)
+		}
+		if t.Sign() < 0 {
+			nativeOutputs[3].SetUint64(1)
+		}
+		return nil
+	})
+}
diff --git a/std/algebra/emulated/sw_bw6761/g2.go b/std/algebra/emulated/sw_bw6761/g2.go
index a83e0b307c..8732893800 100644
--- a/std/algebra/emulated/sw_bw6761/g2.go
+++ b/std/algebra/emulated/sw_bw6761/g2.go
@@ -6,6 +6,7 @@ import (
 
 	bw6761 "github.com/consensys/gnark-crypto/ecc/bw6-761"
 	"github.com/consensys/gnark/frontend"
+	"github.com/consensys/gnark/std/algebra/algopts"
 	"github.com/consensys/gnark/std/algebra/emulated/sw_emulated"
 	"github.com/consensys/gnark/std/math/emulated"
 )
@@ -59,8 +60,16 @@ func NewG2AffineFixedPlaceholder() G2Affine {
 }
 
 type G2 struct {
+	api    frontend.API
 	curveF *emulated.Field[BaseField]
+	fr     *emulated.Field[ScalarField]
 	w      *emulated.Element[BaseField]
+	// GLV eigenvalue for endomorphism
+	eigenvalue *emulated.Element[ScalarField]
+
+	// Precomputed G2 generator and its multiple for GLV+FakeGLV
+	g2Gen      *g2AffP // G2 generator
+	g2GenNbits *g2AffP // [2^(nbits-1)]G2 where nbits = (r.BitLen()+3)/4 + 2
 }
 
 func NewG2(api frontend.API) (*G2, error) {
@@ -68,10 +77,36 @@ func NewG2(api frontend.API) (*G2, error) {
 	if err != nil {
 		return nil, fmt.Errorf("new base api: %w", err)
 	}
+	fr, err := emulated.NewField[ScalarField](api)
+	if err != nil {
+		return nil, fmt.Errorf("new scalar api: %w", err)
+	}
+	// w = thirdRootOneG2 = thirdRootOneG1^2 (used for GLV endomorphism)
 	w := ba.NewElement("4922464560225523242118178942575080391082002530232324381063048548642823052024664478336818169867474395270858391911405337707247735739826664939444490469542109391530482826728203582549674992333383150446779312029624171857054392282775648")
+	// GLV eigenvalue: lambda such that phi(P) = [lambda]P
+	eigenvalue := fr.NewElement("80949648264912719408558363140637477264845294720710499478137287262712535938301461879813459410945")
+
+	// Precomputed G2 generator for GLV+FakeGLV
+	g2Gen := &g2AffP{
+		X: *ba.NewElement("6445332910596979336035888152774071626898886139774101364933948236926875073754470830732273879639675437155036544153105017729592600560631678554299562762294743927912429096636156401171909259073181112518725201388196280039960074422214428"),
+		Y: *ba.NewElement("562923658089539719386922163444547387757586534741080263946953401595155211934630598999300396317104182598044793758153214972605680357108252243146746187917218885078195819486220416605630144001533548163105316661692978285266378674355041"),
+	}
+	// [2^(nbits-1)]G2 where nbits = (377+3)/4 + 2 = 97, so this is [2^96]G2
+	// The loop does nbits-1 doublings, so the generator accumulates to [2^(nbits-1)]G2
+	g2GenNbits := &g2AffP{
+		X: *ba.NewElement("3095984673093732516312387265169694060996602327701627003095800025572039633257324043941471095859774515229409057356532230556857309141882262691503434703676863345821048055421798431014967860961114720963410640620563703233324706890355614"),
+		Y: *ba.NewElement("6717446314608317454056612988521276523143603352262745009529835803932138303462642316467740443074785130100608444461459148229179290796669940701932233012187852232981798195344309857014515889020782044489099447799956729215609170567055537"),
+	}
+
 	return &G2{
-		curveF: ba,
-		w:      w,
+		api:        api,
+		curveF:     ba,
+		fr:         fr,
+		w:          w,
+		eigenvalue: eigenvalue,
+		// GLV+FakeGLV precomputed values
+		g2Gen:      g2Gen,
+		g2GenNbits: g2GenNbits,
 	}, nil
 }
 
@@ -236,3 +271,295 @@ func (g2 *G2) AssertIsEqual(p, q *G2Affine) {
 	g2.curveF.AssertIsEqual(&p.P.X, &q.P.X)
 	g2.curveF.AssertIsEqual(&p.P.Y, &q.P.Y)
 }
+
+// Select selects between p and q given the selector b. If b == 1, then returns
+// p and q otherwise.
+func (g2 *G2) Select(b frontend.Variable, p, q *G2Affine) *G2Affine {
+	x := g2.curveF.Select(b, &p.P.X, &q.P.X)
+	y := g2.curveF.Select(b, &p.P.Y, &q.P.Y)
+	return &G2Affine{
+		P:     g2AffP{X: *x, Y: *y},
+		Lines: nil,
+	}
+}
+
+// ScalarMul computes [s]Q using an efficient endomorphism and returns it. It doesn't modify Q nor s.
+// It implements the GLV+fakeGLV optimization from [EEMP25] which achieves r^(1/4) bounds
+// on the sub-scalars, reducing the number of iterations in the scalar multiplication loop.
+//
+// ⚠️  The scalar s must be nonzero and the point Q different from (0,0) unless [algopts.WithCompleteArithmetic] is set.
+// (0,0) is not on the curve but we conventionally take it as the
+// neutral/infinity point as per the [EVM].
+//
+// [EEMP25]: https://eprint.iacr.org/2025/933
+// [EVM]: https://ethereum.github.io/yellowpaper/paper.pdf
+func (g2 *G2) ScalarMul(Q *G2Affine, s *Scalar, opts ...algopts.AlgebraOption) *G2Affine {
+	return g2.scalarMulGLVAndFakeGLV(Q, s, opts...)
+}
+
+// scalarMulGLVAndFakeGLV computes [s]Q using GLV+fakeGLV with r^(1/4) bounds.
+// It implements the "GLV + fake GLV" explained in [EEMP25] (Sec. 3.3).
+//
+// ⚠️  The scalar s must be nonzero and the point Q different from (0,0) unless [algopts.WithCompleteArithmetic] is set.
+//
+// [EEMP25]: https://eprint.iacr.org/2025/933
+func (g2 *G2) scalarMulGLVAndFakeGLV(Q *G2Affine, s *Scalar, opts ...algopts.AlgebraOption) *G2Affine {
+	cfg, err := algopts.NewConfig(opts...)
+	if err != nil {
+		panic(err)
+	}
+
+	// handle 0-scalar
+	var selector0 frontend.Variable
+	_s := s
+	if cfg.CompleteArithmetic {
+		one := g2.fr.One()
+		selector0 = g2.fr.IsZero(s)
+		_s = g2.fr.Select(selector0, one, s)
+	}
+
+	// Instead of computing [s]Q=R, we check that R-[s]Q == 0.
+	// This is equivalent to [v]R + [-s*v]Q = 0 for some nonzero v.
+	//
+	// Using LLL-based lattice reduction we find small sub-scalars:
+	// 		[v1 + λ*v2]R + [u1 + λ*u2]Q = 0
+	// 		[v1]R + [v2]Φ(R) + [u1]Q + [u2]Φ(Q) = 0
+	//
+	// where u1, u2, v1, v2 < c*r^{1/4} with c ≈ 1.25 (proven bound from LLL).
+
+	// decompose s into u1, u2, v1, v2
+	signs, sd, err := g2.fr.NewHintGeneric(rationalReconstructExtG2, 4, 4, nil, []*emulated.Element[ScalarField]{_s, g2.eigenvalue})
+	if err != nil {
+		panic(fmt.Sprintf("rationalReconstructExtG2 hint: %v", err))
+	}
+	u1, u2, v1, v2 := sd[0], sd[1], sd[2], sd[3]
+	isNegu1, isNegu2, isNegv1, isNegv2 := signs[0], signs[1], signs[2], signs[3]
+
+	// Check that: s*(v1 + λ*v2) + u1 + λ*u2 = 0
+	var st ScalarField
+	sv1 := g2.fr.Mul(_s, v1)
+	sλv2 := g2.fr.Mul(_s, g2.fr.Mul(g2.eigenvalue, v2))
+	λu2 := g2.fr.Mul(g2.eigenvalue, u2)
+	zero := g2.fr.Zero()
+
+	lhs1 := g2.fr.Select(isNegv1, zero, sv1)
+	lhs2 := g2.fr.Select(isNegv2, zero, sλv2)
+	lhs3 := g2.fr.Select(isNegu1, zero, u1)
+	lhs4 := g2.fr.Select(isNegu2, zero, λu2)
+	lhs := g2.fr.Add(
+		g2.fr.Add(lhs1, lhs2),
+		g2.fr.Add(lhs3, lhs4),
+	)
+
+	rhs1 := g2.fr.Select(isNegv1, sv1, zero)
+	rhs2 := g2.fr.Select(isNegv2, sλv2, zero)
+	rhs3 := g2.fr.Select(isNegu1, u1, zero)
+	rhs4 := g2.fr.Select(isNegu2, λu2, zero)
+	rhs := g2.fr.Add(
+		g2.fr.Add(rhs1, rhs2),
+		g2.fr.Add(rhs3, rhs4),
+	)
+
+	g2.fr.AssertIsEqual(lhs, rhs)
+
+	// Ensure the denominator v1 + λ*v2 is non-zero to prevent trivial decomposition
+	den := g2.fr.Add(v1, g2.fr.Mul(g2.eigenvalue, v2))
+	g2.fr.AssertIsDifferent(den, g2.fr.Zero())
+
+	// Hint the scalar multiplication R = [s]Q
+	_, point, _, err := emulated.NewVarGenericHint(g2.api, 0, 2, 0, nil,
+		[]*emulated.Element[BaseField]{&Q.P.X, &Q.P.Y},
+		[]*emulated.Element[ScalarField]{s},
+		scalarMulG2Hint)
+	if err != nil {
+		panic(fmt.Sprintf("scalarMulG2Hint: %v", err))
+	}
+	R := &G2Affine{
+		P: g2AffP{
+			X: *point[0],
+			Y: *point[1],
+		},
+	}
+	// Preserve the original hinted R for return value (before edge-case modifications)
+	originalR := R
+
+	// handle (0,0)-point and edge cases
+	var _selector0, _selector1 frontend.Variable
+	var sIsOne, sIsMinusOne frontend.Variable
+	_Q := Q
+	if cfg.CompleteArithmetic {
+		// Use different dummy points for _Q and R to avoid _Q == ±R
+		// which would cause incomplete addition failures in the table precomputations.
+		dummyQ := &G2Affine{P: *g2.g2Gen}
+		dummyR := &G2Affine{P: *g2.g2GenNbits}
+		// if Q=(0,0) we assign a dummy point
+		_selector0 = g2.api.And(g2.curveF.IsZero(&Q.P.X), g2.curveF.IsZero(&Q.P.Y))
+		_Q = g2.Select(_selector0, dummyQ, Q)
+		// if s=±1, R=±Q and incomplete addition fails (R.X == Q.X).
+		// We detect this from the constrained scalar, not from the unconstrained R.
+		sIsOne = g2.fr.IsZero(g2.fr.Sub(s, g2.fr.One()))
+		sIsMinusOne = g2.fr.IsZero(g2.fr.Add(s, g2.fr.One()))
+		_selector1 = g2.api.Or(sIsOne, sIsMinusOne)
+		// if s=0 (selector0), Q=(0,0) (_selector0), or s=±1 (_selector1),
+		// we assign a dummy point to R
+		selectorAny := g2.api.Or(g2.api.Or(selector0, _selector0), _selector1)
+		R = g2.Select(selectorAny, dummyR, R)
+	}
+
+	// precompute -Q, -Φ(Q), Φ(Q)
+	var tableQ, tablePhiQ [2]*G2Affine
+	negQY := g2.curveF.Neg(&_Q.P.Y)
+	tableQ[1] = &G2Affine{
+		P: g2AffP{
+			X: _Q.P.X,
+			Y: *g2.curveF.Select(isNegu1, negQY, &_Q.P.Y),
+		},
+	}
+	tableQ[0] = g2.neg(tableQ[1])
+	// For BW6-761 G2, phi(Q) = (w * Q.X, Q.Y)
+	tablePhiQ[1] = &G2Affine{
+		P: g2AffP{
+			X: *g2.curveF.Mul(&_Q.P.X, g2.w),
+			Y: *g2.curveF.Select(isNegu2, negQY, &_Q.P.Y),
+		},
+	}
+	tablePhiQ[0] = g2.neg(tablePhiQ[1])
+
+	// precompute -R, -Φ(R), Φ(R)
+	var tableR, tablePhiR [2]*G2Affine
+	negRY := g2.curveF.Neg(&R.P.Y)
+	tableR[1] = &G2Affine{
+		P: g2AffP{
+			X: R.P.X,
+			Y: *g2.curveF.Select(isNegv1, negRY, &R.P.Y),
+		},
+	}
+	tableR[0] = g2.neg(tableR[1])
+	tablePhiR[1] = &G2Affine{
+		P: g2AffP{
+			X: *g2.curveF.Mul(&R.P.X, g2.w),
+			Y: *g2.curveF.Select(isNegv2, negRY, &R.P.Y),
+		},
+	}
+	tablePhiR[0] = g2.neg(tablePhiR[1])
+
+	// precompute -Q-R, Q+R, Q-R, -Q+R (combining the two points Q and R)
+	var tableS [4]*G2Affine
+	tableS[0] = g2.add(tableQ[0], tableR[0]) // -Q - R
+	tableS[1] = g2.neg(tableS[0])            // Q + R
+	tableS[2] = g2.add(tableQ[1], tableR[0]) // Q - R
+	tableS[3] = g2.neg(tableS[2])            // -Q + R
+
+	// precompute -Φ(Q)-Φ(R), Φ(Q)+Φ(R), Φ(Q)-Φ(R), -Φ(Q)+Φ(R) (combining endomorphisms)
+	var tablePhiS [4]*G2Affine
+	tablePhiS[0] = g2.add(tablePhiQ[0], tablePhiR[0]) // -Φ(Q) - Φ(R)
+	tablePhiS[1] = g2.neg(tablePhiS[0])               // Φ(Q) + Φ(R)
+	tablePhiS[2] = g2.add(tablePhiQ[1], tablePhiR[0]) // Φ(Q) - Φ(R)
+	tablePhiS[3] = g2.neg(tablePhiS[2])               // -Φ(Q) + Φ(R)
+
+	// Acc = Q + Φ(Q) + R + Φ(R)
+	Acc := g2.add(tableS[1], tablePhiS[1])
+	B1 := Acc
+
+	// Add G2 generator to Acc to avoid incomplete additions in the loop.
+	// At the end, since [u1]Q + [u2]Φ(Q) + [v1]R + [v2]Φ(R) = 0,
+	// Acc will equal [2^nbits]G2 (precomputed).
+	g2GenPoint := &G2Affine{P: *g2.g2Gen}
+	Acc = g2.add(Acc, g2GenPoint)
+
+	// u1, u2, v1, v2 < c*r^{1/4} where c ≈ 1.25
+	nbits := (st.Modulus().BitLen()+3)/4 + 2
+	u1bits := g2.fr.ToBits(u1)
+	u2bits := g2.fr.ToBits(u2)
+	v1bits := g2.fr.ToBits(v1)
+	v2bits := g2.fr.ToBits(v2)
+
+	// Precompute all 16 combinations: ±Q ± Φ(Q) ± R ± Φ(R)
+	// Using tableS (Q±R) and tablePhiS (Φ(Q)±Φ(R)) to match G1 pattern
+	// B1 = (Q+R) + (Φ(Q)+Φ(R)) = Q + R + Φ(Q) + Φ(R)
+	B2 := g2.add(tableS[1], tablePhiS[2]) // (Q+R) + (Φ(Q)-Φ(R)) = Q + R + Φ(Q) - Φ(R)
+	B3 := g2.add(tableS[1], tablePhiS[3]) // (Q+R) + (-Φ(Q)+Φ(R)) = Q + R - Φ(Q) + Φ(R)
+	B4 := g2.add(tableS[1], tablePhiS[0]) // (Q+R) + (-Φ(Q)-Φ(R)) = Q + R - Φ(Q) - Φ(R)
+	B5 := g2.add(tableS[2], tablePhiS[1]) // (Q-R) + (Φ(Q)+Φ(R)) = Q - R + Φ(Q) + Φ(R)
+	B6 := g2.add(tableS[2], tablePhiS[2]) // (Q-R) + (Φ(Q)-Φ(R)) = Q - R + Φ(Q) - Φ(R)
+	B7 := g2.add(tableS[2], tablePhiS[3]) // (Q-R) + (-Φ(Q)+Φ(R)) = Q - R - Φ(Q) + Φ(R)
+	B8 := g2.add(tableS[2], tablePhiS[0]) // (Q-R) + (-Φ(Q)-Φ(R)) = Q - R - Φ(Q) - Φ(R)
+	B9 := g2.neg(B8)                      // -Q + R + Φ(Q) + Φ(R)
+	B10 := g2.neg(B7)                     // -Q + R + Φ(Q) - Φ(R)
+	B11 := g2.neg(B6)                     // -Q + R - Φ(Q) + Φ(R)
+	B12 := g2.neg(B5)                     // -Q + R - Φ(Q) - Φ(R)
+	B13 := g2.neg(B4)                     // -Q - R + Φ(Q) + Φ(R)
+	B14 := g2.neg(B3)                     // -Q - R + Φ(Q) - Φ(R)
+	B15 := g2.neg(B2)                     // -Q - R - Φ(Q) + Φ(R)
+	B16 := g2.neg(B1)                     // -Q - R - Φ(Q) - Φ(R)
+
+	var Bi *G2Affine
+	for i := nbits - 1; i > 0; i-- {
+		// selectorY takes values in [0,15]
+		selectorY := g2.api.Add(
+			u1bits[i],
+			g2.api.Mul(u2bits[i], 2),
+			g2.api.Mul(v1bits[i], 4),
+			g2.api.Mul(v2bits[i], 8),
+		)
+		// selectorX takes values in [0,7] s.t.:
+		// 		- when selectorY < 8: selectorX = selectorY
+		// 		- when selectorY >= 8: selectorX = 15 - selectorY
+		selectorX := g2.api.Add(
+			g2.api.Mul(selectorY, g2.api.Sub(1, g2.api.Mul(v2bits[i], 2))),
+			g2.api.Mul(v2bits[i], 15),
+		)
+
+		// Bi.Y are distinct so we need a 16-to-1 multiplexer,
+		// but only half of the Bi.X are distinct so we need an 8-to-1.
+		Bi = &G2Affine{
+			P: g2AffP{
+				X: *g2.curveF.Mux(selectorX,
+					&B16.P.X, &B8.P.X, &B14.P.X, &B6.P.X, &B12.P.X, &B4.P.X, &B10.P.X, &B2.P.X,
+				),
+				Y: *g2.curveF.Mux(selectorY,
+					&B16.P.Y, &B8.P.Y, &B14.P.Y, &B6.P.Y, &B12.P.Y, &B4.P.Y, &B10.P.Y, &B2.P.Y,
+					&B15.P.Y, &B7.P.Y, &B13.P.Y, &B5.P.Y, &B11.P.Y, &B3.P.Y, &B9.P.Y, &B1.P.Y,
+				),
+			},
+		}
+		// Acc = [2]Acc + Bi
+		Acc = g2.doubleAndAdd(Acc, Bi)
+	}
+
+	// i = 0: subtract Q, Φ(Q), R, Φ(R) if the first bits are 0
+	tableQ[0] = g2.add(tableQ[0], Acc)
+	Acc = g2.Select(u1bits[0], Acc, tableQ[0])
+	tablePhiQ[0] = g2.add(tablePhiQ[0], Acc)
+	Acc = g2.Select(u2bits[0], Acc, tablePhiQ[0])
+	tableR[0] = g2.add(tableR[0], Acc)
+	Acc = g2.Select(v1bits[0], Acc, tableR[0])
+	tablePhiR[0] = g2.add(tablePhiR[0], Acc)
+	Acc = g2.Select(v2bits[0], Acc, tablePhiR[0])
+
+	// Acc should now be [2^(nbits-1)]G2 since [u1]Q + [u2]Φ(Q) + [v1]R + [v2]Φ(R) = 0
+	// and we added G2 to the initial accumulator.
+	expected := &G2Affine{P: *g2.g2GenNbits}
+
+	if cfg.CompleteArithmetic {
+		// if Q=(0,0), s=0, or R.X==Q.X, skip the check
+		skip := g2.api.Or(g2.api.Or(selector0, _selector0), _selector1)
+		Acc = g2.Select(skip, expected, Acc)
+	}
+	g2.AssertIsEqual(Acc, expected)
+
+	if cfg.CompleteArithmetic {
+		zeroEl := g2.curveF.Zero()
+		zeroG2 := &G2Affine{P: g2AffP{X: *zeroEl, Y: *zeroEl}}
+		negQ := g2.neg(Q)
+		// s=-1 → -Q, else → R (constrained by MSM check above)
+		result := g2.Select(sIsMinusOne, negQ, originalR)
+		// s=1 → Q
+		result = g2.Select(sIsOne, Q, result)
+		// s=0 or Q=(0,0) → (0,0)
+		returnZero := g2.api.Or(selector0, _selector0)
+		return g2.Select(returnZero, zeroG2, result)
+	}
+
+	return R
+}
diff --git a/std/algebra/emulated/sw_bw6761/hints.go b/std/algebra/emulated/sw_bw6761/hints.go
index 4b88692b7c..9821a6b074 100644
--- a/std/algebra/emulated/sw_bw6761/hints.go
+++ b/std/algebra/emulated/sw_bw6761/hints.go
@@ -1,8 +1,11 @@
 package sw_bw6761
 
 import (
+	"fmt"
 	"math/big"
 
+	"github.com/consensys/gnark-crypto/algebra/lattice"
+	"github.com/consensys/gnark-crypto/ecc"
 	bw6761 "github.com/consensys/gnark-crypto/ecc/bw6-761"
 	"github.com/consensys/gnark/constraint/solver"
 	"github.com/consensys/gnark/std/math/emulated"
@@ -17,6 +20,9 @@ func GetHints() []solver.Hint {
 	return []solver.Hint{
 		finalExpHint,
 		pairingCheckHint,
+		decomposeScalarG1,
+		scalarMulG2Hint,
+		rationalReconstructExtG2,
 	}
 }
 
@@ -111,3 +117,133 @@ func finalExpWitness(millerLoop *bw6761.E6, mInv *big.Int) (residueWitness bw676
 
 	return residueWitness
 }
+
+func decomposeScalarG1(mod *big.Int, inputs []*big.Int, outputs []*big.Int) error {
+	return emulated.UnwrapHintContext(mod, inputs, outputs, func(hc emulated.HintContext) error {
+		moduli := hc.EmulatedModuli()
+		if len(moduli) != 1 {
+			return fmt.Errorf("expecting one moduli, got %d", len(moduli))
+		}
+		_, nativeOutputs := hc.NativeInputsOutputs()
+		if len(nativeOutputs) != 2 {
+			return fmt.Errorf("expecting two outputs, got %d", len(nativeOutputs))
+		}
+		emuInputs, emuOutputs := hc.InputsOutputs(moduli[0])
+		if len(emuInputs) != 2 {
+			return fmt.Errorf("expecting two inputs, got %d", len(emuInputs))
+		}
+		if len(emuOutputs) != 2 {
+			return fmt.Errorf("expecting two outputs, got %d", len(emuOutputs))
+		}
+
+		glvBasis := new(ecc.Lattice)
+		ecc.PrecomputeLattice(moduli[0], emuInputs[1], glvBasis)
+		sp := ecc.SplitScalar(emuInputs[0], glvBasis)
+		emuOutputs[0].Set(&sp[0])
+		emuOutputs[1].Set(&sp[1])
+		nativeOutputs[0].SetUint64(0)
+		nativeOutputs[1].SetUint64(0)
+		// we need the absolute values for the in-circuit computations,
+		// otherwise the negative values will be reduced modulo the SNARK scalar
+		// field and not the emulated field.
+		// 		output0 = |s0| mod r
+		// 		output1 = |s1| mod r
+		if emuOutputs[0].Sign() == -1 {
+			emuOutputs[0].Neg(emuOutputs[0])
+			nativeOutputs[0].SetUint64(1)
+		}
+		if emuOutputs[1].Sign() == -1 {
+			emuOutputs[1].Neg(emuOutputs[1])
+			nativeOutputs[1].SetUint64(1)
+		}
+
+		return nil
+	})
+}
+
+func scalarMulG2Hint(field *big.Int, inputs []*big.Int, outputs []*big.Int) error {
+	return emulated.UnwrapHintContext(field, inputs, outputs, func(hc emulated.HintContext) error {
+		moduli := hc.EmulatedModuli()
+		if len(moduli) != 2 {
+			return fmt.Errorf("expecting two moduli, got %d", len(moduli))
+		}
+		baseModulus, scalarModulus := moduli[0], moduli[1]
+		baseInputs, baseOutputs := hc.InputsOutputs(baseModulus)
+		scalarInputs, _ := hc.InputsOutputs(scalarModulus)
+		if len(baseInputs) != 2 {
+			return fmt.Errorf("expecting two base inputs (Q.X, Q.Y), got %d", len(baseInputs))
+		}
+		if len(baseOutputs) != 2 {
+			return fmt.Errorf("expecting two base outputs, got %d", len(baseOutputs))
+		}
+		if len(scalarInputs) != 1 {
+			return fmt.Errorf("expecting one scalar input, got %d", len(scalarInputs))
+		}
+
+		// compute the resulting point [s]Q on G2
+		var Q bw6761.G2Affine
+		Q.X.SetBigInt(baseInputs[0])
+		Q.Y.SetBigInt(baseInputs[1])
+		Q.ScalarMultiplication(&Q, scalarInputs[0])
+		Q.X.BigInt(baseOutputs[0])
+		Q.Y.BigInt(baseOutputs[1])
+		return nil
+	})
+}
+
+func rationalReconstructExtG2(mod *big.Int, inputs []*big.Int, outputs []*big.Int) error {
+	return emulated.UnwrapHintContext(mod, inputs, outputs, func(hc emulated.HintContext) error {
+		moduli := hc.EmulatedModuli()
+		if len(moduli) != 1 {
+			return fmt.Errorf("expecting one modulus, got %d", len(moduli))
+		}
+		_, nativeOutputs := hc.NativeInputsOutputs()
+		if len(nativeOutputs) != 4 {
+			return fmt.Errorf("expecting four outputs, got %d", len(nativeOutputs))
+		}
+		emuInputs, emuOutputs := hc.InputsOutputs(moduli[0])
+		if len(emuInputs) != 2 {
+			return fmt.Errorf("expecting two inputs, got %d", len(emuInputs))
+		}
+		if len(emuOutputs) != 4 {
+			return fmt.Errorf("expecting four outputs, got %d", len(emuOutputs))
+		}
+
+		// Use lattice reduction to find (x, y, z, t) such that
+		// k ≡ (x + λ*y) / (z + λ*t) (mod r)
+		//
+		// in-circuit we check that R - [s]Q = 0 or equivalently R + [-s]Q = 0
+		// so here we use k = -s.
+		k := new(big.Int).Neg(emuInputs[0])
+		k.Mod(k, moduli[0])
+		rc := lattice.NewReconstructor(moduli[0]).SetLambda(emuInputs[1])
+		res := rc.RationalReconstructExt(k)
+		x, y, z, t := res[0], res[1], res[2], res[3]
+
+		// u1 = x, u2 = y, v1 = z, v2 = t
+		emuOutputs[0].Abs(x)
+		emuOutputs[1].Abs(y)
+		emuOutputs[2].Abs(z)
+		emuOutputs[3].Abs(t)
+
+		// signs
+		nativeOutputs[0].SetUint64(0)
+		nativeOutputs[1].SetUint64(0)
+		nativeOutputs[2].SetUint64(0)
+		nativeOutputs[3].SetUint64(0)
+
+		if x.Sign() < 0 {
+			nativeOutputs[0].SetUint64(1)
+		}
+		if y.Sign() < 0 {
+			nativeOutputs[1].SetUint64(1)
+		}
+		if z.Sign() < 0 {
+			nativeOutputs[2].SetUint64(1)
+		}
+		if t.Sign() < 0 {
+			nativeOutputs[3].SetUint64(1)
+		}
+		return nil
+	})
+}

From 34211579483263ce2f3966f143733a4e574958de Mon Sep 17 00:00:00 2001
From: Youssef El Housni <youssef.elhousni@consensys.net>
Date: Fri, 8 May 2026 11:37:59 -0400
Subject: [PATCH 3/8] fix: remove unused BN254 G2 triple helper

---
 std/algebra/emulated/sw_bn254/g2.go | 39 -----------------------------
 1 file changed, 39 deletions(-)

diff --git a/std/algebra/emulated/sw_bn254/g2.go b/std/algebra/emulated/sw_bn254/g2.go
index e5cb5e08fa..1f69cd20e5 100644
--- a/std/algebra/emulated/sw_bn254/g2.go
+++ b/std/algebra/emulated/sw_bn254/g2.go
@@ -347,45 +347,6 @@ func (g2 *G2) Select(b frontend.Variable, p, q *G2Affine) *G2Affine {
 	}
 }
 
-func (g2 G2) triple(p *G2Affine) *G2Affine {
-	mone := g2.fp.NewElement(-1)
-
-	// compute λ1 = (3p.x²)/2p.y
-	xx := g2.Square(&p.P.X)
-	xx = g2.MulByConstElement(xx, big.NewInt(3))
-	y2 := g2.Double(&p.P.Y)
-	λ1 := g2.DivUnchecked(xx, y2)
-
-	// x2 = λ1²-2p.x
-	x20 := g2.fp.Eval([][]*baseEl{{&λ1.A0, &λ1.A0}, {mone, &λ1.A1, &λ1.A1}, {mone, &p.P.X.A0}}, []int{1, 1, 2})
-	x21 := g2.fp.Eval([][]*baseEl{{&λ1.A0, &λ1.A1}, {mone, &p.P.X.A1}}, []int{2, 2})
-	x2 := &fields_bn254.E2{A0: *x20, A1: *x21}
-
-	// omit y2 computation, and
-	// compute λ2 = 2p.y/(x2 − p.x) − λ1.
-	x1x2 := g2.Sub(&p.P.X, x2)
-	λ2 := g2.DivUnchecked(y2, x1x2)
-	λ2 = g2.Sub(λ2, λ1)
-
-	// compute x3 =λ2²-p.x-x2
-	x30 := g2.fp.Eval([][]*baseEl{{&λ2.A0, &λ2.A0}, {mone, &λ2.A1, &λ2.A1}, {mone, &p.P.X.A0}, {mone, x20}}, []int{1, 1, 1, 1})
-	x31 := g2.fp.Eval([][]*baseEl{{&λ2.A0, &λ2.A1}, {mone, &p.P.X.A1}, {mone, x21}}, []int{2, 1, 1})
-	x3 := &fields_bn254.E2{A0: *x30, A1: *x31}
-
-	// compute y3 = λ2*(p.x - x3)-p.y
-	y3 := g2.Ext2.Sub(&p.P.X, x3)
-	y30 := g2.fp.Eval([][]*baseEl{{&λ2.A0, &y3.A0}, {mone, &λ2.A1, &y3.A1}, {mone, &p.P.Y.A0}}, []int{1, 1, 1})
-	y31 := g2.fp.Eval([][]*baseEl{{&λ2.A0, &y3.A1}, {&λ2.A1, &y3.A0}, {mone, &p.P.Y.A1}}, []int{1, 1, 1})
-	y3 = &fields_bn254.E2{A0: *y30, A1: *y31}
-
-	return &G2Affine{
-		P: g2AffP{
-			X: *x3,
-			Y: *y3,
-		},
-	}
-}
-
 // ScalarMul computes [s]Q using an efficient endomorphism and returns it. It doesn't modify Q nor s.
 // It implements the GLV+fakeGLV optimization from [EEMP25] which achieves r^(1/4) bounds
 // on the sub-scalars, reducing the number of iterations in the scalar multiplication loop.

From 8e258f49d1b244c527829b8510d08cbd0e7244b9 Mon Sep 17 00:00:00 2001
From: Youssef El Housni <youssef.elhousni@consensys.net>
Date: Fri, 15 May 2026 10:19:44 -0400
Subject: [PATCH 4/8] refactor: G2 to follow sw_emulated

---
 std/algebra/emulated/sw_bls12381/g2.go      | 106 ++++++++++-------
 std/algebra/emulated/sw_bls12381/g2_test.go |  56 ++++++++-
 std/algebra/emulated/sw_bn254/g2.go         | 121 ++++++++++----------
 std/algebra/emulated/sw_bn254/g2_test.go    |  61 ++++++++++
 std/algebra/emulated/sw_bw6761/g2.go        | 121 ++++++++++----------
 std/algebra/emulated/sw_bw6761/g2_test.go   |  72 ++++++++++++
 6 files changed, 368 insertions(+), 169 deletions(-)
 create mode 100644 std/algebra/emulated/sw_bw6761/g2_test.go

diff --git a/std/algebra/emulated/sw_bls12381/g2.go b/std/algebra/emulated/sw_bls12381/g2.go
index 742ce085b2..9454236f1f 100644
--- a/std/algebra/emulated/sw_bls12381/g2.go
+++ b/std/algebra/emulated/sw_bls12381/g2.go
@@ -7,11 +7,9 @@ import (
 	bls12381 "github.com/consensys/gnark-crypto/ecc/bls12-381"
 	"github.com/consensys/gnark-crypto/ecc/bls12-381/hash_to_curve"
 	"github.com/consensys/gnark/frontend"
-	"github.com/consensys/gnark/internal/compilelogger"
 	"github.com/consensys/gnark/std/algebra/algopts"
 	"github.com/consensys/gnark/std/algebra/emulated/fields_bls12381"
 	"github.com/consensys/gnark/std/math/emulated"
-	"github.com/rs/zerolog"
 )
 
 type G2 struct {
@@ -641,8 +639,15 @@ func (g2 *G2) scalarMulGeneric(p *G2Affine, s *Scalar, opts ...algopts.AlgebraOp
 // ScalarMul computes [s]Q using GLV+FakeGLV with proven r^(1/4) sub-scalar
 // bounds (LLL Hermite). Routes through scalarMulGLVAndFakeGLV.
 //
+// Q is assumed to be in the prime-order G2 subgroup; this method does not check
+// subgroup membership for arbitrary twist points.
+//
 // This method is complete by default.
-// [algopts.WithIncompleteArithmetic] is deprecated here and ignored.
+//
+// ⚠️  When [algopts.WithIncompleteArithmetic] is set, this method is faster but
+// not complete. Besides Q=(0,0) and s in {0, ±1}, there is a sparse
+// point-dependent exceptional set coming from incomplete precomputations and the
+// initial bias step. This mode is intended for random non-adversarial inputs.
 // (0,0) is not on the curve but we conventionally take it as the
 // neutral/infinity point as per the [EVM].
 //
@@ -665,7 +670,11 @@ func (g2 *G2) ScalarMul(Q *G2Affine, s *Scalar, opts ...algopts.AlgebraOption) *
 // bits — about a quarter of the iteration count of plain GLV.
 //
 // This method is complete by default.
-// [algopts.WithIncompleteArithmetic] is deprecated here and ignored.
+//
+// ⚠️  When [algopts.WithIncompleteArithmetic] is set, this method is faster but
+// not complete. Besides Q=(0,0) and s in {0, ±1}, there is a sparse
+// point-dependent exceptional set coming from incomplete precomputations and the
+// initial bias step. This mode is intended for random non-adversarial inputs.
 //
 // [EEMP25]: https://eprint.iacr.org/2025/933
 func (g2 *G2) scalarMulGLVAndFakeGLV(Q *G2Affine, s *Scalar, opts ...algopts.AlgebraOption) *G2Affine {
@@ -673,15 +682,18 @@ func (g2 *G2) scalarMulGLVAndFakeGLV(Q *G2Affine, s *Scalar, opts ...algopts.Alg
 	if err != nil {
 		panic(err)
 	}
-	if cfg.IncompleteArithmetic {
-		compilelogger.LogOnce(g2.api.Compiler(), zerolog.InfoLevel,
-			"sw_bls12_381/scalarMulGLVAndFakeGLV", "WithIncompleteArithmetic is deprecated in (*sw_bls12381.G2).scalarMulGLVAndFakeGLV and complete arithmetic is always used")
-	}
 
-	// Handle s = 0 by routing through s = 1 and overriding the result later.
-	one := g2.fr.One()
-	selector0 := g2.fr.IsZero(s)
-	_s := g2.fr.Select(selector0, one, s)
+	// handle 0-scalar and (-1)-scalar cases
+	var isScalarZero, isScalarZeroOrMinusOne, isScalarOne, isScalarMinusOne frontend.Variable
+	_s := s
+	if !cfg.IncompleteArithmetic {
+		isScalarZero = g2.fr.IsZero(s)
+		one := g2.fr.One()
+		isScalarOne = g2.fr.IsZero(g2.fr.Sub(s, one))
+		isScalarMinusOne = g2.fr.IsZero(g2.fr.Add(s, one))
+		isScalarZeroOrMinusOne = g2.api.Or(isScalarZero, isScalarMinusOne)
+		_s = g2.fr.Select(isScalarZeroOrMinusOne, one, s)
+	}
 
 	// Decompose s into (u1, u2, v1, v2) via LLL: s·(v1 + λ·v2) + u1 + λ·u2 ≡ 0
 	// (mod r), with each sub-scalar bounded by ~r^(1/4).
@@ -734,21 +746,17 @@ func (g2 *G2) scalarMulGLVAndFakeGLV(Q *G2Affine, s *Scalar, opts ...algopts.Alg
 	}
 	originalR := R // preserve the unmodified hint output for the return value
 
-	// Edge cases: Q = (0,0), s = 0, s = ±1 force R into a relation with Q
-	// that the incomplete table precomputations can't handle. Substitute
-	// dummy points and reconstruct the canonical result at the end.
-	dummyQ := &G2Affine{P: *g2.g2Gen}
-	dummyR := &G2Affine{P: *g2.g2GenNbits}
-
-	_selector0 := g2.api.And(g2.Ext2.IsZero(&Q.P.X), g2.Ext2.IsZero(&Q.P.Y))
-	_Q := g2.Select(_selector0, dummyQ, Q)
-
-	sIsOne := g2.fr.IsZero(g2.fr.Sub(s, g2.fr.One()))
-	sIsMinusOne := g2.fr.IsZero(g2.fr.Add(s, g2.fr.One()))
-	_selector1 := g2.api.Or(sIsOne, sIsMinusOne)
-
-	selectorAny := g2.api.Or(g2.api.Or(selector0, _selector0), _selector1)
-	R = g2.Select(selectorAny, dummyR, R)
+	// handle (0,0)-point and scalar edge cases
+	var isInputPointAtInfinity frontend.Variable
+	_Q := Q
+	if !cfg.IncompleteArithmetic {
+		dummyQ := &G2Affine{P: *g2.g2Gen}
+		dummyR := &G2Affine{P: *g2.g2GenNbits}
+		R = g2.Select(isScalarZeroOrMinusOne, dummyR, R)
+		isInputPointAtInfinity = g2.api.And(g2.Ext2.IsZero(&Q.P.X), g2.Ext2.IsZero(&Q.P.Y))
+		_Q = g2.Select(isInputPointAtInfinity, dummyQ, Q)
+		R = g2.Select(isScalarOne, dummyR, R)
+	}
 
 	// Precompute -Q, -Φ(Q), Φ(Q).
 	var tableQ, tablePhiQ [2]*G2Affine
@@ -872,28 +880,34 @@ func (g2 *G2) scalarMulGLVAndFakeGLV(Q *G2Affine, s *Scalar, opts ...algopts.Alg
 	// At this point Acc must equal [2^(nbits-1)]G2 (the bias we added).
 	expected := &G2Affine{P: *g2.g2GenNbits}
 
-	// Skip the assertion on edge branches (where R is the dummy).
-	skip := g2.api.Or(g2.api.Or(selector0, _selector0), _selector1)
-	Acc = g2.Select(skip, expected, Acc)
+	if !cfg.IncompleteArithmetic {
+		Acc = g2.Select(g2.api.Or(g2.api.Or(isScalarZeroOrMinusOne, isInputPointAtInfinity), isScalarOne), expected, Acc)
+	}
 	g2.AssertIsEqual(Acc, expected)
 
-	// Reconstruct the canonical result: hint output for the regular path,
-	// constants on the edge branches.
-	zeroE2 := g2.Ext2.Zero()
-	zeroG2 := &G2Affine{P: g2AffP{X: *zeroE2, Y: *zeroE2}}
-	negQ := g2.neg(Q)
-	result := g2.Select(sIsMinusOne, negQ, originalR)
-	result = g2.Select(sIsOne, Q, result)
-	returnZero := g2.api.Or(selector0, _selector0)
-	return g2.Select(returnZero, zeroG2, result)
+	if !cfg.IncompleteArithmetic {
+		zeroE2 := g2.Ext2.Zero()
+		zeroG2 := &G2Affine{P: g2AffP{X: *zeroE2, Y: *zeroE2}}
+		result := g2.Select(isScalarOne, Q, originalR)
+		result = g2.Select(isScalarZeroOrMinusOne, g2.neg(Q), result)
+		result = g2.Select(isScalarZero, zeroG2, result)
+		result = g2.Select(isInputPointAtInfinity, zeroG2, result)
+		return result
+	}
+	return R
 }
 
 // MultiScalarMul computes the multi scalar multiplication of the points P and
 // scalars s. It returns an error if the length of the slices mismatch. If the
 // input slices are empty, then returns point at infinity.
 //
-// This method is complete by default.
-// [algopts.WithIncompleteArithmetic] is deprecated here and ignored.
+// By default, uses complete arithmetic which correctly handles zero scalars and
+// points at infinity.
+//
+// ⚠️  When [algopts.WithIncompleteArithmetic] is set, this method is faster but
+// not complete. It inherits the exceptional sets of the underlying scalar-mul
+// calls and additionally depends on internal accumulator collisions, so the
+// incomplete exceptional set is not fully characterized at the API level.
 func (g2 *G2) MultiScalarMul(p []*G2Affine, s []*Scalar, opts ...algopts.AlgebraOption) (*G2Affine, error) {
 
 	if len(p) == 0 {
@@ -909,6 +923,10 @@ func (g2 *G2) MultiScalarMul(p []*G2Affine, s []*Scalar, opts ...algopts.Algebra
 	if err != nil {
 		return nil, fmt.Errorf("new config: %w", err)
 	}
+	addFn := g2.add
+	if !cfg.IncompleteArithmetic {
+		addFn = g2.AddUnified
+	}
 	if !cfg.FoldMulti {
 		// the scalars are unique
 		if len(p) != len(s) {
@@ -918,7 +936,7 @@ func (g2 *G2) MultiScalarMul(p []*G2Affine, s []*Scalar, opts ...algopts.Algebra
 		res := g2.ScalarMul(p[0], s[0], opts...)
 		for i := 1; i < n; i++ {
 			q := g2.ScalarMul(p[i], s[i], opts...)
-			res = g2.AddUnified(res, q)
+			res = addFn(res, q)
 		}
 		return res, nil
 	} else {
@@ -929,10 +947,10 @@ func (g2 *G2) MultiScalarMul(p []*G2Affine, s []*Scalar, opts ...algopts.Algebra
 		gamma := s[0]
 		res := g2.ScalarMul(p[len(p)-1], gamma, opts...)
 		for i := len(p) - 2; i > 0; i-- {
-			res = g2.AddUnified(p[i], res)
+			res = addFn(p[i], res)
 			res = g2.ScalarMul(res, gamma, opts...)
 		}
-		res = g2.AddUnified(p[0], res)
+		res = addFn(p[0], res)
 		return res, nil
 	}
 }
diff --git a/std/algebra/emulated/sw_bls12381/g2_test.go b/std/algebra/emulated/sw_bls12381/g2_test.go
index 0b4a08e4e9..25ec197e9e 100644
--- a/std/algebra/emulated/sw_bls12381/g2_test.go
+++ b/std/algebra/emulated/sw_bls12381/g2_test.go
@@ -10,6 +10,7 @@ import (
 	"github.com/consensys/gnark-crypto/ecc/bls12-381/fp"
 	fr_bls12381 "github.com/consensys/gnark-crypto/ecc/bls12-381/fr"
 	"github.com/consensys/gnark/frontend"
+	"github.com/consensys/gnark/std/algebra/algopts"
 	"github.com/consensys/gnark/std/algebra/emulated/fields_bls12381"
 	"github.com/consensys/gnark/std/math/emulated"
 	"github.com/consensys/gnark/test"
@@ -18,6 +19,9 @@ import (
 type mulG2Circuit struct {
 	In, Res G2Affine
 	S       Scalar
+
+	incompleteArithmetic bool
+	skipGeneric          bool
 }
 
 func (c *mulG2Circuit) Define(api frontend.API) error {
@@ -25,10 +29,16 @@ func (c *mulG2Circuit) Define(api frontend.API) error {
 	if err != nil {
 		return fmt.Errorf("new G2 struct: %w", err)
 	}
-	res1 := g2.ScalarMul(&c.In, &c.S)
-	res2 := g2.scalarMulGeneric(&c.In, &c.S)
+	opts := []algopts.AlgebraOption{}
+	if c.incompleteArithmetic {
+		opts = append(opts, algopts.WithIncompleteArithmetic())
+	}
+	res1 := g2.ScalarMul(&c.In, &c.S, opts...)
 	g2.AssertIsEqual(res1, &c.Res)
-	g2.AssertIsEqual(res2, &c.Res)
+	if !c.skipGeneric {
+		res2 := g2.scalarMulGeneric(&c.In, &c.S)
+		g2.AssertIsEqual(res2, &c.Res)
+	}
 	return nil
 }
 
@@ -51,6 +61,46 @@ func TestScalarMulG2TestSolve(t *testing.T) {
 	assert.NoError(err)
 }
 
+func TestScalarMulG2EdgeCases(t *testing.T) {
+	_, _, _, gen := bls12381.Generators()
+	var zero, negGen, sevenGen bls12381.G2Affine
+	negGen.Neg(&gen)
+	sevenGen.ScalarMultiplication(&gen, big.NewInt(7))
+
+	testCases := []struct {
+		name                 string
+		point                bls12381.G2Affine
+		scalar               *big.Int
+		expected             bls12381.G2Affine
+		incompleteArithmetic bool
+	}{
+		{name: "zero-scalar", point: gen, scalar: big.NewInt(0), expected: zero},
+		{name: "one", point: gen, scalar: big.NewInt(1), expected: gen},
+		{name: "minus-one", point: gen, scalar: big.NewInt(-1), expected: negGen},
+		{name: "zero-point", point: zero, scalar: big.NewInt(7), expected: zero},
+		{name: "incomplete-option", point: gen, scalar: big.NewInt(7), expected: sevenGen, incompleteArithmetic: true},
+	}
+
+	for _, tc := range testCases {
+		t.Run(tc.name, func(t *testing.T) {
+			assert := test.NewAssert(t)
+			circuit := mulG2Circuit{
+				incompleteArithmetic: tc.incompleteArithmetic,
+				skipGeneric:          true,
+			}
+			witness := mulG2Circuit{
+				In:                   NewG2Affine(tc.point),
+				S:                    emulated.ValueOf[ScalarField](tc.scalar),
+				Res:                  NewG2Affine(tc.expected),
+				incompleteArithmetic: tc.incompleteArithmetic,
+				skipGeneric:          true,
+			}
+			err := test.IsSolved(&circuit, &witness, ecc.BN254.ScalarField())
+			assert.NoError(err)
+		})
+	}
+}
+
 type addG2Circuit struct {
 	In1, In2   G2Affine
 	Res        G2Affine
diff --git a/std/algebra/emulated/sw_bn254/g2.go b/std/algebra/emulated/sw_bn254/g2.go
index 1f69cd20e5..a638aa1c93 100644
--- a/std/algebra/emulated/sw_bn254/g2.go
+++ b/std/algebra/emulated/sw_bn254/g2.go
@@ -347,24 +347,45 @@ func (g2 *G2) Select(b frontend.Variable, p, q *G2Affine) *G2Affine {
 	}
 }
 
-// ScalarMul computes [s]Q using an efficient endomorphism and returns it. It doesn't modify Q nor s.
-// It implements the GLV+fakeGLV optimization from [EEMP25] which achieves r^(1/4) bounds
-// on the sub-scalars, reducing the number of iterations in the scalar multiplication loop.
+// ScalarMul computes [s]Q using GLV+FakeGLV with proven r^(1/4) sub-scalar
+// bounds (LLL Hermite). Routes through scalarMulGLVAndFakeGLV.
 //
-// ⚠️  The scalar s must be nonzero and the point Q different from (0,0) unless [algopts.WithCompleteArithmetic] is set.
+// Q is assumed to be in the prime-order G2 subgroup; this method does not check
+// subgroup membership for arbitrary twist points.
+//
+// This method is complete by default.
+//
+// ⚠️  When [algopts.WithIncompleteArithmetic] is set, this method is faster but
+// not complete. Besides Q=(0,0) and s in {0, ±1}, there is a sparse
+// point-dependent exceptional set coming from incomplete precomputations and the
+// initial bias step. This mode is intended for random non-adversarial inputs.
 // (0,0) is not on the curve but we conventionally take it as the
 // neutral/infinity point as per the [EVM].
 //
-// [EEMP25]: https://eprint.iacr.org/2025/933
 // [EVM]: https://ethereum.github.io/yellowpaper/paper.pdf
+//
+// [EEMP25]: https://eprint.iacr.org/2025/933
 func (g2 *G2) ScalarMul(Q *G2Affine, s *Scalar, opts ...algopts.AlgebraOption) *G2Affine {
 	return g2.scalarMulGLVAndFakeGLV(Q, s, opts...)
 }
 
-// scalarMulGLVAndFakeGLV computes [s]Q using GLV+fakeGLV with r^(1/4) bounds.
+// scalarMulGLVAndFakeGLV computes [s]Q using GLV+FakeGLV with r^(1/4) bounds.
 // It implements the "GLV + fake GLV" explained in [EEMP25] (Sec. 3.3).
 //
-// ⚠️  The scalar s must be nonzero and the point Q different from (0,0) unless [algopts.WithCompleteArithmetic] is set.
+// We hint the result R = [s]Q and verify the equation
+//
+//	[v1]R + [v2]Φ(R) + [u1]Q + [u2]Φ(Q) = O
+//
+// where (u1, u2, v1, v2) is the LLL-reduced 4-D Eisenstein decomposition of −s
+// against the GLV eigenvalue λ, so each sub-scalar fits in roughly r^(1/4)
+// bits — about a quarter of the iteration count of plain GLV.
+//
+// This method is complete by default.
+//
+// ⚠️  When [algopts.WithIncompleteArithmetic] is set, this method is faster but
+// not complete. Besides Q=(0,0) and s in {0, ±1}, there is a sparse
+// point-dependent exceptional set coming from incomplete precomputations and the
+// initial bias step. This mode is intended for random non-adversarial inputs.
 //
 // [EEMP25]: https://eprint.iacr.org/2025/933
 func (g2 *G2) scalarMulGLVAndFakeGLV(Q *G2Affine, s *Scalar, opts ...algopts.AlgebraOption) *G2Affine {
@@ -373,25 +394,20 @@ func (g2 *G2) scalarMulGLVAndFakeGLV(Q *G2Affine, s *Scalar, opts ...algopts.Alg
 		panic(err)
 	}
 
-	// handle 0-scalar
-	var selector0 frontend.Variable
+	// handle 0-scalar and (-1)-scalar cases
+	var isScalarZero, isScalarZeroOrMinusOne, isScalarOne, isScalarMinusOne frontend.Variable
 	_s := s
-	if cfg.CompleteArithmetic {
+	if !cfg.IncompleteArithmetic {
+		isScalarZero = g2.fr.IsZero(s)
 		one := g2.fr.One()
-		selector0 = g2.fr.IsZero(s)
-		_s = g2.fr.Select(selector0, one, s)
+		isScalarOne = g2.fr.IsZero(g2.fr.Sub(s, one))
+		isScalarMinusOne = g2.fr.IsZero(g2.fr.Add(s, one))
+		isScalarZeroOrMinusOne = g2.api.Or(isScalarZero, isScalarMinusOne)
+		_s = g2.fr.Select(isScalarZeroOrMinusOne, one, s)
 	}
 
-	// Instead of computing [s]Q=R, we check that R-[s]Q == 0.
-	// This is equivalent to [v]R + [-s*v]Q = 0 for some nonzero v.
-	//
-	// Using LLL-based lattice reduction we find small sub-scalars:
-	// 		[v1 + λ*v2]R + [u1 + λ*u2]Q = 0
-	// 		[v1]R + [v2]Φ(R) + [u1]Q + [u2]Φ(Q) = 0
-	//
-	// where u1, u2, v1, v2 < c*r^{1/4} with c ≈ 1.25 (proven bound from LLL).
-
-	// decompose s into u1, u2, v1, v2
+	// Decompose s into (u1, u2, v1, v2) via LLL: s·(v1 + λ·v2) + u1 + λ·u2 ≡ 0
+	// (mod r), with each sub-scalar bounded by ~r^(1/4).
 	signs, sd, err := g2.fr.NewHintGeneric(rationalReconstructExtG2, 4, 4, nil, []*emulated.Element[ScalarField]{_s, g2.eigenvalue})
 	if err != nil {
 		panic(fmt.Sprintf("rationalReconstructExtG2 hint: %v", err))
@@ -399,7 +415,7 @@ func (g2 *G2) scalarMulGLVAndFakeGLV(Q *G2Affine, s *Scalar, opts ...algopts.Alg
 	u1, u2, v1, v2 := sd[0], sd[1], sd[2], sd[3]
 	isNegu1, isNegu2, isNegv1, isNegv2 := signs[0], signs[1], signs[2], signs[3]
 
-	// Check that: s*(v1 + λ*v2) + u1 + λ*u2 = 0
+	// Verify s·(v1 + λ·v2) + u1 + λ·u2 ≡ 0 (mod r).
 	var st ScalarField
 	sv1 := g2.fr.Mul(_s, v1)
 	sλv2 := g2.fr.Mul(_s, g2.fr.Mul(g2.eigenvalue, v2))
@@ -426,11 +442,12 @@ func (g2 *G2) scalarMulGLVAndFakeGLV(Q *G2Affine, s *Scalar, opts ...algopts.Alg
 
 	g2.fr.AssertIsEqual(lhs, rhs)
 
-	// Ensure the denominator v1 + λ*v2 is non-zero to prevent trivial decomposition
-	den := g2.fr.Add(v1, g2.fr.Mul(g2.eigenvalue, v2))
-	g2.fr.AssertIsDifferent(den, g2.fr.Zero())
+	// Soundness: forbid the trivial all-zeros decomposition. With v1 + λ·v2 ≠ 0
+	// the relation forces (u1, u2) to the unique LLL lattice point and pins
+	// the hinted R = [s]Q.
+	g2.fr.AssertIsDifferent(g2.fr.Add(v1, g2.fr.Mul(g2.eigenvalue, v2)), g2.fr.Zero())
 
-	// Hint the scalar multiplication R = [s]Q
+	// Hint R = [s]Q.
 	_, point, _, err := emulated.NewVarGenericHint(g2.api, 0, 4, 0, nil,
 		[]*emulated.Element[BaseField]{&Q.P.X.A0, &Q.P.X.A1, &Q.P.Y.A0, &Q.P.Y.A1},
 		[]*emulated.Element[ScalarField]{s},
@@ -444,30 +461,18 @@ func (g2 *G2) scalarMulGLVAndFakeGLV(Q *G2Affine, s *Scalar, opts ...algopts.Alg
 			Y: fields_bn254.E2{A0: *point[2], A1: *point[3]},
 		},
 	}
-	// Preserve the original hinted R for return value (before edge-case modifications)
-	originalR := R
+	originalR := R // preserve the unmodified hint output for the return value
 
-	// handle (0,0)-point and edge cases
-	var _selector0, _selector1 frontend.Variable
-	var sIsOne, sIsMinusOne frontend.Variable
+	// handle (0,0)-point and scalar edge cases
+	var isInputPointAtInfinity frontend.Variable
 	_Q := Q
-	if cfg.CompleteArithmetic {
-		// Use different dummy points for _Q and R to avoid _Q == ±R
-		// which would cause incomplete addition failures in the table precomputations.
+	if !cfg.IncompleteArithmetic {
 		dummyQ := &G2Affine{P: *g2.g2Gen}
 		dummyR := &G2Affine{P: *g2.g2GenNbits}
-		// if Q=(0,0) we assign a dummy point
-		_selector0 = g2.api.And(g2.Ext2.IsZero(&Q.P.X), g2.Ext2.IsZero(&Q.P.Y))
-		_Q = g2.Select(_selector0, dummyQ, Q)
-		// if s=±1, R=±Q and incomplete addition fails (R.X == Q.X).
-		// We detect this from the constrained scalar, not from the unconstrained R.
-		sIsOne = g2.fr.IsZero(g2.fr.Sub(s, g2.fr.One()))
-		sIsMinusOne = g2.fr.IsZero(g2.fr.Add(s, g2.fr.One()))
-		_selector1 = g2.api.Or(sIsOne, sIsMinusOne)
-		// if s=0 (selector0), Q=(0,0) (_selector0), or s=±1 (_selector1),
-		// we assign a dummy point to R
-		selectorAny := g2.api.Or(g2.api.Or(selector0, _selector0), _selector1)
-		R = g2.Select(selectorAny, dummyR, R)
+		R = g2.Select(isScalarZeroOrMinusOne, dummyR, R)
+		isInputPointAtInfinity = g2.api.And(g2.Ext2.IsZero(&Q.P.X), g2.Ext2.IsZero(&Q.P.Y))
+		_Q = g2.Select(isInputPointAtInfinity, dummyQ, Q)
+		R = g2.Select(isScalarOne, dummyR, R)
 	}
 
 	// precompute -Q, -Φ(Q), Φ(Q)
@@ -616,25 +621,19 @@ func (g2 *G2) scalarMulGLVAndFakeGLV(Q *G2Affine, s *Scalar, opts ...algopts.Alg
 	// and we added G2 to the initial accumulator.
 	expected := &G2Affine{P: *g2.g2GenNbits}
 
-	if cfg.CompleteArithmetic {
-		// if Q=(0,0), s=0, or R.X==Q.X, skip the check
-		skip := g2.api.Or(g2.api.Or(selector0, _selector0), _selector1)
-		Acc = g2.Select(skip, expected, Acc)
+	if !cfg.IncompleteArithmetic {
+		Acc = g2.Select(g2.api.Or(g2.api.Or(isScalarZeroOrMinusOne, isInputPointAtInfinity), isScalarOne), expected, Acc)
 	}
 	g2.AssertIsEqual(Acc, expected)
 
-	if cfg.CompleteArithmetic {
+	if !cfg.IncompleteArithmetic {
 		zeroE2 := g2.Ext2.Zero()
 		zeroG2 := &G2Affine{P: g2AffP{X: *zeroE2, Y: *zeroE2}}
-		negQ := g2.neg(Q)
-		// s=-1 → -Q, else → R (constrained by MSM check above)
-		result := g2.Select(sIsMinusOne, negQ, originalR)
-		// s=1 → Q
-		result = g2.Select(sIsOne, Q, result)
-		// s=0 or Q=(0,0) → (0,0)
-		returnZero := g2.api.Or(selector0, _selector0)
-		return g2.Select(returnZero, zeroG2, result)
+		result := g2.Select(isScalarOne, Q, originalR)
+		result = g2.Select(isScalarZeroOrMinusOne, g2.neg(Q), result)
+		result = g2.Select(isScalarZero, zeroG2, result)
+		result = g2.Select(isInputPointAtInfinity, zeroG2, result)
+		return result
 	}
-
 	return R
 }
diff --git a/std/algebra/emulated/sw_bn254/g2_test.go b/std/algebra/emulated/sw_bn254/g2_test.go
index 812e21e0e0..8eb94ad1b2 100644
--- a/std/algebra/emulated/sw_bn254/g2_test.go
+++ b/std/algebra/emulated/sw_bn254/g2_test.go
@@ -7,6 +7,8 @@ import (
 	"github.com/consensys/gnark-crypto/ecc"
 	"github.com/consensys/gnark-crypto/ecc/bn254"
 	"github.com/consensys/gnark/frontend"
+	"github.com/consensys/gnark/std/algebra/algopts"
+	"github.com/consensys/gnark/std/math/emulated"
 	"github.com/consensys/gnark/test"
 )
 
@@ -102,6 +104,65 @@ func TestDoubleAndAddG2TestSolve(t *testing.T) {
 	assert.NoError(err)
 }
 
+type mulG2Circuit struct {
+	In, Res G2Affine
+	S       Scalar
+
+	incompleteArithmetic bool
+}
+
+func (c *mulG2Circuit) Define(api frontend.API) error {
+	g2, err := NewG2(api)
+	if err != nil {
+		panic(err)
+	}
+	opts := []algopts.AlgebraOption{}
+	if c.incompleteArithmetic {
+		opts = append(opts, algopts.WithIncompleteArithmetic())
+	}
+	res := g2.ScalarMul(&c.In, &c.S, opts...)
+	g2.AssertIsEqual(res, &c.Res)
+	return nil
+}
+
+func TestScalarMulG2EdgeCases(t *testing.T) {
+	_, _, _, gen := bn254.Generators()
+	var zero, negGen, sevenGen bn254.G2Affine
+	negGen.Neg(&gen)
+	sevenGen.ScalarMultiplication(&gen, big.NewInt(7))
+
+	testCases := []struct {
+		name                 string
+		point                bn254.G2Affine
+		scalar               *big.Int
+		expected             bn254.G2Affine
+		incompleteArithmetic bool
+	}{
+		{name: "zero-scalar", point: gen, scalar: big.NewInt(0), expected: zero},
+		{name: "one", point: gen, scalar: big.NewInt(1), expected: gen},
+		{name: "minus-one", point: gen, scalar: big.NewInt(-1), expected: negGen},
+		{name: "zero-point", point: zero, scalar: big.NewInt(7), expected: zero},
+		{name: "incomplete-option", point: gen, scalar: big.NewInt(7), expected: sevenGen, incompleteArithmetic: true},
+	}
+
+	for _, tc := range testCases {
+		t.Run(tc.name, func(t *testing.T) {
+			assert := test.NewAssert(t)
+			circuit := mulG2Circuit{
+				incompleteArithmetic: tc.incompleteArithmetic,
+			}
+			witness := mulG2Circuit{
+				In:                   NewG2Affine(tc.point),
+				S:                    emulated.ValueOf[ScalarField](tc.scalar),
+				Res:                  NewG2Affine(tc.expected),
+				incompleteArithmetic: tc.incompleteArithmetic,
+			}
+			err := test.IsSolved(&circuit, &witness, ecc.BN254.ScalarField())
+			assert.NoError(err)
+		})
+	}
+}
+
 type scalarMulG2BySeedCircuit struct {
 	In1 G2Affine
 	Res G2Affine
diff --git a/std/algebra/emulated/sw_bw6761/g2.go b/std/algebra/emulated/sw_bw6761/g2.go
index 8732893800..bc38602901 100644
--- a/std/algebra/emulated/sw_bw6761/g2.go
+++ b/std/algebra/emulated/sw_bw6761/g2.go
@@ -283,24 +283,45 @@ func (g2 *G2) Select(b frontend.Variable, p, q *G2Affine) *G2Affine {
 	}
 }
 
-// ScalarMul computes [s]Q using an efficient endomorphism and returns it. It doesn't modify Q nor s.
-// It implements the GLV+fakeGLV optimization from [EEMP25] which achieves r^(1/4) bounds
-// on the sub-scalars, reducing the number of iterations in the scalar multiplication loop.
+// ScalarMul computes [s]Q using GLV+FakeGLV with proven r^(1/4) sub-scalar
+// bounds (LLL Hermite). Routes through scalarMulGLVAndFakeGLV.
 //
-// ⚠️  The scalar s must be nonzero and the point Q different from (0,0) unless [algopts.WithCompleteArithmetic] is set.
+// Q is assumed to be in the prime-order G2 subgroup; this method does not check
+// subgroup membership for arbitrary twist points.
+//
+// This method is complete by default.
+//
+// ⚠️  When [algopts.WithIncompleteArithmetic] is set, this method is faster but
+// not complete. Besides Q=(0,0) and s in {0, ±1}, there is a sparse
+// point-dependent exceptional set coming from incomplete precomputations and the
+// initial bias step. This mode is intended for random non-adversarial inputs.
 // (0,0) is not on the curve but we conventionally take it as the
 // neutral/infinity point as per the [EVM].
 //
-// [EEMP25]: https://eprint.iacr.org/2025/933
 // [EVM]: https://ethereum.github.io/yellowpaper/paper.pdf
+//
+// [EEMP25]: https://eprint.iacr.org/2025/933
 func (g2 *G2) ScalarMul(Q *G2Affine, s *Scalar, opts ...algopts.AlgebraOption) *G2Affine {
 	return g2.scalarMulGLVAndFakeGLV(Q, s, opts...)
 }
 
-// scalarMulGLVAndFakeGLV computes [s]Q using GLV+fakeGLV with r^(1/4) bounds.
+// scalarMulGLVAndFakeGLV computes [s]Q using GLV+FakeGLV with r^(1/4) bounds.
 // It implements the "GLV + fake GLV" explained in [EEMP25] (Sec. 3.3).
 //
-// ⚠️  The scalar s must be nonzero and the point Q different from (0,0) unless [algopts.WithCompleteArithmetic] is set.
+// We hint the result R = [s]Q and verify the equation
+//
+//	[v1]R + [v2]Φ(R) + [u1]Q + [u2]Φ(Q) = O
+//
+// where (u1, u2, v1, v2) is the LLL-reduced 4-D Eisenstein decomposition of −s
+// against the GLV eigenvalue λ, so each sub-scalar fits in roughly r^(1/4)
+// bits — about a quarter of the iteration count of plain GLV.
+//
+// This method is complete by default.
+//
+// ⚠️  When [algopts.WithIncompleteArithmetic] is set, this method is faster but
+// not complete. Besides Q=(0,0) and s in {0, ±1}, there is a sparse
+// point-dependent exceptional set coming from incomplete precomputations and the
+// initial bias step. This mode is intended for random non-adversarial inputs.
 //
 // [EEMP25]: https://eprint.iacr.org/2025/933
 func (g2 *G2) scalarMulGLVAndFakeGLV(Q *G2Affine, s *Scalar, opts ...algopts.AlgebraOption) *G2Affine {
@@ -309,25 +330,20 @@ func (g2 *G2) scalarMulGLVAndFakeGLV(Q *G2Affine, s *Scalar, opts ...algopts.Alg
 		panic(err)
 	}
 
-	// handle 0-scalar
-	var selector0 frontend.Variable
+	// handle 0-scalar and (-1)-scalar cases
+	var isScalarZero, isScalarZeroOrMinusOne, isScalarOne, isScalarMinusOne frontend.Variable
 	_s := s
-	if cfg.CompleteArithmetic {
+	if !cfg.IncompleteArithmetic {
+		isScalarZero = g2.fr.IsZero(s)
 		one := g2.fr.One()
-		selector0 = g2.fr.IsZero(s)
-		_s = g2.fr.Select(selector0, one, s)
+		isScalarOne = g2.fr.IsZero(g2.fr.Sub(s, one))
+		isScalarMinusOne = g2.fr.IsZero(g2.fr.Add(s, one))
+		isScalarZeroOrMinusOne = g2.api.Or(isScalarZero, isScalarMinusOne)
+		_s = g2.fr.Select(isScalarZeroOrMinusOne, one, s)
 	}
 
-	// Instead of computing [s]Q=R, we check that R-[s]Q == 0.
-	// This is equivalent to [v]R + [-s*v]Q = 0 for some nonzero v.
-	//
-	// Using LLL-based lattice reduction we find small sub-scalars:
-	// 		[v1 + λ*v2]R + [u1 + λ*u2]Q = 0
-	// 		[v1]R + [v2]Φ(R) + [u1]Q + [u2]Φ(Q) = 0
-	//
-	// where u1, u2, v1, v2 < c*r^{1/4} with c ≈ 1.25 (proven bound from LLL).
-
-	// decompose s into u1, u2, v1, v2
+	// Decompose s into (u1, u2, v1, v2) via LLL: s·(v1 + λ·v2) + u1 + λ·u2 ≡ 0
+	// (mod r), with each sub-scalar bounded by ~r^(1/4).
 	signs, sd, err := g2.fr.NewHintGeneric(rationalReconstructExtG2, 4, 4, nil, []*emulated.Element[ScalarField]{_s, g2.eigenvalue})
 	if err != nil {
 		panic(fmt.Sprintf("rationalReconstructExtG2 hint: %v", err))
@@ -335,7 +351,7 @@ func (g2 *G2) scalarMulGLVAndFakeGLV(Q *G2Affine, s *Scalar, opts ...algopts.Alg
 	u1, u2, v1, v2 := sd[0], sd[1], sd[2], sd[3]
 	isNegu1, isNegu2, isNegv1, isNegv2 := signs[0], signs[1], signs[2], signs[3]
 
-	// Check that: s*(v1 + λ*v2) + u1 + λ*u2 = 0
+	// Verify s·(v1 + λ·v2) + u1 + λ·u2 ≡ 0 (mod r).
 	var st ScalarField
 	sv1 := g2.fr.Mul(_s, v1)
 	sλv2 := g2.fr.Mul(_s, g2.fr.Mul(g2.eigenvalue, v2))
@@ -362,11 +378,12 @@ func (g2 *G2) scalarMulGLVAndFakeGLV(Q *G2Affine, s *Scalar, opts ...algopts.Alg
 
 	g2.fr.AssertIsEqual(lhs, rhs)
 
-	// Ensure the denominator v1 + λ*v2 is non-zero to prevent trivial decomposition
-	den := g2.fr.Add(v1, g2.fr.Mul(g2.eigenvalue, v2))
-	g2.fr.AssertIsDifferent(den, g2.fr.Zero())
+	// Soundness: forbid the trivial all-zeros decomposition. With v1 + λ·v2 ≠ 0
+	// the relation forces (u1, u2) to the unique LLL lattice point and pins
+	// the hinted R = [s]Q.
+	g2.fr.AssertIsDifferent(g2.fr.Add(v1, g2.fr.Mul(g2.eigenvalue, v2)), g2.fr.Zero())
 
-	// Hint the scalar multiplication R = [s]Q
+	// Hint R = [s]Q.
 	_, point, _, err := emulated.NewVarGenericHint(g2.api, 0, 2, 0, nil,
 		[]*emulated.Element[BaseField]{&Q.P.X, &Q.P.Y},
 		[]*emulated.Element[ScalarField]{s},
@@ -380,30 +397,18 @@ func (g2 *G2) scalarMulGLVAndFakeGLV(Q *G2Affine, s *Scalar, opts ...algopts.Alg
 			Y: *point[1],
 		},
 	}
-	// Preserve the original hinted R for return value (before edge-case modifications)
-	originalR := R
+	originalR := R // preserve the unmodified hint output for the return value
 
-	// handle (0,0)-point and edge cases
-	var _selector0, _selector1 frontend.Variable
-	var sIsOne, sIsMinusOne frontend.Variable
+	// handle (0,0)-point and scalar edge cases
+	var isInputPointAtInfinity frontend.Variable
 	_Q := Q
-	if cfg.CompleteArithmetic {
-		// Use different dummy points for _Q and R to avoid _Q == ±R
-		// which would cause incomplete addition failures in the table precomputations.
+	if !cfg.IncompleteArithmetic {
 		dummyQ := &G2Affine{P: *g2.g2Gen}
 		dummyR := &G2Affine{P: *g2.g2GenNbits}
-		// if Q=(0,0) we assign a dummy point
-		_selector0 = g2.api.And(g2.curveF.IsZero(&Q.P.X), g2.curveF.IsZero(&Q.P.Y))
-		_Q = g2.Select(_selector0, dummyQ, Q)
-		// if s=±1, R=±Q and incomplete addition fails (R.X == Q.X).
-		// We detect this from the constrained scalar, not from the unconstrained R.
-		sIsOne = g2.fr.IsZero(g2.fr.Sub(s, g2.fr.One()))
-		sIsMinusOne = g2.fr.IsZero(g2.fr.Add(s, g2.fr.One()))
-		_selector1 = g2.api.Or(sIsOne, sIsMinusOne)
-		// if s=0 (selector0), Q=(0,0) (_selector0), or s=±1 (_selector1),
-		// we assign a dummy point to R
-		selectorAny := g2.api.Or(g2.api.Or(selector0, _selector0), _selector1)
-		R = g2.Select(selectorAny, dummyR, R)
+		R = g2.Select(isScalarZeroOrMinusOne, dummyR, R)
+		isInputPointAtInfinity = g2.api.And(g2.curveF.IsZero(&Q.P.X), g2.curveF.IsZero(&Q.P.Y))
+		_Q = g2.Select(isInputPointAtInfinity, dummyQ, Q)
+		R = g2.Select(isScalarOne, dummyR, R)
 	}
 
 	// precompute -Q, -Φ(Q), Φ(Q)
@@ -541,25 +546,19 @@ func (g2 *G2) scalarMulGLVAndFakeGLV(Q *G2Affine, s *Scalar, opts ...algopts.Alg
 	// and we added G2 to the initial accumulator.
 	expected := &G2Affine{P: *g2.g2GenNbits}
 
-	if cfg.CompleteArithmetic {
-		// if Q=(0,0), s=0, or R.X==Q.X, skip the check
-		skip := g2.api.Or(g2.api.Or(selector0, _selector0), _selector1)
-		Acc = g2.Select(skip, expected, Acc)
+	if !cfg.IncompleteArithmetic {
+		Acc = g2.Select(g2.api.Or(g2.api.Or(isScalarZeroOrMinusOne, isInputPointAtInfinity), isScalarOne), expected, Acc)
 	}
 	g2.AssertIsEqual(Acc, expected)
 
-	if cfg.CompleteArithmetic {
+	if !cfg.IncompleteArithmetic {
 		zeroEl := g2.curveF.Zero()
 		zeroG2 := &G2Affine{P: g2AffP{X: *zeroEl, Y: *zeroEl}}
-		negQ := g2.neg(Q)
-		// s=-1 → -Q, else → R (constrained by MSM check above)
-		result := g2.Select(sIsMinusOne, negQ, originalR)
-		// s=1 → Q
-		result = g2.Select(sIsOne, Q, result)
-		// s=0 or Q=(0,0) → (0,0)
-		returnZero := g2.api.Or(selector0, _selector0)
-		return g2.Select(returnZero, zeroG2, result)
+		result := g2.Select(isScalarOne, Q, originalR)
+		result = g2.Select(isScalarZeroOrMinusOne, g2.neg(Q), result)
+		result = g2.Select(isScalarZero, zeroG2, result)
+		result = g2.Select(isInputPointAtInfinity, zeroG2, result)
+		return result
 	}
-
 	return R
 }
diff --git a/std/algebra/emulated/sw_bw6761/g2_test.go b/std/algebra/emulated/sw_bw6761/g2_test.go
new file mode 100644
index 0000000000..29f032bd80
--- /dev/null
+++ b/std/algebra/emulated/sw_bw6761/g2_test.go
@@ -0,0 +1,72 @@
+package sw_bw6761
+
+import (
+	"math/big"
+	"testing"
+
+	"github.com/consensys/gnark-crypto/ecc"
+	bw6761 "github.com/consensys/gnark-crypto/ecc/bw6-761"
+	"github.com/consensys/gnark/frontend"
+	"github.com/consensys/gnark/std/algebra/algopts"
+	"github.com/consensys/gnark/std/math/emulated"
+	"github.com/consensys/gnark/test"
+)
+
+type mulG2Circuit struct {
+	In, Res G2Affine
+	S       Scalar
+
+	incompleteArithmetic bool
+}
+
+func (c *mulG2Circuit) Define(api frontend.API) error {
+	g2, err := NewG2(api)
+	if err != nil {
+		panic(err)
+	}
+	opts := []algopts.AlgebraOption{}
+	if c.incompleteArithmetic {
+		opts = append(opts, algopts.WithIncompleteArithmetic())
+	}
+	res := g2.ScalarMul(&c.In, &c.S, opts...)
+	g2.AssertIsEqual(res, &c.Res)
+	return nil
+}
+
+func TestScalarMulG2EdgeCases(t *testing.T) {
+	_, _, _, gen := bw6761.Generators()
+	var zero, negGen, sevenGen bw6761.G2Affine
+	negGen.Neg(&gen)
+	sevenGen.ScalarMultiplication(&gen, big.NewInt(7))
+
+	testCases := []struct {
+		name                 string
+		point                bw6761.G2Affine
+		scalar               *big.Int
+		expected             bw6761.G2Affine
+		incompleteArithmetic bool
+	}{
+		{name: "zero-scalar", point: gen, scalar: big.NewInt(0), expected: zero},
+		{name: "one", point: gen, scalar: big.NewInt(1), expected: gen},
+		{name: "minus-one", point: gen, scalar: big.NewInt(-1), expected: negGen},
+		{name: "zero-point", point: zero, scalar: big.NewInt(7), expected: zero},
+		{name: "incomplete-option", point: gen, scalar: big.NewInt(7), expected: sevenGen, incompleteArithmetic: true},
+	}
+
+	for _, tc := range testCases {
+		t.Run(tc.name, func(t *testing.T) {
+			assert := test.NewAssert(t)
+			circuit := mulG2Circuit{
+				incompleteArithmetic: tc.incompleteArithmetic,
+			}
+			witness := mulG2Circuit{
+				In:                   NewG2Affine(tc.point),
+				S:                    emulated.ValueOf[ScalarField](tc.scalar),
+				Res:                  NewG2Affine(tc.expected),
+				incompleteArithmetic: tc.incompleteArithmetic,
+			}
+			err := test.IsSolved(&circuit, &witness, ecc.BN254.ScalarField())
+			assert.NoError(err)
+		})
+	}
+}

From b2c4899ad85799b8f99314d1ced15f04ab6b2d2b Mon Sep 17 00:00:00 2001
From: Youssef El Housni <youssef.elhousni@consensys.net>
Date: Fri, 15 May 2026 10:40:54 -0400
Subject: [PATCH 5/8] refactor: G2 to follow sw_emulated

---
 std/algebra/emulated/sw_bls12381/g2.go |  57 ++++++---
 std/algebra/emulated/sw_bn254/g2.go    | 169 +++++++++++++++++++------
 std/algebra/emulated/sw_bw6761/g2.go   | 156 ++++++++++++++++++-----
 3 files changed, 287 insertions(+), 95 deletions(-)

diff --git a/std/algebra/emulated/sw_bls12381/g2.go b/std/algebra/emulated/sw_bls12381/g2.go
index 9454236f1f..214edfca9c 100644
--- a/std/algebra/emulated/sw_bls12381/g2.go
+++ b/std/algebra/emulated/sw_bls12381/g2.go
@@ -344,12 +344,23 @@ func (g2 G2) sub(p, q *G2Affine) *G2Affine {
 }
 
 func (g2 *G2) double(p *G2Affine) *G2Affine {
+	return g2.doubleGeneric(p, false)
+}
 
+func (g2 *G2) doubleGeneric(p *G2Affine, unified bool) *G2Affine {
 	// compute λ = (3p.x²)/2*p.y
 	xx3a := g2.Square(&p.P.X)
 	xx3a = g2.MulByConstElement(xx3a, big.NewInt(3))
 	y2 := g2.Double(&p.P.Y)
+	var isDoubleYZero frontend.Variable = 0
+	if unified {
+		isDoubleYZero = g2.Ext2.IsZero(y2)
+		y2 = g2.Ext2.Select(isDoubleYZero, g2.Ext2.One(), y2)
+	}
 	λ := g2.DivUnchecked(xx3a, y2)
+	if unified {
+		λ = g2.Ext2.Select(isDoubleYZero, g2.Ext2.Zero(), λ)
+	}
 
 	// xr = λ²-2p.x
 	xr0 := g2.fp.Eval([][]*baseEl{{&λ.A0, &λ.A0}, {&λ.A1, &λ.A1}, {&p.P.X.A0}}, []int{1, -1, -2})
@@ -758,6 +769,11 @@ func (g2 *G2) scalarMulGLVAndFakeGLV(Q *G2Affine, s *Scalar, opts ...algopts.Alg
 		R = g2.Select(isScalarOne, dummyR, R)
 	}
 
+	addFn := g2.add
+	if !cfg.IncompleteArithmetic {
+		addFn = g2.AddUnified
+	}
+
 	// Precompute -Q, -Φ(Q), Φ(Q).
 	var tableQ, tablePhiQ [2]*G2Affine
 	negQY := g2.Ext2.Neg(&_Q.P.Y)
@@ -796,24 +812,24 @@ func (g2 *G2) scalarMulGLVAndFakeGLV(Q *G2Affine, s *Scalar, opts ...algopts.Alg
 
 	// Combine Q, R precomputations into ±Q±R, ±Φ(Q)±Φ(R) tables.
 	var tableS [4]*G2Affine
-	tableS[0] = g2.add(tableQ[0], tableR[0])
+	tableS[0] = addFn(tableQ[0], tableR[0])
 	tableS[1] = g2.neg(tableS[0])
-	tableS[2] = g2.add(tableQ[1], tableR[0])
+	tableS[2] = addFn(tableQ[1], tableR[0])
 	tableS[3] = g2.neg(tableS[2])
 
 	var tablePhiS [4]*G2Affine
-	tablePhiS[0] = g2.add(tablePhiQ[0], tablePhiR[0])
+	tablePhiS[0] = addFn(tablePhiQ[0], tablePhiR[0])
 	tablePhiS[1] = g2.neg(tablePhiS[0])
-	tablePhiS[2] = g2.add(tablePhiQ[1], tablePhiR[0])
+	tablePhiS[2] = addFn(tablePhiQ[1], tablePhiR[0])
 	tablePhiS[3] = g2.neg(tablePhiS[2])
 
 	// Initial accumulator: Q + R + Φ(Q) + Φ(R) plus a fixed shift by the G2
 	// generator to avoid incomplete additions in the loop. At the end Acc
 	// will equal [2^(nbits-1)]G2 (the precomputed g2GenNbits).
-	Acc := g2.add(tableS[1], tablePhiS[1])
+	Acc := addFn(tableS[1], tablePhiS[1])
 	B1 := Acc
 	g2GenPoint := &G2Affine{P: *g2.g2Gen}
-	Acc = g2.add(Acc, g2GenPoint)
+	Acc = addFn(Acc, g2GenPoint)
 
 	// LLL Hermite bound: u_i, v_i < γ₄·r^(1/4), fits in (BitLen+3)/4 + 2 bits.
 	nbits := (st.Modulus().BitLen()+3)/4 + 2
@@ -824,13 +840,13 @@ func (g2 *G2) scalarMulGLVAndFakeGLV(Q *G2Affine, s *Scalar, opts ...algopts.Alg
 
 	// 16-entry Bi precomputation: ±Q ± R ± Φ(Q) ± Φ(R). Half the entries are
 	// negatives of the other half (same X), so we use an 8-to-1 mux + signed Y.
-	B2 := g2.add(tableS[1], tablePhiS[2])
-	B3 := g2.add(tableS[1], tablePhiS[3])
-	B4 := g2.add(tableS[1], tablePhiS[0])
-	B5 := g2.add(tableS[2], tablePhiS[1])
-	B6 := g2.add(tableS[2], tablePhiS[2])
-	B7 := g2.add(tableS[2], tablePhiS[3])
-	B8 := g2.add(tableS[2], tablePhiS[0])
+	B2 := addFn(tableS[1], tablePhiS[2])
+	B3 := addFn(tableS[1], tablePhiS[3])
+	B4 := addFn(tableS[1], tablePhiS[0])
+	B5 := addFn(tableS[2], tablePhiS[1])
+	B6 := addFn(tableS[2], tablePhiS[2])
+	B7 := addFn(tableS[2], tablePhiS[3])
+	B8 := addFn(tableS[2], tablePhiS[0])
 	B10 := g2.neg(B7)
 	B12 := g2.neg(B5)
 	B14 := g2.neg(B3)
@@ -864,17 +880,22 @@ func (g2 *G2) scalarMulGLVAndFakeGLV(Q *G2Affine, s *Scalar, opts ...algopts.Alg
 				),
 			},
 		}
-		Acc = g2.doubleAndAdd(Acc, Bi)
+		if !cfg.IncompleteArithmetic {
+			Acc = g2.doubleGeneric(Acc, true)
+			Acc = addFn(Acc, Bi)
+		} else {
+			Acc = g2.doubleAndAdd(Acc, Bi)
+		}
 	}
 
 	// i = 0: subtract Q, Φ(Q), R, Φ(R) if the first bits are 0.
-	tableQ[0] = g2.add(tableQ[0], Acc)
+	tableQ[0] = addFn(tableQ[0], Acc)
 	Acc = g2.Select(u1bits[0], Acc, tableQ[0])
-	tablePhiQ[0] = g2.add(tablePhiQ[0], Acc)
+	tablePhiQ[0] = addFn(tablePhiQ[0], Acc)
 	Acc = g2.Select(u2bits[0], Acc, tablePhiQ[0])
-	tableR[0] = g2.add(tableR[0], Acc)
+	tableR[0] = addFn(tableR[0], Acc)
 	Acc = g2.Select(v1bits[0], Acc, tableR[0])
-	tablePhiR[0] = g2.add(tablePhiR[0], Acc)
+	tablePhiR[0] = addFn(tablePhiR[0], Acc)
 	Acc = g2.Select(v2bits[0], Acc, tablePhiR[0])
 
 	// At this point Acc must equal [2^(nbits-1)]G2 (the bias we added).
diff --git a/std/algebra/emulated/sw_bn254/g2.go b/std/algebra/emulated/sw_bn254/g2.go
index a638aa1c93..045783b8bc 100644
--- a/std/algebra/emulated/sw_bn254/g2.go
+++ b/std/algebra/emulated/sw_bn254/g2.go
@@ -209,6 +209,66 @@ func (g2 *G2) scalarMulBySeed(q *G2Affine) *G2Affine {
 	return z
 }
 
+// AddUnified adds p and q and returns it. It doesn't modify p nor q.
+//
+// ✅ p can be equal to q, and either or both can be (0,0).
+// ([0,0],[0,0]) is not on the twist but we conventionally take it as the
+// neutral/infinity point as per the [EVM].
+//
+// It uses a chord/tangent split with a single-Div fold to avoid exceptional
+// cases in complete-mode scalar multiplication.
+//
+// [EVM]: https://ethereum.github.io/yellowpaper/paper.pdf
+func (g2 *G2) AddUnified(p, q *G2Affine) *G2Affine {
+	isPInf := g2.api.And(g2.Ext2.IsZero(&p.P.X), g2.Ext2.IsZero(&p.P.Y))
+	isQInf := g2.api.And(g2.Ext2.IsZero(&q.P.X), g2.Ext2.IsZero(&q.P.Y))
+
+	xDiff := g2.Sub(&q.P.X, &p.P.X)
+	xEqual := g2.IsZero(xDiff)
+
+	numChord := g2.Sub(&q.P.Y, &p.P.Y)
+	denChord := xDiff
+	xx := g2.Square(&p.P.X)
+	numTangent := g2.MulByConstElement(xx, big.NewInt(3))
+	denTangent := g2.MulByConstElement(&p.P.Y, big.NewInt(2))
+
+	num := g2.Ext2.Select(xEqual, numTangent, numChord)
+	den := g2.Ext2.Select(xEqual, denTangent, denChord)
+	denIsZero := g2.IsZero(den)
+	denSafe := g2.Ext2.Select(denIsZero, g2.One(), den)
+	λ := g2.DivUnchecked(num, denSafe)
+	λ = g2.Ext2.Select(denIsZero, g2.Zero(), λ)
+
+	pxPlusQx := g2.Add(&p.P.X, &q.P.X)
+	xr := g2.Mul(λ, λ)
+	xr = g2.Sub(xr, pxPlusQx)
+
+	pxMinusXr := g2.Sub(&p.P.X, xr)
+	yr := g2.Mul(λ, pxMinusXr)
+	yr = g2.Sub(yr, &p.P.Y)
+
+	result := &G2Affine{
+		P:     g2AffP{X: *xr, Y: *yr},
+		Lines: nil,
+	}
+
+	result = g2.Select(isPInf, q, result)
+	result = g2.Select(isQInf, p, result)
+
+	ySub := g2.Sub(&p.P.Y, &q.P.Y)
+	yEqual := g2.IsZero(ySub)
+	areFinite := g2.api.And(g2.api.Sub(1, isPInf), g2.api.Sub(1, isQInf))
+	isInverse := g2.api.And(g2.api.And(xEqual, g2.api.Sub(1, yEqual)), areFinite)
+	zero := g2.Ext2.Zero()
+	infinity := G2Affine{
+		P:     g2AffP{X: *zero, Y: *zero},
+		Lines: nil,
+	}
+	result = g2.Select(isInverse, &infinity, result)
+
+	return result
+}
+
 func (g2 G2) add(p, q *G2Affine) *G2Affine {
 
 	// compute λ = (q.y-p.y)/(q.x-p.x)
@@ -246,18 +306,43 @@ func (g2 G2) neg(p *G2Affine) *G2Affine {
 	}
 }
 
+// muxE2Y8Signed selects from 8 E2 Y-values using selector (0-7) and conditionally
+// negates based on signBit. This optimizes the common GLV pattern where Y[i] =
+// -Y[15-i], reducing a 16-to-1 Mux to an 8-to-1 Mux plus conditional negation.
+func (g2 *G2) muxE2Y8Signed(signBit frontend.Variable, selector frontend.Variable, yA0, yA1 [8]*emulated.Element[BaseField]) *fields_bn254.E2 {
+	baseA0 := g2.fp.Mux(selector, yA0[:]...)
+	baseA1 := g2.fp.Mux(selector, yA1[:]...)
+	negA0 := g2.fp.Neg(baseA0)
+	negA1 := g2.fp.Neg(baseA1)
+	return &fields_bn254.E2{
+		A0: *g2.fp.Select(signBit, negA0, baseA0),
+		A1: *g2.fp.Select(signBit, negA1, baseA1),
+	}
+}
+
 func (g2 G2) sub(p, q *G2Affine) *G2Affine {
 	qNeg := g2.neg(q)
 	return g2.add(p, qNeg)
 }
 
 func (g2 *G2) double(p *G2Affine) *G2Affine {
+	return g2.doubleGeneric(p, false)
+}
 
+func (g2 *G2) doubleGeneric(p *G2Affine, unified bool) *G2Affine {
 	// compute λ = (3p.x²)/2*p.y
 	xx3a := g2.Square(&p.P.X)
 	xx3a = g2.MulByConstElement(xx3a, big.NewInt(3))
 	y2 := g2.Double(&p.P.Y)
+	var isDoubleYZero frontend.Variable = 0
+	if unified {
+		isDoubleYZero = g2.Ext2.IsZero(y2)
+		y2 = g2.Ext2.Select(isDoubleYZero, g2.Ext2.One(), y2)
+	}
 	λ := g2.DivUnchecked(xx3a, y2)
+	if unified {
+		λ = g2.Ext2.Select(isDoubleYZero, g2.Ext2.Zero(), λ)
+	}
 
 	// xr = λ²-2p.x
 	xr0 := g2.fp.Eval([][]*baseEl{{&λ.A0, &λ.A0}, {&λ.A1, &λ.A1}, {&p.P.X.A0}}, []int{1, -1, -2})
@@ -475,6 +560,11 @@ func (g2 *G2) scalarMulGLVAndFakeGLV(Q *G2Affine, s *Scalar, opts ...algopts.Alg
 		R = g2.Select(isScalarOne, dummyR, R)
 	}
 
+	addFn := g2.add
+	if !cfg.IncompleteArithmetic {
+		addFn = g2.AddUnified
+	}
+
 	// precompute -Q, -Φ(Q), Φ(Q)
 	var tableQ, tablePhiQ [2]*G2Affine
 	negQY := g2.Ext2.Neg(&_Q.P.Y)
@@ -514,27 +604,27 @@ func (g2 *G2) scalarMulGLVAndFakeGLV(Q *G2Affine, s *Scalar, opts ...algopts.Alg
 
 	// precompute -Q-R, Q+R, Q-R, -Q+R (combining the two points Q and R)
 	var tableS [4]*G2Affine
-	tableS[0] = g2.add(tableQ[0], tableR[0]) // -Q - R
-	tableS[1] = g2.neg(tableS[0])            // Q + R
-	tableS[2] = g2.add(tableQ[1], tableR[0]) // Q - R
-	tableS[3] = g2.neg(tableS[2])            // -Q + R
+	tableS[0] = addFn(tableQ[0], tableR[0]) // -Q - R
+	tableS[1] = g2.neg(tableS[0])           // Q + R
+	tableS[2] = addFn(tableQ[1], tableR[0]) // Q - R
+	tableS[3] = g2.neg(tableS[2])           // -Q + R
 
 	// precompute -Φ(Q)-Φ(R), Φ(Q)+Φ(R), Φ(Q)-Φ(R), -Φ(Q)+Φ(R) (combining endomorphisms)
 	var tablePhiS [4]*G2Affine
-	tablePhiS[0] = g2.add(tablePhiQ[0], tablePhiR[0]) // -Φ(Q) - Φ(R)
-	tablePhiS[1] = g2.neg(tablePhiS[0])               // Φ(Q) + Φ(R)
-	tablePhiS[2] = g2.add(tablePhiQ[1], tablePhiR[0]) // Φ(Q) - Φ(R)
-	tablePhiS[3] = g2.neg(tablePhiS[2])               // -Φ(Q) + Φ(R)
+	tablePhiS[0] = addFn(tablePhiQ[0], tablePhiR[0]) // -Φ(Q) - Φ(R)
+	tablePhiS[1] = g2.neg(tablePhiS[0])              // Φ(Q) + Φ(R)
+	tablePhiS[2] = addFn(tablePhiQ[1], tablePhiR[0]) // Φ(Q) - Φ(R)
+	tablePhiS[3] = g2.neg(tablePhiS[2])              // -Φ(Q) + Φ(R)
 
 	// Acc = Q + Φ(Q) + R + Φ(R)
-	Acc := g2.add(tableS[1], tablePhiS[1])
+	Acc := addFn(tableS[1], tablePhiS[1])
 	B1 := Acc
 
 	// Add G2 generator to Acc to avoid incomplete additions in the loop.
 	// At the end, since [u1]Q + [u2]Φ(Q) + [v1]R + [v2]Φ(R) = 0,
 	// Acc will equal [2^nbits]G2 (precomputed).
 	g2GenPoint := &G2Affine{P: *g2.g2Gen}
-	Acc = g2.add(Acc, g2GenPoint)
+	Acc = addFn(Acc, g2GenPoint)
 
 	// u1, u2, v1, v2 < c*r^{1/4} where c ≈ 1.25
 	nbits := (st.Modulus().BitLen()+3)/4 + 2
@@ -546,21 +636,17 @@ func (g2 *G2) scalarMulGLVAndFakeGLV(Q *G2Affine, s *Scalar, opts ...algopts.Alg
 	// Precompute all 16 combinations: ±Q ± Φ(Q) ± R ± Φ(R)
 	// Using tableS (Q±R) and tablePhiS (Φ(Q)±Φ(R)) to match G1 pattern
 	// B1 = (Q+R) + (Φ(Q)+Φ(R)) = Q + R + Φ(Q) + Φ(R)
-	B2 := g2.add(tableS[1], tablePhiS[2]) // (Q+R) + (Φ(Q)-Φ(R)) = Q + R + Φ(Q) - Φ(R)
-	B3 := g2.add(tableS[1], tablePhiS[3]) // (Q+R) + (-Φ(Q)+Φ(R)) = Q + R - Φ(Q) + Φ(R)
-	B4 := g2.add(tableS[1], tablePhiS[0]) // (Q+R) + (-Φ(Q)-Φ(R)) = Q + R - Φ(Q) - Φ(R)
-	B5 := g2.add(tableS[2], tablePhiS[1]) // (Q-R) + (Φ(Q)+Φ(R)) = Q - R + Φ(Q) + Φ(R)
-	B6 := g2.add(tableS[2], tablePhiS[2]) // (Q-R) + (Φ(Q)-Φ(R)) = Q - R + Φ(Q) - Φ(R)
-	B7 := g2.add(tableS[2], tablePhiS[3]) // (Q-R) + (-Φ(Q)+Φ(R)) = Q - R - Φ(Q) + Φ(R)
-	B8 := g2.add(tableS[2], tablePhiS[0]) // (Q-R) + (-Φ(Q)-Φ(R)) = Q - R - Φ(Q) - Φ(R)
-	B9 := g2.neg(B8)                      // -Q + R + Φ(Q) + Φ(R)
-	B10 := g2.neg(B7)                     // -Q + R + Φ(Q) - Φ(R)
-	B11 := g2.neg(B6)                     // -Q + R - Φ(Q) + Φ(R)
-	B12 := g2.neg(B5)                     // -Q + R - Φ(Q) - Φ(R)
-	B13 := g2.neg(B4)                     // -Q - R + Φ(Q) + Φ(R)
-	B14 := g2.neg(B3)                     // -Q - R + Φ(Q) - Φ(R)
-	B15 := g2.neg(B2)                     // -Q - R - Φ(Q) + Φ(R)
-	B16 := g2.neg(B1)                     // -Q - R - Φ(Q) - Φ(R)
+	B2 := addFn(tableS[1], tablePhiS[2]) // (Q+R) + (Φ(Q)-Φ(R)) = Q + R + Φ(Q) - Φ(R)
+	B3 := addFn(tableS[1], tablePhiS[3]) // (Q+R) + (-Φ(Q)+Φ(R)) = Q + R - Φ(Q) + Φ(R)
+	B4 := addFn(tableS[1], tablePhiS[0]) // (Q+R) + (-Φ(Q)-Φ(R)) = Q + R - Φ(Q) - Φ(R)
+	B5 := addFn(tableS[2], tablePhiS[1]) // (Q-R) + (Φ(Q)+Φ(R)) = Q - R + Φ(Q) + Φ(R)
+	B6 := addFn(tableS[2], tablePhiS[2]) // (Q-R) + (Φ(Q)-Φ(R)) = Q - R + Φ(Q) - Φ(R)
+	B7 := addFn(tableS[2], tablePhiS[3]) // (Q-R) + (-Φ(Q)+Φ(R)) = Q - R - Φ(Q) + Φ(R)
+	B8 := addFn(tableS[2], tablePhiS[0]) // (Q-R) + (-Φ(Q)-Φ(R)) = Q - R - Φ(Q) - Φ(R)
+	B10 := g2.neg(B7)                    // -Q + R + Φ(Q) - Φ(R)
+	B12 := g2.neg(B5)                    // -Q + R - Φ(Q) - Φ(R)
+	B14 := g2.neg(B3)                    // -Q - R + Φ(Q) - Φ(R)
+	B16 := g2.neg(B1)                    // -Q - R - Φ(Q) - Φ(R)
 
 	var Bi *G2Affine
 	for i := nbits - 1; i > 0; i-- {
@@ -579,8 +665,8 @@ func (g2 *G2) scalarMulGLVAndFakeGLV(Q *G2Affine, s *Scalar, opts ...algopts.Alg
 			g2.api.Mul(v2bits[i], 15),
 		)
 
-		// Bi.Y are distinct so we need a 16-to-1 multiplexer,
-		// but only half of the Bi.X are distinct so we need an 8-to-1.
+		// Only half of the Bi.X are distinct, and the other half of Bi.Y are
+		// negations, so select from 8 entries and conditionally negate Y.
 		Bi = &G2Affine{
 			P: g2AffP{
 				X: fields_bn254.E2{
@@ -591,30 +677,29 @@ func (g2 *G2) scalarMulGLVAndFakeGLV(Q *G2Affine, s *Scalar, opts ...algopts.Alg
 						&B16.P.X.A1, &B8.P.X.A1, &B14.P.X.A1, &B6.P.X.A1, &B12.P.X.A1, &B4.P.X.A1, &B10.P.X.A1, &B2.P.X.A1,
 					),
 				},
-				Y: fields_bn254.E2{
-					A0: *g2.fp.Mux(selectorY,
-						&B16.P.Y.A0, &B8.P.Y.A0, &B14.P.Y.A0, &B6.P.Y.A0, &B12.P.Y.A0, &B4.P.Y.A0, &B10.P.Y.A0, &B2.P.Y.A0,
-						&B15.P.Y.A0, &B7.P.Y.A0, &B13.P.Y.A0, &B5.P.Y.A0, &B11.P.Y.A0, &B3.P.Y.A0, &B9.P.Y.A0, &B1.P.Y.A0,
-					),
-					A1: *g2.fp.Mux(selectorY,
-						&B16.P.Y.A1, &B8.P.Y.A1, &B14.P.Y.A1, &B6.P.Y.A1, &B12.P.Y.A1, &B4.P.Y.A1, &B10.P.Y.A1, &B2.P.Y.A1,
-						&B15.P.Y.A1, &B7.P.Y.A1, &B13.P.Y.A1, &B5.P.Y.A1, &B11.P.Y.A1, &B3.P.Y.A1, &B9.P.Y.A1, &B1.P.Y.A1,
-					),
-				},
+				Y: *g2.muxE2Y8Signed(v2bits[i], selectorX,
+					[8]*emulated.Element[BaseField]{&B16.P.Y.A0, &B8.P.Y.A0, &B14.P.Y.A0, &B6.P.Y.A0, &B12.P.Y.A0, &B4.P.Y.A0, &B10.P.Y.A0, &B2.P.Y.A0},
+					[8]*emulated.Element[BaseField]{&B16.P.Y.A1, &B8.P.Y.A1, &B14.P.Y.A1, &B6.P.Y.A1, &B12.P.Y.A1, &B4.P.Y.A1, &B10.P.Y.A1, &B2.P.Y.A1},
+				),
 			},
 		}
 		// Acc = [2]Acc + Bi
-		Acc = g2.doubleAndAdd(Acc, Bi)
+		if !cfg.IncompleteArithmetic {
+			Acc = g2.doubleGeneric(Acc, true)
+			Acc = addFn(Acc, Bi)
+		} else {
+			Acc = g2.doubleAndAdd(Acc, Bi)
+		}
 	}
 
 	// i = 0: subtract Q, Φ(Q), R, Φ(R) if the first bits are 0
-	tableQ[0] = g2.add(tableQ[0], Acc)
+	tableQ[0] = addFn(tableQ[0], Acc)
 	Acc = g2.Select(u1bits[0], Acc, tableQ[0])
-	tablePhiQ[0] = g2.add(tablePhiQ[0], Acc)
+	tablePhiQ[0] = addFn(tablePhiQ[0], Acc)
 	Acc = g2.Select(u2bits[0], Acc, tablePhiQ[0])
-	tableR[0] = g2.add(tableR[0], Acc)
+	tableR[0] = addFn(tableR[0], Acc)
 	Acc = g2.Select(v1bits[0], Acc, tableR[0])
-	tablePhiR[0] = g2.add(tablePhiR[0], Acc)
+	tablePhiR[0] = addFn(tablePhiR[0], Acc)
 	Acc = g2.Select(v2bits[0], Acc, tablePhiR[0])
 
 	// Acc should now be [2^(nbits-1)]G2 since [u1]Q + [u2]Φ(Q) + [v1]R + [v2]Φ(R) = 0
diff --git a/std/algebra/emulated/sw_bw6761/g2.go b/std/algebra/emulated/sw_bw6761/g2.go
index bc38602901..e58dcaccc3 100644
--- a/std/algebra/emulated/sw_bw6761/g2.go
+++ b/std/algebra/emulated/sw_bw6761/g2.go
@@ -138,6 +138,64 @@ func (g2 *G2) scalarMulBySeed(q *G2Affine) *G2Affine {
 	return z
 }
 
+// AddUnified adds p and q and returns it. It doesn't modify p nor q.
+//
+// ✅ p can be equal to q, and either or both can be (0,0).
+// ([0,0],[0,0]) is not on the curve but we conventionally take it as the
+// neutral/infinity point.
+//
+// It uses a chord/tangent split with a single-Div fold to avoid exceptional
+// cases in complete-mode scalar multiplication.
+func (g2 *G2) AddUnified(p, q *G2Affine) *G2Affine {
+	isPInf := g2.api.And(g2.curveF.IsZero(&p.P.X), g2.curveF.IsZero(&p.P.Y))
+	isQInf := g2.api.And(g2.curveF.IsZero(&q.P.X), g2.curveF.IsZero(&q.P.Y))
+
+	xDiff := g2.curveF.Sub(&q.P.X, &p.P.X)
+	xEqual := g2.curveF.IsZero(xDiff)
+
+	numChord := g2.curveF.Sub(&q.P.Y, &p.P.Y)
+	denChord := xDiff
+	xx := g2.curveF.Mul(&p.P.X, &p.P.X)
+	numTangent := g2.curveF.MulConst(xx, big.NewInt(3))
+	denTangent := g2.curveF.MulConst(&p.P.Y, big.NewInt(2))
+
+	num := g2.curveF.Select(xEqual, numTangent, numChord)
+	den := g2.curveF.Select(xEqual, denTangent, denChord)
+	denIsZero := g2.curveF.IsZero(den)
+	denSafe := g2.curveF.Select(denIsZero, g2.curveF.One(), den)
+	λ := g2.curveF.Div(num, denSafe)
+	λ = g2.curveF.Select(denIsZero, g2.curveF.Zero(), λ)
+
+	pxPlusQx := g2.curveF.Add(&p.P.X, &q.P.X)
+	xr := g2.curveF.Mul(λ, λ)
+	xr = g2.curveF.Sub(xr, pxPlusQx)
+
+	pxMinusXr := g2.curveF.Sub(&p.P.X, xr)
+	yr := g2.curveF.Mul(λ, pxMinusXr)
+	yr = g2.curveF.Sub(yr, &p.P.Y)
+
+	result := &G2Affine{
+		P:     g2AffP{X: *xr, Y: *yr},
+		Lines: nil,
+	}
+
+	result = g2.Select(isPInf, q, result)
+	result = g2.Select(isQInf, p, result)
+
+	ySub := g2.curveF.Sub(&p.P.Y, &q.P.Y)
+	yEqual := g2.curveF.IsZero(ySub)
+	areFinite := g2.api.And(g2.api.Sub(1, isPInf), g2.api.Sub(1, isQInf))
+	isInverse := g2.api.And(g2.api.And(xEqual, g2.api.Sub(1, yEqual)), areFinite)
+	zero := g2.curveF.Zero()
+	infinity := G2Affine{
+		P:     g2AffP{X: *zero, Y: *zero},
+		Lines: nil,
+	}
+	result = g2.Select(isInverse, &infinity, result)
+
+	return result
+}
+
 func (g2 G2) add(p, q *G2Affine) *G2Affine {
 	// compute λ = (q.y-p.y)/(q.x-p.x)
 	qypy := g2.curveF.Sub(&q.P.Y, &p.P.Y)
@@ -169,17 +227,38 @@ func (g2 G2) neg(p *G2Affine) *G2Affine {
 	}
 }
 
+// muxY8Signed selects from 8 Y-values using selector (0-7) and conditionally
+// negates based on signBit. This optimizes the common GLV pattern where Y[i] =
+// -Y[15-i], reducing a 16-to-1 Mux to an 8-to-1 Mux plus conditional negation.
+func (g2 *G2) muxY8Signed(signBit frontend.Variable, selector frontend.Variable, ys [8]*emulated.Element[BaseField]) *emulated.Element[BaseField] {
+	y := g2.curveF.Mux(selector, ys[:]...)
+	negY := g2.curveF.Neg(y)
+	return g2.curveF.Select(signBit, negY, y)
+}
+
 func (g2 G2) sub(p, q *G2Affine) *G2Affine {
 	qNeg := g2.neg(q)
 	return g2.add(p, qNeg)
 }
 
 func (g2 *G2) double(p *G2Affine) *G2Affine {
+	return g2.doubleGeneric(p, false)
+}
+
+func (g2 *G2) doubleGeneric(p *G2Affine, unified bool) *G2Affine {
 	// compute λ = (3p.x²)/2*p.y
 	xx3a := g2.curveF.Mul(&p.P.X, &p.P.X)
 	xx3a = g2.curveF.MulConst(xx3a, big.NewInt(3))
 	y2 := g2.curveF.MulConst(&p.P.Y, big.NewInt(2))
+	var isDoubleYZero frontend.Variable = 0
+	if unified {
+		isDoubleYZero = g2.curveF.IsZero(y2)
+		y2 = g2.curveF.Select(isDoubleYZero, g2.curveF.One(), y2)
+	}
 	λ := g2.curveF.Div(xx3a, y2)
+	if unified {
+		λ = g2.curveF.Select(isDoubleYZero, g2.curveF.Zero(), λ)
+	}
 
 	// xr = λ²-2p.x
 	xr := g2.curveF.Eval([][]*baseEl{{λ, λ}, {&p.P.X}}, []int{1, -2})
@@ -411,6 +490,11 @@ func (g2 *G2) scalarMulGLVAndFakeGLV(Q *G2Affine, s *Scalar, opts ...algopts.Alg
 		R = g2.Select(isScalarOne, dummyR, R)
 	}
 
+	addFn := g2.add
+	if !cfg.IncompleteArithmetic {
+		addFn = g2.AddUnified
+	}
+
 	// precompute -Q, -Φ(Q), Φ(Q)
 	var tableQ, tablePhiQ [2]*G2Affine
 	negQY := g2.curveF.Neg(&_Q.P.Y)
@@ -450,27 +534,27 @@ func (g2 *G2) scalarMulGLVAndFakeGLV(Q *G2Affine, s *Scalar, opts ...algopts.Alg
 
 	// precompute -Q-R, Q+R, Q-R, -Q+R (combining the two points Q and R)
 	var tableS [4]*G2Affine
-	tableS[0] = g2.add(tableQ[0], tableR[0]) // -Q - R
-	tableS[1] = g2.neg(tableS[0])            // Q + R
-	tableS[2] = g2.add(tableQ[1], tableR[0]) // Q - R
-	tableS[3] = g2.neg(tableS[2])            // -Q + R
+	tableS[0] = addFn(tableQ[0], tableR[0]) // -Q - R
+	tableS[1] = g2.neg(tableS[0])           // Q + R
+	tableS[2] = addFn(tableQ[1], tableR[0]) // Q - R
+	tableS[3] = g2.neg(tableS[2])           // -Q + R
 
 	// precompute -Φ(Q)-Φ(R), Φ(Q)+Φ(R), Φ(Q)-Φ(R), -Φ(Q)+Φ(R) (combining endomorphisms)
 	var tablePhiS [4]*G2Affine
-	tablePhiS[0] = g2.add(tablePhiQ[0], tablePhiR[0]) // -Φ(Q) - Φ(R)
-	tablePhiS[1] = g2.neg(tablePhiS[0])               // Φ(Q) + Φ(R)
-	tablePhiS[2] = g2.add(tablePhiQ[1], tablePhiR[0]) // Φ(Q) - Φ(R)
-	tablePhiS[3] = g2.neg(tablePhiS[2])               // -Φ(Q) + Φ(R)
+	tablePhiS[0] = addFn(tablePhiQ[0], tablePhiR[0]) // -Φ(Q) - Φ(R)
+	tablePhiS[1] = g2.neg(tablePhiS[0])              // Φ(Q) + Φ(R)
+	tablePhiS[2] = addFn(tablePhiQ[1], tablePhiR[0]) // Φ(Q) - Φ(R)
+	tablePhiS[3] = g2.neg(tablePhiS[2])              // -Φ(Q) + Φ(R)
 
 	// Acc = Q + Φ(Q) + R + Φ(R)
-	Acc := g2.add(tableS[1], tablePhiS[1])
+	Acc := addFn(tableS[1], tablePhiS[1])
 	B1 := Acc
 
 	// Add G2 generator to Acc to avoid incomplete additions in the loop.
 	// At the end, since [u1]Q + [u2]Φ(Q) + [v1]R + [v2]Φ(R) = 0,
 	// Acc will equal [2^nbits]G2 (precomputed).
 	g2GenPoint := &G2Affine{P: *g2.g2Gen}
-	Acc = g2.add(Acc, g2GenPoint)
+	Acc = addFn(Acc, g2GenPoint)
 
 	// u1, u2, v1, v2 < c*r^{1/4} where c ≈ 1.25
 	nbits := (st.Modulus().BitLen()+3)/4 + 2
@@ -482,21 +566,17 @@ func (g2 *G2) scalarMulGLVAndFakeGLV(Q *G2Affine, s *Scalar, opts ...algopts.Alg
 	// Precompute all 16 combinations: ±Q ± Φ(Q) ± R ± Φ(R)
 	// Using tableS (Q±R) and tablePhiS (Φ(Q)±Φ(R)) to match G1 pattern
 	// B1 = (Q+R) + (Φ(Q)+Φ(R)) = Q + R + Φ(Q) + Φ(R)
-	B2 := g2.add(tableS[1], tablePhiS[2]) // (Q+R) + (Φ(Q)-Φ(R)) = Q + R + Φ(Q) - Φ(R)
-	B3 := g2.add(tableS[1], tablePhiS[3]) // (Q+R) + (-Φ(Q)+Φ(R)) = Q + R - Φ(Q) + Φ(R)
-	B4 := g2.add(tableS[1], tablePhiS[0]) // (Q+R) + (-Φ(Q)-Φ(R)) = Q + R - Φ(Q) - Φ(R)
-	B5 := g2.add(tableS[2], tablePhiS[1]) // (Q-R) + (Φ(Q)+Φ(R)) = Q - R + Φ(Q) + Φ(R)
-	B6 := g2.add(tableS[2], tablePhiS[2]) // (Q-R) + (Φ(Q)-Φ(R)) = Q - R + Φ(Q) - Φ(R)
-	B7 := g2.add(tableS[2], tablePhiS[3]) // (Q-R) + (-Φ(Q)+Φ(R)) = Q - R - Φ(Q) + Φ(R)
-	B8 := g2.add(tableS[2], tablePhiS[0]) // (Q-R) + (-Φ(Q)-Φ(R)) = Q - R - Φ(Q) - Φ(R)
-	B9 := g2.neg(B8)                      // -Q + R + Φ(Q) + Φ(R)
-	B10 := g2.neg(B7)                     // -Q + R + Φ(Q) - Φ(R)
-	B11 := g2.neg(B6)                     // -Q + R - Φ(Q) + Φ(R)
-	B12 := g2.neg(B5)                     // -Q + R - Φ(Q) - Φ(R)
-	B13 := g2.neg(B4)                     // -Q - R + Φ(Q) + Φ(R)
-	B14 := g2.neg(B3)                     // -Q - R + Φ(Q) - Φ(R)
-	B15 := g2.neg(B2)                     // -Q - R - Φ(Q) + Φ(R)
-	B16 := g2.neg(B1)                     // -Q - R - Φ(Q) - Φ(R)
+	B2 := addFn(tableS[1], tablePhiS[2]) // (Q+R) + (Φ(Q)-Φ(R)) = Q + R + Φ(Q) - Φ(R)
+	B3 := addFn(tableS[1], tablePhiS[3]) // (Q+R) + (-Φ(Q)+Φ(R)) = Q + R - Φ(Q) + Φ(R)
+	B4 := addFn(tableS[1], tablePhiS[0]) // (Q+R) + (-Φ(Q)-Φ(R)) = Q + R - Φ(Q) - Φ(R)
+	B5 := addFn(tableS[2], tablePhiS[1]) // (Q-R) + (Φ(Q)+Φ(R)) = Q - R + Φ(Q) + Φ(R)
+	B6 := addFn(tableS[2], tablePhiS[2]) // (Q-R) + (Φ(Q)-Φ(R)) = Q - R + Φ(Q) - Φ(R)
+	B7 := addFn(tableS[2], tablePhiS[3]) // (Q-R) + (-Φ(Q)+Φ(R)) = Q - R - Φ(Q) + Φ(R)
+	B8 := addFn(tableS[2], tablePhiS[0]) // (Q-R) + (-Φ(Q)-Φ(R)) = Q - R - Φ(Q) - Φ(R)
+	B10 := g2.neg(B7)                    // -Q + R + Φ(Q) - Φ(R)
+	B12 := g2.neg(B5)                    // -Q + R - Φ(Q) - Φ(R)
+	B14 := g2.neg(B3)                    // -Q - R + Φ(Q) - Φ(R)
+	B16 := g2.neg(B1)                    // -Q - R - Φ(Q) - Φ(R)
 
 	var Bi *G2Affine
 	for i := nbits - 1; i > 0; i-- {
@@ -515,31 +595,37 @@ func (g2 *G2) scalarMulGLVAndFakeGLV(Q *G2Affine, s *Scalar, opts ...algopts.Alg
 			g2.api.Mul(v2bits[i], 15),
 		)
 
-		// Bi.Y are distinct so we need a 16-to-1 multiplexer,
-		// but only half of the Bi.X are distinct so we need an 8-to-1.
+		// Only half of the Bi.X are distinct, and the other half of Bi.Y are
+		// negations, so select from 8 entries and conditionally negate Y.
 		Bi = &G2Affine{
 			P: g2AffP{
 				X: *g2.curveF.Mux(selectorX,
 					&B16.P.X, &B8.P.X, &B14.P.X, &B6.P.X, &B12.P.X, &B4.P.X, &B10.P.X, &B2.P.X,
 				),
-				Y: *g2.curveF.Mux(selectorY,
-					&B16.P.Y, &B8.P.Y, &B14.P.Y, &B6.P.Y, &B12.P.Y, &B4.P.Y, &B10.P.Y, &B2.P.Y,
-					&B15.P.Y, &B7.P.Y, &B13.P.Y, &B5.P.Y, &B11.P.Y, &B3.P.Y, &B9.P.Y, &B1.P.Y,
+				Y: *g2.muxY8Signed(v2bits[i], selectorX,
+					[8]*emulated.Element[BaseField]{
+						&B16.P.Y, &B8.P.Y, &B14.P.Y, &B6.P.Y, &B12.P.Y, &B4.P.Y, &B10.P.Y, &B2.P.Y,
+					},
 				),
 			},
 		}
 		// Acc = [2]Acc + Bi
-		Acc = g2.doubleAndAdd(Acc, Bi)
+		if !cfg.IncompleteArithmetic {
+			Acc = g2.doubleGeneric(Acc, true)
+			Acc = addFn(Acc, Bi)
+		} else {
+			Acc = g2.doubleAndAdd(Acc, Bi)
+		}
 	}
 
 	// i = 0: subtract Q, Φ(Q), R, Φ(R) if the first bits are 0
-	tableQ[0] = g2.add(tableQ[0], Acc)
+	tableQ[0] = addFn(tableQ[0], Acc)
 	Acc = g2.Select(u1bits[0], Acc, tableQ[0])
-	tablePhiQ[0] = g2.add(tablePhiQ[0], Acc)
+	tablePhiQ[0] = addFn(tablePhiQ[0], Acc)
 	Acc = g2.Select(u2bits[0], Acc, tablePhiQ[0])
-	tableR[0] = g2.add(tableR[0], Acc)
+	tableR[0] = addFn(tableR[0], Acc)
 	Acc = g2.Select(v1bits[0], Acc, tableR[0])
-	tablePhiR[0] = g2.add(tablePhiR[0], Acc)
+	tablePhiR[0] = addFn(tablePhiR[0], Acc)
 	Acc = g2.Select(v2bits[0], Acc, tablePhiR[0])
 
 	// Acc should now be [2^(nbits-1)]G2 since [u1]Q + [u2]Φ(Q) + [v1]R + [v2]Φ(R) = 0

From cd1ab5f5746ec99c40d0954ad81d384827c72320 Mon Sep 17 00:00:00 2001
From: Youssef El Housni <youssef.elhousni@consensys.net>
Date: Fri, 15 May 2026 10:52:41 -0400
Subject: [PATCH 6/8] fix: up stats

---
 internal/stats/latest_stats.csv | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/internal/stats/latest_stats.csv b/internal/stats/latest_stats.csv
index 8bbce37c15..e7eeee9693 100644
--- a/internal/stats/latest_stats.csv
+++ b/internal/stats/latest_stats.csv
@@ -99,12 +99,12 @@ scalar_mul_G1_bn254,bn254,groth16,108168,163915
 scalar_mul_G1_bn254,bn254,plonk,355353,340385
 scalar_mul_G1_bn254_incomplete,bn254,groth16,51579,81902
 scalar_mul_G1_bn254_incomplete,bn254,plonk,185916,179316
-scalar_mul_G2_bls12381,bn254,groth16,138129,216643
-scalar_mul_G2_bls12381,bn254,plonk,481714,463233
-scalar_mul_G2_bn254,bn254,groth16,102279,158428
-scalar_mul_G2_bn254,bn254,plonk,349799,336420
-scalar_mul_G2_bw6761,bn254,groth16,191105,296444
-scalar_mul_G2_bw6761,bn254,plonk,659865,637514
+scalar_mul_G2_bls12381,bn254,groth16,303403,456748
+scalar_mul_G2_bls12381,bn254,plonk,989674,947132
+scalar_mul_G2_bn254,bn254,groth16,217144,326127
+scalar_mul_G2_bn254,bn254,plonk,721164,689795
+scalar_mul_G2_bw6761,bn254,groth16,389192,617365
+scalar_mul_G2_bw6761,bn254,plonk,1244527,1192451
 scalar_mul_P256,bn254,groth16,96724,151768
 scalar_mul_P256,bn254,plonk,328895,315729
 scalar_mul_P256_incomplete,bn254,groth16,75542,121798

From 624dd29ebee92fc897e16effc2061fc6d0093538 Mon Sep 17 00:00:00 2001
From: Youssef El Housni <youssef.elhousni@consensys.net>
Date: Wed, 27 May 2026 08:01:52 -0400
Subject: [PATCH 7/8] fix: non-zero signed coefficients

---
 std/algebra/emulated/sw_bls12381/g2.go | 11 +++++++----
 std/algebra/emulated/sw_bn254/g2.go    | 11 +++++++----
 std/algebra/emulated/sw_bw6761/g2.go   | 11 +++++++----
 3 files changed, 21 insertions(+), 12 deletions(-)

diff --git a/std/algebra/emulated/sw_bls12381/g2.go b/std/algebra/emulated/sw_bls12381/g2.go
index 214edfca9c..47b3a6aa55 100644
--- a/std/algebra/emulated/sw_bls12381/g2.go
+++ b/std/algebra/emulated/sw_bls12381/g2.go
@@ -736,10 +736,13 @@ func (g2 *G2) scalarMulGLVAndFakeGLV(Q *G2Affine, s *Scalar, opts ...algopts.Alg
 
 	g2.fr.AssertIsEqual(lhs, rhs)
 
-	// Soundness: forbid the trivial all-zeros decomposition. With v1 + λ·v2 ≠ 0
-	// the relation forces (u1, u2) to the unique LLL lattice point and pins
-	// the hinted R = [s]Q.
-	g2.fr.AssertIsDifferent(g2.fr.Add(v1, g2.fr.Mul(g2.eigenvalue, v2)), g2.fr.Zero())
+	// Soundness: forbid the trivial all-zeros decomposition. The MSM consumes
+	// the signed coefficient (±v1) + λ·(±v2) of R, so the non-zero check must
+	// be on that signed value — not on the unsigned hinted limbs — otherwise an
+	// adversarial hint could zero the signed coefficient and leave R unconstrained.
+	signedV1 := g2.fr.Select(isNegv1, g2.fr.Neg(v1), v1)
+	signedV2 := g2.fr.Select(isNegv2, g2.fr.Neg(v2), v2)
+	g2.fr.AssertIsDifferent(g2.fr.Add(signedV1, g2.fr.Mul(g2.eigenvalue, signedV2)), g2.fr.Zero())
 
 	// Hint R = [s]Q.
 	_, point, _, err := emulated.NewVarGenericHint(g2.api, 0, 4, 0, nil,
diff --git a/std/algebra/emulated/sw_bn254/g2.go b/std/algebra/emulated/sw_bn254/g2.go
index 045783b8bc..6df014c75b 100644
--- a/std/algebra/emulated/sw_bn254/g2.go
+++ b/std/algebra/emulated/sw_bn254/g2.go
@@ -527,10 +527,13 @@ func (g2 *G2) scalarMulGLVAndFakeGLV(Q *G2Affine, s *Scalar, opts ...algopts.Alg
 
 	g2.fr.AssertIsEqual(lhs, rhs)
 
-	// Soundness: forbid the trivial all-zeros decomposition. With v1 + λ·v2 ≠ 0
-	// the relation forces (u1, u2) to the unique LLL lattice point and pins
-	// the hinted R = [s]Q.
-	g2.fr.AssertIsDifferent(g2.fr.Add(v1, g2.fr.Mul(g2.eigenvalue, v2)), g2.fr.Zero())
+	// Soundness: forbid the trivial all-zeros decomposition. The MSM consumes
+	// the signed coefficient (±v1) + λ·(±v2) of R, so the non-zero check must
+	// be on that signed value — not on the unsigned hinted limbs — otherwise an
+	// adversarial hint could zero the signed coefficient and leave R unconstrained.
+	signedV1 := g2.fr.Select(isNegv1, g2.fr.Neg(v1), v1)
+	signedV2 := g2.fr.Select(isNegv2, g2.fr.Neg(v2), v2)
+	g2.fr.AssertIsDifferent(g2.fr.Add(signedV1, g2.fr.Mul(g2.eigenvalue, signedV2)), g2.fr.Zero())
 
 	// Hint R = [s]Q.
 	_, point, _, err := emulated.NewVarGenericHint(g2.api, 0, 4, 0, nil,
diff --git a/std/algebra/emulated/sw_bw6761/g2.go b/std/algebra/emulated/sw_bw6761/g2.go
index e58dcaccc3..3b0bce4cfc 100644
--- a/std/algebra/emulated/sw_bw6761/g2.go
+++ b/std/algebra/emulated/sw_bw6761/g2.go
@@ -457,10 +457,13 @@ func (g2 *G2) scalarMulGLVAndFakeGLV(Q *G2Affine, s *Scalar, opts ...algopts.Alg
 
 	g2.fr.AssertIsEqual(lhs, rhs)
 
-	// Soundness: forbid the trivial all-zeros decomposition. With v1 + λ·v2 ≠ 0
-	// the relation forces (u1, u2) to the unique LLL lattice point and pins
-	// the hinted R = [s]Q.
-	g2.fr.AssertIsDifferent(g2.fr.Add(v1, g2.fr.Mul(g2.eigenvalue, v2)), g2.fr.Zero())
+	// Soundness: forbid the trivial all-zeros decomposition. The MSM consumes
+	// the signed coefficient (±v1) + λ·(±v2) of R, so the non-zero check must
+	// be on that signed value — not on the unsigned hinted limbs — otherwise an
+	// adversarial hint could zero the signed coefficient and leave R unconstrained.
+	signedV1 := g2.fr.Select(isNegv1, g2.fr.Neg(v1), v1)
+	signedV2 := g2.fr.Select(isNegv2, g2.fr.Neg(v2), v2)
+	g2.fr.AssertIsDifferent(g2.fr.Add(signedV1, g2.fr.Mul(g2.eigenvalue, signedV2)), g2.fr.Zero())
 
 	// Hint R = [s]Q.
 	_, point, _, err := emulated.NewVarGenericHint(g2.api, 0, 2, 0, nil,

From c635573954c340e45c1540d48ad18c66b7764583 Mon Sep 17 00:00:00 2001
From: Youssef El Housni <youssef.elhousni@consensys.net>
Date: Wed, 27 May 2026 08:04:10 -0400
Subject: [PATCH 8/8] test: up stats

---
 internal/stats/latest_stats.csv | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/internal/stats/latest_stats.csv b/internal/stats/latest_stats.csv
index 5bb72d4997..c0bb1990e7 100644
--- a/internal/stats/latest_stats.csv
+++ b/internal/stats/latest_stats.csv
@@ -99,12 +99,12 @@ scalar_mul_G1_bn254,bn254,groth16,108168,163915
 scalar_mul_G1_bn254,bn254,plonk,355353,340385
 scalar_mul_G1_bn254_incomplete,bn254,groth16,51579,81902
 scalar_mul_G1_bn254_incomplete,bn254,plonk,185916,179316
-scalar_mul_G2_bls12381,bn254,groth16,303403,456748
-scalar_mul_G2_bls12381,bn254,plonk,989674,947132
-scalar_mul_G2_bn254,bn254,groth16,217144,326127
-scalar_mul_G2_bn254,bn254,plonk,721164,689795
-scalar_mul_G2_bw6761,bn254,groth16,389192,617365
-scalar_mul_G2_bw6761,bn254,plonk,1244527,1192451
+scalar_mul_G2_bls12381,bn254,groth16,303414,456759
+scalar_mul_G2_bls12381,bn254,plonk,989717,947171
+scalar_mul_G2_bn254,bn254,groth16,217155,326138
+scalar_mul_G2_bn254,bn254,plonk,721207,689834
+scalar_mul_G2_bw6761,bn254,groth16,389209,617382
+scalar_mul_G2_bw6761,bn254,plonk,1244592,1192510
 scalar_mul_P256,bn254,groth16,96724,151768
 scalar_mul_P256,bn254,plonk,328895,315729
 scalar_mul_P256_incomplete,bn254,groth16,75542,121798