pq-code-package
diff --git a/mldsa/fips202/fips202x4.c b/mldsa/fips202/fips202x4.c
Lines changed: 27 additions & 2 deletions
diff --git a/mldsa/fips202/fips202x4.h b/mldsa/fips202/fips202x4.h
Lines changed: 23 additions & 0 deletions
diff --git a/mldsa/poly.c b/mldsa/poly.c
Lines changed: 65 additions & 28 deletions
diff --git a/mldsa/poly.h b/mldsa/poly.h
Lines changed: 15 additions & 13 deletions
diff --git a/mldsa/polyvec.c b/mldsa/polyvec.c
Lines changed: 0 additions & 49 deletions
diff --git a/mldsa/polyvec.h b/mldsa/polyvec.h
Lines changed: 0 additions & 50 deletions
@@ -11,8 +11,6 @@
 #include "fips202x4.h"
 #include "keccakf1600.h"
 
-typedef mld_shake128x4ctx mld_shake256x4_ctx;
-
 static void mld_keccak_absorb_once_x4(uint64_t *s, uint32_t r,
                                       const uint8_t *in0, const uint8_t *in1,
                                       const uint8_t *in2, const uint8_t *in3,
@@ -130,3 +128,30 @@ void mld_shake128x4_release(mld_shake128x4ctx *state)
   (void)state;
   /*mld_zeroize(state, sizeof(mld_shake128x4ctx));*/
 }
+
+
+void mld_shake256x4_absorb_once(mld_shake256x4ctx *state, const uint8_t *in0,
+                                const uint8_t *in1, const uint8_t *in2,
+                                const uint8_t *in3, size_t inlen)
+{
+  memset(state, 0, sizeof(mld_shake256x4ctx));
+  mld_keccak_absorb_once_x4(state->ctx, SHAKE256_RATE, in0, in1, in2, in3,
+                            inlen, 0x1F);
+}
+
+void mld_shake256x4_squeezeblocks(uint8_t *out0, uint8_t *out1, uint8_t *out2,
+                                  uint8_t *out3, size_t nblocks,
+                                  mld_shake256x4ctx *state)
+{
+  mld_keccak_squeezeblocks_x4(out0, out1, out2, out3, nblocks, state->ctx,
+                              SHAKE256_RATE);
+}
+
+void mld_shake256x4_init(mld_shake256x4ctx *state) { (void)state; }
+void mld_shake256x4_release(mld_shake256x4ctx *state)
+{
+  /* Specification: Partially implements (NOTE(review): FIPS204 intended?)
+   * @[FIPS203, Section 3.3, Destruction of intermediate values] */
+  (void)state;
+  /*mld_zeroize(state, sizeof(mld_shake256x4ctx));*/
+}
@@ -21,6 +21,11 @@ typedef struct
   uint64_t ctx[MLD_KECCAK_LANES * MLD_KECCAK_WAY];
 } mld_shake128x4ctx;
 
+typedef struct
+{
+  uint64_t ctx[MLD_KECCAK_LANES * MLD_KECCAK_WAY];
+} mld_shake256x4ctx;
+
 #define mld_shake128x4_absorb_once FIPS202_NAMESPACE(shake128x4_absorb_once)
 void mld_shake128x4_absorb_once(mld_shake128x4ctx *state, const uint8_t *in0,
                                 const uint8_t *in1, const uint8_t *in2,
@@ -58,4 +63,22 @@ void mld_shake128x4_init(mld_shake128x4ctx *state);
 #define mld_shake128x4_release FIPS202_NAMESPACE(shake128x4_release)
 void mld_shake128x4_release(mld_shake128x4ctx *state);
 
+
+#define mld_shake256x4_absorb_once FIPS202_NAMESPACE(shake256x4_absorb_once)
+void mld_shake256x4_absorb_once(mld_shake256x4ctx *state, const uint8_t *in0,
+                                const uint8_t *in1, const uint8_t *in2,
+                                const uint8_t *in3, size_t inlen);
+
+#define mld_shake256x4_squeezeblocks FIPS202_NAMESPACE(shake256x4_squeezeblocks)
+void mld_shake256x4_squeezeblocks(uint8_t *out0, uint8_t *out1, uint8_t *out2,
+                                  uint8_t *out3, size_t nblocks,
+                                  mld_shake256x4ctx *state);
+
+#define mld_shake256x4_init FIPS202_NAMESPACE(shake256x4_init)
+void mld_shake256x4_init(mld_shake256x4ctx *state);
+
+#define mld_shake256x4_release FIPS202_NAMESPACE(shake256x4_release)
+void mld_shake256x4_release(mld_shake256x4ctx *state);
+
+
 #endif /* !MLD_FIPS202_FIPS202X4_H */
@@ -332,11 +332,11 @@ void poly_uniform(poly *a, const uint8_t seed[MLDSA_SEEDBYTES + 2])
   unsigned int ctr;
   unsigned int buflen = POLY_UNIFORM_NBLOCKS * STREAM128_BLOCKBYTES;
   MLD_ALIGN uint8_t buf[POLY_UNIFORM_NBLOCKS * STREAM128_BLOCKBYTES];
-  mld_xof_ctx state;
+  mld_xof128_ctx state;
 
-  mld_xof_init(&state);
-  mld_xof_absorb(&state, seed, MLDSA_SEEDBYTES + 2);
-  mld_xof_squeezeblocks(buf, POLY_UNIFORM_NBLOCKS, &state);
+  mld_xof128_init(&state);
+  mld_xof128_absorb(&state, seed, MLDSA_SEEDBYTES + 2);
+  mld_xof128_squeezeblocks(buf, POLY_UNIFORM_NBLOCKS, &state);
 
   ctr = rej_uniform(a->coeffs, MLDSA_N, 0, buf, buflen);
   buflen = STREAM128_BLOCKBYTES;
@@ -347,7 +347,7 @@ void poly_uniform(poly *a, const uint8_t seed[MLDSA_SEEDBYTES + 2])
     invariant((&state)->pos <= SHAKE128_RATE)
     invariant(array_bound(a->coeffs, 0, ctr, 0, MLDSA_Q)))
   {
-    mld_xof_squeezeblocks(buf, 1, &state);
+    mld_xof128_squeezeblocks(buf, 1, &state);
     ctr = rej_uniform(a->coeffs, MLDSA_N, ctr, buf, buflen);
   }
 }
@@ -361,20 +361,20 @@ void poly_uniform_4x(poly *vec,
 
   /* Tracks the number of coefficients we have already sampled */
   unsigned ctr[4];
-  mld_xof_x4_ctx state;
+  mld_xof128_x4_ctx state;
   unsigned buflen;
 
 
-  mld_xof_x4_init(&state);
-  mld_xof_x4_absorb(&state, seed, MLDSA_SEEDBYTES + 2);
+  mld_xof128_x4_init(&state);
+  mld_xof128_x4_absorb(&state, seed, MLDSA_SEEDBYTES + 2);
 
   /*
    * Initially, squeeze heuristic number of POLY_UNIFORM_NBLOCKS.
    * This should generate the matrix entries with high probability.
    */
 
 
-  mld_xof_x4_squeezeblocks(buf, POLY_UNIFORM_NBLOCKS, &state);
+  mld_xof128_x4_squeezeblocks(buf, POLY_UNIFORM_NBLOCKS, &state);
   buflen = POLY_UNIFORM_NBLOCKS * STREAM128_BLOCKBYTES;
   ctr[0] = rej_uniform(vec[0].coeffs, MLDSA_N, 0, buf[0], buflen);
   ctr[1] = rej_uniform(vec[1].coeffs, MLDSA_N, 0, buf[1], buflen);
@@ -390,13 +390,13 @@ void poly_uniform_4x(poly *vec,
   while (ctr[0] < MLDSA_N || ctr[1] < MLDSA_N || ctr[2] < MLDSA_N ||
          ctr[3] < MLDSA_N)
   {
-    mld_xof_x4_squeezeblocks(buf, 1, &state);
+    mld_xof128_x4_squeezeblocks(buf, 1, &state);
     ctr[0] = rej_uniform(vec[0].coeffs, MLDSA_N, ctr[0], buf[0], buflen);
     ctr[1] = rej_uniform(vec[1].coeffs, MLDSA_N, ctr[1], buf[1], buflen);
     ctr[2] = rej_uniform(vec[2].coeffs, MLDSA_N, ctr[2], buf[2], buflen);
     ctr[3] = rej_uniform(vec[3].coeffs, MLDSA_N, ctr[3], buf[3], buflen);
   }
-  mld_xof_x4_release(&state);
+  mld_xof128_x4_release(&state);
 }
 
 /*************************************************
@@ -486,31 +486,68 @@ __contract__(
   return ctr;
 }
 
-void poly_uniform_eta(poly *a, const uint8_t seed[MLDSA_CRHBYTES],
-                      uint16_t nonce)
+void poly_uniform_eta_4x(poly *r0, poly *r1, poly *r2, poly *r3,
+                         const uint8_t seed[MLDSA_CRHBYTES], uint8_t nonce0,
+                         uint8_t nonce1, uint8_t nonce2, uint8_t nonce3)
 {
-  unsigned int ctr;
-  unsigned int buflen = POLY_UNIFORM_ETA_NBLOCKS * STREAM256_BLOCKBYTES;
-  MLD_ALIGN uint8_t buf[POLY_UNIFORM_ETA_NBLOCKS * STREAM256_BLOCKBYTES];
-  stream256_state state;
+  /* Temporary buffers for XOF output before rejection sampling */
+  MLD_ALIGN uint8_t
+      buf[4][MLD_ALIGN_UP(POLY_UNIFORM_ETA_NBLOCKS * STREAM256_BLOCKBYTES)];
 
-  stream256_init(&state, seed, nonce);
-  stream256_squeezeblocks(buf, POLY_UNIFORM_ETA_NBLOCKS, &state);
+  MLD_ALIGN uint8_t extseed[4][MLD_ALIGN_UP(MLDSA_CRHBYTES + 2)];
+
+  /* Tracks the number of coefficients we have already sampled */
+  unsigned ctr[4];
+  mld_xof256_x4_ctx state;
+  unsigned buflen;
+
+  memcpy(extseed[0], seed, MLDSA_CRHBYTES);
+  memcpy(extseed[1], seed, MLDSA_CRHBYTES);
+  memcpy(extseed[2], seed, MLDSA_CRHBYTES);
+  memcpy(extseed[3], seed, MLDSA_CRHBYTES);
+  extseed[0][MLDSA_CRHBYTES] = nonce0;
+  extseed[1][MLDSA_CRHBYTES] = nonce1;
+  extseed[2][MLDSA_CRHBYTES] = nonce2;
+  extseed[3][MLDSA_CRHBYTES] = nonce3;
+  extseed[0][MLDSA_CRHBYTES + 1] = 0;
+  extseed[1][MLDSA_CRHBYTES + 1] = 0;
+  extseed[2][MLDSA_CRHBYTES + 1] = 0;
+  extseed[3][MLDSA_CRHBYTES + 1] = 0;
+
+  mld_xof256_x4_init(&state);
+  mld_xof256_x4_absorb(&state, extseed, MLDSA_CRHBYTES + 2);
+
+  /*
+   * Initially, squeeze POLY_UNIFORM_ETA_NBLOCKS blocks (a heuristic count).
+   * This should generate all coefficients with high probability.
+   */
+  mld_xof256_x4_squeezeblocks(buf, POLY_UNIFORM_ETA_NBLOCKS, &state);
+  buflen = POLY_UNIFORM_ETA_NBLOCKS * STREAM256_BLOCKBYTES;
 
-  ctr = rej_eta(a->coeffs, MLDSA_N, 0, buf, buflen);
+  ctr[0] = rej_eta(r0->coeffs, MLDSA_N, 0, buf[0], buflen);
+  ctr[1] = rej_eta(r1->coeffs, MLDSA_N, 0, buf[1], buflen);
+  ctr[2] = rej_eta(r2->coeffs, MLDSA_N, 0, buf[2], buflen);
+  ctr[3] = rej_eta(r3->coeffs, MLDSA_N, 0, buf[3], buflen);
+
+  /*
+   * So long as not all polynomials have been fully sampled, squeeze
+   * one more block at a time until we're done.
+   */
   buflen = STREAM256_BLOCKBYTES;
-  while (ctr < MLDSA_N)
-  __loop__(
-    assigns(ctr, state, memory_slice(a, sizeof(poly)), object_whole(buf))
-    invariant(ctr <= MLDSA_N)
-    invariant((state).pos <= SHAKE256_RATE)
-    invariant(array_abs_bound(a->coeffs, 0, ctr, MLDSA_ETA + 1)))
+  while (ctr[0] < MLDSA_N || ctr[1] < MLDSA_N || ctr[2] < MLDSA_N ||
+         ctr[3] < MLDSA_N)
   {
-    stream256_squeezeblocks(buf, 1, &state);
-    ctr = rej_eta(a->coeffs, MLDSA_N, ctr, buf, buflen);
+    mld_xof256_x4_squeezeblocks(buf, 1, &state);
+    ctr[0] = rej_eta(r0->coeffs, MLDSA_N, ctr[0], buf[0], buflen);
+    ctr[1] = rej_eta(r1->coeffs, MLDSA_N, ctr[1], buf[1], buflen);
+    ctr[2] = rej_eta(r2->coeffs, MLDSA_N, ctr[2], buf[2], buflen);
+    ctr[3] = rej_eta(r3->coeffs, MLDSA_N, ctr[3], buf[3], buflen);
   }
+
+  mld_xof256_x4_release(&state);
 }
 
+
 #define POLY_UNIFORM_GAMMA1_NBLOCKS \
   ((MLDSA_POLYZ_PACKEDBYTES + STREAM256_BLOCKBYTES - 1) / STREAM256_BLOCKBYTES)
 void poly_uniform_gamma1(poly *a, const uint8_t seed[MLDSA_CRHBYTES],
 
@@ -320,27 +320,29 @@ __contract__(
 void poly_uniform_4x(poly *vec,
                      uint8_t seed[4][MLD_ALIGN_UP(MLDSA_SEEDBYTES + 2)]);
 
-#define poly_uniform_eta MLD_NAMESPACE(poly_uniform_eta)
+
+#define poly_uniform_eta_4x MLD_NAMESPACE(poly_uniform_eta_4x)
 /*************************************************
- * Name:        poly_uniform_eta
+ * Name:        poly_uniform_eta_4x
  *
- * Description: Sample polynomial with uniformly random coefficients
+ * Description: Sample four polynomials with uniformly random coefficients
  *              in [-MLDSA_ETA,MLDSA_ETA] by performing rejection sampling on
- *              the output stream from SHAKE256(seed|nonce)
+ *              the output stream from SHAKE256(seed|nonce_i)
  *
- * Arguments:   - poly *a: pointer to output polynomial
+ * Arguments:   - poly *r0: pointer to first output polynomial
+ *              - poly *r1: pointer to second output polynomial
+ *              - poly *r2: pointer to third output polynomial
+ *              - poly *r3: pointer to fourth output polynomial
  *              - const uint8_t seed[]: byte array with seed of length
  *                MLDSA_CRHBYTES
- *              - uint16_t nonce: 2-byte nonce
+ *              - uint8_t nonce0: first nonce
+ *              - uint8_t nonce1: second nonce
+ *              - uint8_t nonce2: third nonce
+ *              - uint8_t nonce3: fourth nonce
  **************************************************/
-void poly_uniform_eta(poly *a, const uint8_t seed[MLDSA_CRHBYTES],
-                      uint16_t nonce)
-__contract__(
-  requires(memory_no_alias(a, sizeof(poly)))
-  requires(memory_no_alias(seed, MLDSA_CRHBYTES))
-  assigns(memory_slice(a, sizeof(poly)))
-  ensures(array_abs_bound(a->coeffs, 0, MLDSA_N, MLDSA_ETA + 1))
-);
+void poly_uniform_eta_4x(poly *r0, poly *r1, poly *r2, poly *r3,
+                         const uint8_t seed[MLDSA_CRHBYTES], uint8_t nonce0,
+                         uint8_t nonce1, uint8_t nonce2, uint8_t nonce3);
 
 #define poly_uniform_gamma1 MLD_NAMESPACE(poly_uniform_gamma1)
 /*************************************************
 
@@ -82,31 +82,6 @@ void polyvec_matrix_pointwise_montgomery(polyveck *t,
 /**************************************************************/
 /************ Vectors of polynomials of length MLDSA_L **************/
 /**************************************************************/
-
-void polyvecl_uniform_eta(polyvecl *v, const uint8_t seed[MLDSA_CRHBYTES],
-                          uint16_t nonce)
-{
-  unsigned int i;
-  uint16_t n = nonce;
-
-  for (i = 0; i < MLDSA_L; ++i)
-  __loop__(
-    assigns(i, n, object_whole(v))
-    invariant(i <= MLDSA_L)
-    invariant(n == nonce + i) 
-    invariant(forall(k1, 0, i,
-      array_abs_bound(v->vec[k1].coeffs, 0, MLDSA_N, MLDSA_ETA + 1))))
-  {
-    poly t;
-    poly_uniform_eta(&t, seed, n);
-    n++;
-    /* Full struct assignment from local variables to simplify proof */
-    /* TODO: eliminate once CBMC resolves
-     * https://github.com/diffblue/cbmc/issues/8617 */
-    v->vec[i] = t;
-  }
-}
-
 void polyvecl_uniform_gamma1(polyvecl *v, const uint8_t seed[MLDSA_CRHBYTES],
                              uint16_t nonce)
 {
@@ -238,30 +213,6 @@ int polyvecl_chknorm(const polyvecl *v, int32_t bound)
 /************ Vectors of polynomials of length MLDSA_K **************/
 /**************************************************************/
 
-void polyveck_uniform_eta(polyveck *v, const uint8_t seed[MLDSA_CRHBYTES],
-                          uint16_t nonce)
-{
-  unsigned int i;
-  uint16_t n = nonce;
-
-  for (i = 0; i < MLDSA_K; ++i)
-  __loop__(
-    assigns(i, n, object_whole(v))
-    invariant(i <= MLDSA_K)
-    invariant(n == nonce + i) 
-    invariant(forall(k1, 0, i,
-      array_abs_bound(v->vec[k1].coeffs, 0, MLDSA_N, MLDSA_ETA + 1))))
-  {
-    poly t;
-    poly_uniform_eta(&t, seed, n);
-    n++;
-    /* Full struct assignment from local variables to simplify proof */
-    /* TODO: eliminate once CBMC resolves
-     * https://github.com/diffblue/cbmc/issues/8617 */
-    v->vec[i] = t;
-  }
-}
-
 void polyveck_reduce(polyveck *v)
 {
   unsigned int i;
 
@@ -16,31 +16,6 @@ typedef struct
   poly vec[MLDSA_L];
 } polyvecl;
 
-#define polyvecl_uniform_eta MLD_NAMESPACE(polyvecl_uniform_eta)
-/*************************************************
- * Name:        polyvecl_uniform_eta
- *
- * Description: Sample vector of polynomials with uniformly random coefficients
- *              in [-MLDSA_ETA, MLDSA_ETA] by performing rejection sampling on
- *              the output stream from SHAKE256(seed|nonce). The vector has
- *              MLDSA_L polynomials.
- *
- * Arguments:   - polyvecl *v: pointer to output polynomial vector
- *              - const uint8_t seed[]: byte array with seed of length
- *                MLDSA_CRHBYTES
- *              - uint16_t nonce: 2-byte nonce
- **************************************************/
-void polyvecl_uniform_eta(polyvecl *v, const uint8_t seed[MLDSA_CRHBYTES],
-                          uint16_t nonce)
-__contract__(
-  requires(memory_no_alias(v, sizeof(polyvecl)))
-  requires(memory_no_alias(seed, MLDSA_CRHBYTES))
-  requires(nonce <= UINT16_MAX - MLDSA_L)
-  assigns(memory_slice(v, sizeof(polyvecl)))
-  ensures(forall(k0, 0, MLDSA_L,
-    array_abs_bound(v->vec[k0].coeffs, 0, MLDSA_N, MLDSA_ETA + 1)))
-);
-
 #define polyvecl_uniform_gamma1 MLD_NAMESPACE(polyvecl_uniform_gamma1)
 void polyvecl_uniform_gamma1(polyvecl *v, const uint8_t seed[MLDSA_CRHBYTES],
                              uint16_t nonce);
@@ -185,31 +160,6 @@ typedef struct
   poly vec[MLDSA_K];
 } polyveck;
 
-#define polyveck_uniform_eta MLD_NAMESPACE(polyveck_uniform_eta)
-/*************************************************
- * Name:        polyveck_uniform_eta
- *
- * Description: Sample vector of polynomials with uniformly random coefficients
- *              in [-MLDSA_ETA, MLDSA_ETA] by performing rejection sampling on
- *              the output stream from SHAKE256(seed|nonce). The vector has
- *              MLDSA_K polynomials.
- *
- * Arguments:   - polyveck *v: pointer to output polynomial vector
- *              - const uint8_t seed[]: byte array with seed of length
- *                MLDSA_CRHBYTES
- *              - uint16_t nonce: 2-byte nonce
- **************************************************/
-void polyveck_uniform_eta(polyveck *v, const uint8_t seed[MLDSA_CRHBYTES],
-                          uint16_t nonce)
-__contract__(
-  requires(memory_no_alias(v, sizeof(polyveck)))
-  requires(memory_no_alias(seed, MLDSA_CRHBYTES))
-  requires(nonce <= UINT16_MAX - MLDSA_K)
-  assigns(memory_slice(v, sizeof(polyveck)))
-  ensures(forall(k0, 0, MLDSA_K,
-  array_abs_bound(v->vec[k0].coeffs, 0, MLDSA_N, MLDSA_ETA + 1)))
-);
-
 #define polyveck_reduce MLD_NAMESPACE(polyveck_reduce)
 /*************************************************
  * Name:        polyveck_reduce