Skip to content

Commit 4acc22a

Browse files
authored
Merge pull request #245 from pq-code-package/poly_uniform_eta_4x
Sample `s1`, `s2` vectors using 4-way batched Keccak
2 parents dec77c3 + 049e603 commit 4acc22a

File tree

14 files changed

+178
-365
lines changed

14 files changed

+178
-365
lines changed

mldsa/fips202/fips202x4.c

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,6 @@
1111
#include "fips202x4.h"
1212
#include "keccakf1600.h"
1313

14-
typedef mld_shake128x4ctx mld_shake256x4_ctx;
15-
1614
static void mld_keccak_absorb_once_x4(uint64_t *s, uint32_t r,
1715
const uint8_t *in0, const uint8_t *in1,
1816
const uint8_t *in2, const uint8_t *in3,
@@ -130,3 +128,30 @@ void mld_shake128x4_release(mld_shake128x4ctx *state)
130128
(void)state;
131129
/*mld_zeroize(state, sizeof(mld_shake128x4ctx));*/
132130
}
131+
132+
133+
void mld_shake256x4_absorb_once(mld_shake256x4ctx *state, const uint8_t *in0,
134+
const uint8_t *in1, const uint8_t *in2,
135+
const uint8_t *in3, size_t inlen)
136+
{
137+
memset(state, 0, sizeof(mld_shake256x4ctx));
138+
mld_keccak_absorb_once_x4(state->ctx, SHAKE256_RATE, in0, in1, in2, in3,
139+
inlen, 0x1F);
140+
}
141+
142+
void mld_shake256x4_squeezeblocks(uint8_t *out0, uint8_t *out1, uint8_t *out2,
143+
uint8_t *out3, size_t nblocks,
144+
mld_shake256x4ctx *state)
145+
{
146+
mld_keccak_squeezeblocks_x4(out0, out1, out2, out3, nblocks, state->ctx,
147+
SHAKE256_RATE);
148+
}
149+
150+
void mld_shake256x4_init(mld_shake256x4ctx *state) { (void)state; }
151+
void mld_shake256x4_release(mld_shake256x4ctx *state)
152+
{
153+
/* Specification: Partially implements
154+
* @[FIPS204, Section 3.6.3, Intermediate Values] */
155+
(void)state;
156+
/*mld_zeroize(state, sizeof(mld_shake256x4ctx));*/
157+
}

mldsa/fips202/fips202x4.h

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,11 @@ typedef struct
2121
uint64_t ctx[MLD_KECCAK_LANES * MLD_KECCAK_WAY];
2222
} mld_shake128x4ctx;
2323

24+
typedef struct
25+
{
26+
uint64_t ctx[MLD_KECCAK_LANES * MLD_KECCAK_WAY];
27+
} mld_shake256x4ctx;
28+
2429
#define mld_shake128x4_absorb_once FIPS202_NAMESPACE(shake128x4_absorb_once)
2530
void mld_shake128x4_absorb_once(mld_shake128x4ctx *state, const uint8_t *in0,
2631
const uint8_t *in1, const uint8_t *in2,
@@ -58,4 +63,22 @@ void mld_shake128x4_init(mld_shake128x4ctx *state);
5863
#define mld_shake128x4_release FIPS202_NAMESPACE(shake128x4_release)
5964
void mld_shake128x4_release(mld_shake128x4ctx *state);
6065

66+
67+
#define mld_shake256x4_absorb_once FIPS202_NAMESPACE(shake256x4_absorb_once)
68+
void mld_shake256x4_absorb_once(mld_shake256x4ctx *state, const uint8_t *in0,
69+
const uint8_t *in1, const uint8_t *in2,
70+
const uint8_t *in3, size_t inlen);
71+
72+
#define mld_shake256x4_squeezeblocks FIPS202_NAMESPACE(shake256x4_squeezeblocks)
73+
void mld_shake256x4_squeezeblocks(uint8_t *out0, uint8_t *out1, uint8_t *out2,
74+
uint8_t *out3, size_t nblocks,
75+
mld_shake256x4ctx *state);
76+
77+
#define mld_shake256x4_init FIPS202_NAMESPACE(shake256x4_init)
78+
void mld_shake256x4_init(mld_shake256x4ctx *state);
79+
80+
#define mld_shake256x4_release FIPS202_NAMESPACE(shake256x4_release)
81+
void mld_shake256x4_release(mld_shake256x4ctx *state);
82+
83+
6184
#endif /* !MLD_FIPS202_FIPS202X4_H */

mldsa/poly.c

Lines changed: 65 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -332,11 +332,11 @@ void poly_uniform(poly *a, const uint8_t seed[MLDSA_SEEDBYTES + 2])
332332
unsigned int ctr;
333333
unsigned int buflen = POLY_UNIFORM_NBLOCKS * STREAM128_BLOCKBYTES;
334334
MLD_ALIGN uint8_t buf[POLY_UNIFORM_NBLOCKS * STREAM128_BLOCKBYTES];
335-
mld_xof_ctx state;
335+
mld_xof128_ctx state;
336336

337-
mld_xof_init(&state);
338-
mld_xof_absorb(&state, seed, MLDSA_SEEDBYTES + 2);
339-
mld_xof_squeezeblocks(buf, POLY_UNIFORM_NBLOCKS, &state);
337+
mld_xof128_init(&state);
338+
mld_xof128_absorb(&state, seed, MLDSA_SEEDBYTES + 2);
339+
mld_xof128_squeezeblocks(buf, POLY_UNIFORM_NBLOCKS, &state);
340340

341341
ctr = rej_uniform(a->coeffs, MLDSA_N, 0, buf, buflen);
342342
buflen = STREAM128_BLOCKBYTES;
@@ -347,7 +347,7 @@ void poly_uniform(poly *a, const uint8_t seed[MLDSA_SEEDBYTES + 2])
347347
invariant((&state)->pos <= SHAKE128_RATE)
348348
invariant(array_bound(a->coeffs, 0, ctr, 0, MLDSA_Q)))
349349
{
350-
mld_xof_squeezeblocks(buf, 1, &state);
350+
mld_xof128_squeezeblocks(buf, 1, &state);
351351
ctr = rej_uniform(a->coeffs, MLDSA_N, ctr, buf, buflen);
352352
}
353353
}
@@ -361,20 +361,20 @@ void poly_uniform_4x(poly *vec,
361361

362362
/* Tracks the number of coefficients we have already sampled */
363363
unsigned ctr[4];
364-
mld_xof_x4_ctx state;
364+
mld_xof128_x4_ctx state;
365365
unsigned buflen;
366366

367367

368-
mld_xof_x4_init(&state);
369-
mld_xof_x4_absorb(&state, seed, MLDSA_SEEDBYTES + 2);
368+
mld_xof128_x4_init(&state);
369+
mld_xof128_x4_absorb(&state, seed, MLDSA_SEEDBYTES + 2);
370370

371371
/*
372372
* Initially, squeeze heuristic number of POLY_UNIFORM_NBLOCKS.
373373
* This should generate the matrix entries with high probability.
374374
*/
375375

376376

377-
mld_xof_x4_squeezeblocks(buf, POLY_UNIFORM_NBLOCKS, &state);
377+
mld_xof128_x4_squeezeblocks(buf, POLY_UNIFORM_NBLOCKS, &state);
378378
buflen = POLY_UNIFORM_NBLOCKS * STREAM128_BLOCKBYTES;
379379
ctr[0] = rej_uniform(vec[0].coeffs, MLDSA_N, 0, buf[0], buflen);
380380
ctr[1] = rej_uniform(vec[1].coeffs, MLDSA_N, 0, buf[1], buflen);
@@ -390,13 +390,13 @@ void poly_uniform_4x(poly *vec,
390390
while (ctr[0] < MLDSA_N || ctr[1] < MLDSA_N || ctr[2] < MLDSA_N ||
391391
ctr[3] < MLDSA_N)
392392
{
393-
mld_xof_x4_squeezeblocks(buf, 1, &state);
393+
mld_xof128_x4_squeezeblocks(buf, 1, &state);
394394
ctr[0] = rej_uniform(vec[0].coeffs, MLDSA_N, ctr[0], buf[0], buflen);
395395
ctr[1] = rej_uniform(vec[1].coeffs, MLDSA_N, ctr[1], buf[1], buflen);
396396
ctr[2] = rej_uniform(vec[2].coeffs, MLDSA_N, ctr[2], buf[2], buflen);
397397
ctr[3] = rej_uniform(vec[3].coeffs, MLDSA_N, ctr[3], buf[3], buflen);
398398
}
399-
mld_xof_x4_release(&state);
399+
mld_xof128_x4_release(&state);
400400
}
401401

402402
/*************************************************
@@ -486,31 +486,68 @@ __contract__(
486486
return ctr;
487487
}
488488

489-
void poly_uniform_eta(poly *a, const uint8_t seed[MLDSA_CRHBYTES],
490-
uint16_t nonce)
489+
void poly_uniform_eta_4x(poly *r0, poly *r1, poly *r2, poly *r3,
490+
const uint8_t seed[MLDSA_CRHBYTES], uint8_t nonce0,
491+
uint8_t nonce1, uint8_t nonce2, uint8_t nonce3)
491492
{
492-
unsigned int ctr;
493-
unsigned int buflen = POLY_UNIFORM_ETA_NBLOCKS * STREAM256_BLOCKBYTES;
494-
MLD_ALIGN uint8_t buf[POLY_UNIFORM_ETA_NBLOCKS * STREAM256_BLOCKBYTES];
495-
stream256_state state;
493+
/* Temporary buffers for XOF output before rejection sampling */
494+
MLD_ALIGN uint8_t
495+
buf[4][MLD_ALIGN_UP(POLY_UNIFORM_ETA_NBLOCKS * STREAM256_BLOCKBYTES)];
496496

497-
stream256_init(&state, seed, nonce);
498-
stream256_squeezeblocks(buf, POLY_UNIFORM_ETA_NBLOCKS, &state);
497+
MLD_ALIGN uint8_t extseed[4][MLD_ALIGN_UP(MLDSA_CRHBYTES + 2)];
498+
499+
/* Tracks the number of coefficients we have already sampled */
500+
unsigned ctr[4];
501+
mld_xof256_x4_ctx state;
502+
unsigned buflen;
503+
504+
memcpy(extseed[0], seed, MLDSA_CRHBYTES);
505+
memcpy(extseed[1], seed, MLDSA_CRHBYTES);
506+
memcpy(extseed[2], seed, MLDSA_CRHBYTES);
507+
memcpy(extseed[3], seed, MLDSA_CRHBYTES);
508+
extseed[0][MLDSA_CRHBYTES] = nonce0;
509+
extseed[1][MLDSA_CRHBYTES] = nonce1;
510+
extseed[2][MLDSA_CRHBYTES] = nonce2;
511+
extseed[3][MLDSA_CRHBYTES] = nonce3;
512+
extseed[0][MLDSA_CRHBYTES + 1] = 0;
513+
extseed[1][MLDSA_CRHBYTES + 1] = 0;
514+
extseed[2][MLDSA_CRHBYTES + 1] = 0;
515+
extseed[3][MLDSA_CRHBYTES + 1] = 0;
516+
517+
mld_xof256_x4_init(&state);
518+
mld_xof256_x4_absorb(&state, extseed, MLDSA_CRHBYTES + 2);
519+
520+
/*
521+
* Initially, squeeze heuristic number of POLY_UNIFORM_ETA_NBLOCKS.
522+
* This should generate the coefficients with high probability.
523+
*/
524+
mld_xof256_x4_squeezeblocks(buf, POLY_UNIFORM_ETA_NBLOCKS, &state);
525+
buflen = POLY_UNIFORM_ETA_NBLOCKS * STREAM256_BLOCKBYTES;
499526

500-
ctr = rej_eta(a->coeffs, MLDSA_N, 0, buf, buflen);
527+
ctr[0] = rej_eta(r0->coeffs, MLDSA_N, 0, buf[0], buflen);
528+
ctr[1] = rej_eta(r1->coeffs, MLDSA_N, 0, buf[1], buflen);
529+
ctr[2] = rej_eta(r2->coeffs, MLDSA_N, 0, buf[2], buflen);
530+
ctr[3] = rej_eta(r3->coeffs, MLDSA_N, 0, buf[3], buflen);
531+
532+
/*
533+
* So long as not all entries have been generated, squeeze
534+
* one more block at a time until we're done.
535+
*/
501536
buflen = STREAM256_BLOCKBYTES;
502-
while (ctr < MLDSA_N)
503-
__loop__(
504-
assigns(ctr, state, memory_slice(a, sizeof(poly)), object_whole(buf))
505-
invariant(ctr <= MLDSA_N)
506-
invariant((state).pos <= SHAKE256_RATE)
507-
invariant(array_abs_bound(a->coeffs, 0, ctr, MLDSA_ETA + 1)))
537+
while (ctr[0] < MLDSA_N || ctr[1] < MLDSA_N || ctr[2] < MLDSA_N ||
538+
ctr[3] < MLDSA_N)
508539
{
509-
stream256_squeezeblocks(buf, 1, &state);
510-
ctr = rej_eta(a->coeffs, MLDSA_N, ctr, buf, buflen);
540+
mld_xof256_x4_squeezeblocks(buf, 1, &state);
541+
ctr[0] = rej_eta(r0->coeffs, MLDSA_N, ctr[0], buf[0], buflen);
542+
ctr[1] = rej_eta(r1->coeffs, MLDSA_N, ctr[1], buf[1], buflen);
543+
ctr[2] = rej_eta(r2->coeffs, MLDSA_N, ctr[2], buf[2], buflen);
544+
ctr[3] = rej_eta(r3->coeffs, MLDSA_N, ctr[3], buf[3], buflen);
511545
}
546+
547+
mld_xof256_x4_release(&state);
512548
}
513549

550+
514551
#define POLY_UNIFORM_GAMMA1_NBLOCKS \
515552
((MLDSA_POLYZ_PACKEDBYTES + STREAM256_BLOCKBYTES - 1) / STREAM256_BLOCKBYTES)
516553
void poly_uniform_gamma1(poly *a, const uint8_t seed[MLDSA_CRHBYTES],

mldsa/poly.h

Lines changed: 15 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -320,27 +320,29 @@ __contract__(
320320
void poly_uniform_4x(poly *vec,
321321
uint8_t seed[4][MLD_ALIGN_UP(MLDSA_SEEDBYTES + 2)]);
322322

323-
#define poly_uniform_eta MLD_NAMESPACE(poly_uniform_eta)
323+
324+
#define poly_uniform_eta_4x MLD_NAMESPACE(poly_uniform_eta_4x)
324325
/*************************************************
325326
* Name: poly_uniform_eta_4x
326327
*
327-
* Description: Sample polynomial with uniformly random coefficients
328+
* Description: Sample four polynomials with uniformly random coefficients
328329
* in [-MLDSA_ETA,MLDSA_ETA] by performing rejection sampling on
329-
* the output stream from SHAKE256(seed|nonce)
330+
* the output stream from SHAKE256(seed|nonce_i)
330331
*
331-
* Arguments: - poly *a: pointer to output polynomial
332+
* Arguments: - poly *r0: pointer to first output polynomial
333+
* - poly *r1: pointer to second output polynomial
334+
* - poly *r2: pointer to third output polynomial
335+
* - poly *r3: pointer to fourth output polynomial
332336
* - const uint8_t seed[]: byte array with seed of length
333337
* MLDSA_CRHBYTES
334-
* - uint16_t nonce: 2-byte nonce
338+
* - uint8_t nonce0: first nonce
339+
* - uint8_t nonce1: second nonce
340+
* - uint8_t nonce2: third nonce
341+
* - uint8_t nonce3: fourth nonce
335342
**************************************************/
336-
void poly_uniform_eta(poly *a, const uint8_t seed[MLDSA_CRHBYTES],
337-
uint16_t nonce)
338-
__contract__(
339-
requires(memory_no_alias(a, sizeof(poly)))
340-
requires(memory_no_alias(seed, MLDSA_CRHBYTES))
341-
assigns(memory_slice(a, sizeof(poly)))
342-
ensures(array_abs_bound(a->coeffs, 0, MLDSA_N, MLDSA_ETA + 1))
343-
);
343+
void poly_uniform_eta_4x(poly *r0, poly *r1, poly *r2, poly *r3,
344+
const uint8_t seed[MLDSA_CRHBYTES], uint8_t nonce0,
345+
uint8_t nonce1, uint8_t nonce2, uint8_t nonce3);
344346

345347
#define poly_uniform_gamma1 MLD_NAMESPACE(poly_uniform_gamma1)
346348
/*************************************************

mldsa/polyvec.c

Lines changed: 0 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -82,31 +82,6 @@ void polyvec_matrix_pointwise_montgomery(polyveck *t,
8282
/**************************************************************/
8383
/************ Vectors of polynomials of length MLDSA_L **************/
8484
/**************************************************************/
85-
86-
void polyvecl_uniform_eta(polyvecl *v, const uint8_t seed[MLDSA_CRHBYTES],
87-
uint16_t nonce)
88-
{
89-
unsigned int i;
90-
uint16_t n = nonce;
91-
92-
for (i = 0; i < MLDSA_L; ++i)
93-
__loop__(
94-
assigns(i, n, object_whole(v))
95-
invariant(i <= MLDSA_L)
96-
invariant(n == nonce + i)
97-
invariant(forall(k1, 0, i,
98-
array_abs_bound(v->vec[k1].coeffs, 0, MLDSA_N, MLDSA_ETA + 1))))
99-
{
100-
poly t;
101-
poly_uniform_eta(&t, seed, n);
102-
n++;
103-
/* Full struct assignment from local variables to simplify proof */
104-
/* TODO: eliminate once CBMC resolves
105-
* https://github.com/diffblue/cbmc/issues/8617 */
106-
v->vec[i] = t;
107-
}
108-
}
109-
11085
void polyvecl_uniform_gamma1(polyvecl *v, const uint8_t seed[MLDSA_CRHBYTES],
11186
uint16_t nonce)
11287
{
@@ -238,30 +213,6 @@ int polyvecl_chknorm(const polyvecl *v, int32_t bound)
238213
/************ Vectors of polynomials of length MLDSA_K **************/
239214
/**************************************************************/
240215

241-
void polyveck_uniform_eta(polyveck *v, const uint8_t seed[MLDSA_CRHBYTES],
242-
uint16_t nonce)
243-
{
244-
unsigned int i;
245-
uint16_t n = nonce;
246-
247-
for (i = 0; i < MLDSA_K; ++i)
248-
__loop__(
249-
assigns(i, n, object_whole(v))
250-
invariant(i <= MLDSA_K)
251-
invariant(n == nonce + i)
252-
invariant(forall(k1, 0, i,
253-
array_abs_bound(v->vec[k1].coeffs, 0, MLDSA_N, MLDSA_ETA + 1))))
254-
{
255-
poly t;
256-
poly_uniform_eta(&t, seed, n);
257-
n++;
258-
/* Full struct assignment from local variables to simplify proof */
259-
/* TODO: eliminate once CBMC resolves
260-
* https://github.com/diffblue/cbmc/issues/8617 */
261-
v->vec[i] = t;
262-
}
263-
}
264-
265216
void polyveck_reduce(polyveck *v)
266217
{
267218
unsigned int i;

mldsa/polyvec.h

Lines changed: 0 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -16,31 +16,6 @@ typedef struct
1616
poly vec[MLDSA_L];
1717
} polyvecl;
1818

19-
#define polyvecl_uniform_eta MLD_NAMESPACE(polyvecl_uniform_eta)
20-
/*************************************************
21-
* Name: polyvecl_uniform_eta
22-
*
23-
* Description: Sample vector of polynomials with uniformly random coefficients
24-
* in [-MLDSA_ETA, MLDSA_ETA] by performing rejection sampling on
25-
* the output stream from SHAKE256(seed|nonce). The vector has
26-
* MLDSA_L polynomials.
27-
*
28-
* Arguments: - polyvecl *v: pointer to output polynomial vector
29-
* - const uint8_t seed[]: byte array with seed of length
30-
* MLDSA_CRHBYTES
31-
* - uint16_t nonce: 2-byte nonce
32-
**************************************************/
33-
void polyvecl_uniform_eta(polyvecl *v, const uint8_t seed[MLDSA_CRHBYTES],
34-
uint16_t nonce)
35-
__contract__(
36-
requires(memory_no_alias(v, sizeof(polyvecl)))
37-
requires(memory_no_alias(seed, MLDSA_CRHBYTES))
38-
requires(nonce <= UINT16_MAX - MLDSA_L)
39-
assigns(memory_slice(v, sizeof(polyvecl)))
40-
ensures(forall(k0, 0, MLDSA_L,
41-
array_abs_bound(v->vec[k0].coeffs, 0, MLDSA_N, MLDSA_ETA + 1)))
42-
);
43-
4419
#define polyvecl_uniform_gamma1 MLD_NAMESPACE(polyvecl_uniform_gamma1)
4520
void polyvecl_uniform_gamma1(polyvecl *v, const uint8_t seed[MLDSA_CRHBYTES],
4621
uint16_t nonce);
@@ -185,31 +160,6 @@ typedef struct
185160
poly vec[MLDSA_K];
186161
} polyveck;
187162

188-
#define polyveck_uniform_eta MLD_NAMESPACE(polyveck_uniform_eta)
189-
/*************************************************
190-
* Name: polyveck_uniform_eta
191-
*
192-
* Description: Sample vector of polynomials with uniformly random coefficients
193-
* in [-MLDSA_ETA, MLDSA_ETA] by performing rejection sampling on
194-
* the output stream from SHAKE256(seed|nonce). The vector has
195-
* MLDSA_K polynomials.
196-
*
197-
* Arguments: - polyveck *v: pointer to output polynomial vector
198-
* - const uint8_t seed[]: byte array with seed of length
199-
* MLDSA_CRHBYTES
200-
* - uint16_t nonce: 2-byte nonce
201-
**************************************************/
202-
void polyveck_uniform_eta(polyveck *v, const uint8_t seed[MLDSA_CRHBYTES],
203-
uint16_t nonce)
204-
__contract__(
205-
requires(memory_no_alias(v, sizeof(polyveck)))
206-
requires(memory_no_alias(seed, MLDSA_CRHBYTES))
207-
requires(nonce <= UINT16_MAX - MLDSA_K)
208-
assigns(memory_slice(v, sizeof(polyveck)))
209-
ensures(forall(k0, 0, MLDSA_K,
210-
array_abs_bound(v->vec[k0].coeffs, 0, MLDSA_N, MLDSA_ETA + 1)))
211-
);
212-
213163
#define polyveck_reduce MLD_NAMESPACE(polyveck_reduce)
214164
/*************************************************
215165
* Name: polyveck_reduce

0 commit comments

Comments
 (0)