Skip to content

Add generalized 2- and 3-layer merging of the forward NTT #784

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 6 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
233 changes: 156 additions & 77 deletions mlkem/poly.c
Original file line number Diff line number Diff line change
Expand Up @@ -293,68 +293,17 @@ void mlk_poly_mulcache_compute(mlk_poly_mulcache *x, const mlk_poly *a)
#endif /* MLK_USE_NATIVE_POLY_MULCACHE_COMPUTE */

#if !defined(MLK_USE_NATIVE_NTT)
/*
* Computes a block CT butterflies with a fixed twiddle factor,
* using Montgomery multiplication.
* Parameters:
* - r: Pointer to base of polynomial (_not_ the base of butterfly block)
* - root: Twiddle factor to use for the butterfly. This must be in
* Montgomery form and signed canonical.
* - start: Offset to the beginning of the butterfly block
* - len: Index difference between coefficients subject to a butterfly
* - bound: Ghost variable describing coefficient bound: Prior to `start`,
* coefficients must be bound by `bound + MLKEM_Q`. Post `start`,
* they must be bound by `bound`.
* When this function returns, output coefficients in the index range
* [start, start+2*len) have bound bumped to `bound + MLKEM_Q`.
* Example:
* - start=8, len=4
* This would compute the following four butterflies
* 8 -- 12
* 9 -- 13
* 10 -- 14
* 11 -- 15
* - start=4, len=2
* This would compute the following two butterflies
* 4 -- 6
* 5 -- 7
*/

/* Reference: Embedded in `ntt()` in the reference implementation @[REF]. */
static void mlk_ntt_butterfly_block(int16_t r[MLKEM_N], int16_t zeta,
unsigned start, unsigned len, int bound)
__contract__(
requires(start < MLKEM_N)
requires(1 <= len && len <= MLKEM_N / 2 && start + 2 * len <= MLKEM_N)
requires(0 <= bound && bound < INT16_MAX - MLKEM_Q)
requires(-MLKEM_Q_HALF < zeta && zeta < MLKEM_Q_HALF)
requires(memory_no_alias(r, sizeof(int16_t) * MLKEM_N))
requires(array_abs_bound(r, 0, start, bound + MLKEM_Q))
requires(array_abs_bound(r, start, MLKEM_N, bound))
assigns(memory_slice(r, sizeof(int16_t) * MLKEM_N))
ensures(array_abs_bound(r, 0, start + 2*len, bound + MLKEM_Q))
ensures(array_abs_bound(r, start + 2 * len, MLKEM_N, bound)))
/* Reference: Embedded in `ntt()` in the reference implementation. */
static MLK_INLINE void mlk_ct_butterfly(int16_t r[MLKEM_N],
const unsigned coeff1_index,
const unsigned coeff2_index,
const int16_t zeta)
{
/* `bound` is a ghost variable only needed in the CBMC specification */
unsigned j;
((void)bound);
for (j = start; j < start + len; j++)
__loop__(
invariant(start <= j && j <= start + len)
/*
* Coefficients are updated in strided pairs, so the bounds for the
* intermediate states alternate twice between the old and new bound
*/
invariant(array_abs_bound(r, 0, j, bound + MLKEM_Q))
invariant(array_abs_bound(r, j, start + len, bound))
invariant(array_abs_bound(r, start + len, j + len, bound + MLKEM_Q))
invariant(array_abs_bound(r, j + len, MLKEM_N, bound)))
{
int16_t t;
t = mlk_fqmul(r[j + len], zeta);
r[j + len] = r[j] - t;
r[j] = r[j] + t;
}
int16_t t1 = r[coeff1_index];
int16_t t2 = mlk_fqmul(r[coeff2_index], zeta);
r[coeff1_index] = t1 + t2;
r[coeff2_index] = t1 - t2;
}

/*
Expand All @@ -364,28 +313,162 @@ __contract__(
* - layer: Variable indicating which layer is being applied.
*/

/* Reference: Embedded in `ntt()` in the reference implementation @[REF]. */
static void mlk_ntt_layer(int16_t r[MLKEM_N], unsigned layer)
/* Reference: Embedded in `ntt()` in the reference implementation [@REF]. */
static void mlk_ntt_1_layer(int16_t r[MLKEM_N], unsigned layer)
__contract__(
requires(memory_no_alias(r, sizeof(int16_t) * MLKEM_N))
requires(1 <= layer && layer <= 7)
requires(array_abs_bound(r, 0, MLKEM_N, layer * MLKEM_Q))
assigns(memory_slice(r, sizeof(int16_t) * MLKEM_N))
ensures(array_abs_bound(r, 0, MLKEM_N, (layer + 1) * MLKEM_Q)))
{
unsigned start, k, len;
/* Twiddle factors for layer n are at indices 2^(n-1)..2^n-1. */
k = 1u << (layer - 1);
len = MLKEM_N >> layer;
const unsigned len = MLKEM_N >> layer;
unsigned start, k;
/* Twiddle factors for layer n start at index 2 ** (layer-1) */
k = 1 << (layer - 1);
for (start = 0; start < MLKEM_N; start += 2 * len)
__loop__(
invariant(start < MLKEM_N + 2 * len)
invariant(k <= MLKEM_N / 2 && 2 * len * k == start + MLKEM_N)
invariant(array_abs_bound(r, 0, start, layer * MLKEM_Q + MLKEM_Q))
invariant(array_abs_bound(r, start, MLKEM_N, layer * MLKEM_Q)))
invariant(array_abs_bound(r, start, MLKEM_N, layer * MLKEM_Q))
)
{
int16_t zeta = zetas[k++];
mlk_ntt_butterfly_block(r, zeta, start, len, layer * MLKEM_Q);
const int16_t zeta = zetas[k++];
unsigned j;
for (j = 0; j < len; j++)
__loop__(
invariant(j <= len)
invariant(array_abs_bound(r, 0, start, layer * MLKEM_Q + MLKEM_Q))
invariant(array_abs_bound(r, start, start + j, layer * MLKEM_Q + MLKEM_Q))
invariant(array_abs_bound(r, start + j, start + len, layer * MLKEM_Q))
invariant(array_abs_bound(r, start + len, start + len + j, layer * MLKEM_Q + MLKEM_Q))
invariant(array_abs_bound(r, start + len + j, start + 2 * len, layer * MLKEM_Q))
invariant(array_abs_bound(r, start + 2 * len, MLKEM_N, layer * MLKEM_Q))
)
{
mlk_ct_butterfly(r, start + j, start + len + j, zeta);
}
}
}

static void mlk_ntt_2_layers(int16_t r[MLKEM_N], unsigned layer)
__contract__(
requires(memory_no_alias(r, sizeof(int16_t) * MLKEM_N))
requires(1 <= layer && layer <= 6)
requires(array_abs_bound(r, 0, MLKEM_N, layer * MLKEM_Q))
assigns(memory_slice(r, sizeof(int16_t) * MLKEM_N))
ensures(array_abs_bound(r, 0, MLKEM_N, (layer + 2) * MLKEM_Q)))
{
const unsigned len = MLKEM_N >> layer;
unsigned start, k;
/* Twiddle factors for layer n start at index 2 ** (layer-1) */
k = 1 << (layer - 1);
for (start = 0; start < MLKEM_N; start += 2 * len)
__loop__(
invariant(start < MLKEM_N + 2 * len)
invariant(len % 2 == 0)
invariant(len == 4 || len == 8 || len == 16 || len == 32 || len == 64 || len == 128)
invariant(k <= MLKEM_N / 2)
invariant(2 * len * k == start + MLKEM_N)
invariant(array_abs_bound(r, 0, start, (layer + 2) * MLKEM_Q))
invariant(array_abs_bound(r, start, MLKEM_N, layer * MLKEM_Q))
)
{
unsigned j;
const int16_t this_layer_zeta = zetas[k];
const int16_t next_layer_zeta1 = zetas[k * 2];
const int16_t next_layer_zeta2 = zetas[k * 2 + 1];
k++;

for (j = 0; j < len / 2; j++)
__loop__(
invariant(j <= len / 2)
invariant(len % 2 == 0)
invariant(len == 4 || len == 8 || len == 16 || len == 32 || len == 64 || len == 128)

invariant(array_abs_bound(r, 0, start, (layer + 2) * MLKEM_Q))

invariant(array_abs_bound(r, start, start + j, (layer + 2) * MLKEM_Q))
invariant(array_abs_bound(r, start + j, start + len / 2, layer * MLKEM_Q))

invariant(array_abs_bound(r, start + len / 2, start + len / 2 + j, (layer + 2) * MLKEM_Q))
invariant(array_abs_bound(r, start + len / 2 + j, start + len, layer * MLKEM_Q))

invariant(array_abs_bound(r, start + len, start + len + j, (layer + 2) * MLKEM_Q))
invariant(array_abs_bound(r, start + len + j, start + len + len / 2, layer * MLKEM_Q))

invariant(array_abs_bound(r, start + len + len / 2, start + len + len / 2 + j, (layer + 2) * MLKEM_Q))
invariant(array_abs_bound(r, start + len + len / 2 + j, start + 2 * len, layer * MLKEM_Q))

invariant(array_abs_bound(r, start + 2 * len, MLKEM_N, layer * MLKEM_Q))
)
{
const unsigned ci0 = j + start;
const unsigned ci1 = ci0 + len / 2;
const unsigned ci2 = ci1 + len / 2;
const unsigned ci3 = ci2 + len / 2;

mlk_ct_butterfly(r, ci0, ci2, this_layer_zeta);
mlk_ct_butterfly(r, ci1, ci3, this_layer_zeta);
mlk_ct_butterfly(r, ci0, ci1, next_layer_zeta1);
mlk_ct_butterfly(r, ci2, ci3, next_layer_zeta2);
}
}
}

static void mlk_ntt_3_layers(int16_t r[MLKEM_N], unsigned layer)
__contract__(
requires(memory_no_alias(r, sizeof(int16_t) * MLKEM_N))
requires(1 <= layer && layer <= 5)
requires(array_abs_bound(r, 0, MLKEM_N, layer * MLKEM_Q))
assigns(memory_slice(r, sizeof(int16_t) * MLKEM_N))
ensures(array_abs_bound(r, 0, MLKEM_N, (layer + 3) * MLKEM_Q)))
{
const unsigned len = MLKEM_N >> layer;
unsigned start, k;
/* Twiddle factors for layer n start at index 2 ** (layer-1) */
k = 1 << (layer - 1);
for (start = 0; start < MLKEM_N; start += 2 * len)
{
unsigned j;
const int16_t first_layer_zeta = zetas[k];
const unsigned second_layer_zi1 = k * 2;
const unsigned second_layer_zi2 = k * 2 + 1;
const int16_t second_layer_zeta1 = zetas[second_layer_zi1];
const int16_t second_layer_zeta2 = zetas[second_layer_zi2];
const int16_t third_layer_zeta1 = zetas[second_layer_zi1 * 2];
const int16_t third_layer_zeta2 = zetas[second_layer_zi1 * 2 + 1];
const int16_t third_layer_zeta3 = zetas[second_layer_zi2 * 2];
const int16_t third_layer_zeta4 = zetas[second_layer_zi2 * 2 + 1];
k++;

for (j = 0; j < len / 4; j++)
{
const unsigned ci0 = j + start;
const unsigned ci1 = ci0 + len / 4;
const unsigned ci2 = ci1 + len / 4;
const unsigned ci3 = ci2 + len / 4;
const unsigned ci4 = ci3 + len / 4;
const unsigned ci5 = ci4 + len / 4;
const unsigned ci6 = ci5 + len / 4;
const unsigned ci7 = ci6 + len / 4;

mlk_ct_butterfly(r, ci0, ci4, first_layer_zeta);
mlk_ct_butterfly(r, ci1, ci5, first_layer_zeta);
mlk_ct_butterfly(r, ci2, ci6, first_layer_zeta);
mlk_ct_butterfly(r, ci3, ci7, first_layer_zeta);

mlk_ct_butterfly(r, ci0, ci2, second_layer_zeta1);
mlk_ct_butterfly(r, ci1, ci3, second_layer_zeta1);
mlk_ct_butterfly(r, ci4, ci6, second_layer_zeta2);
mlk_ct_butterfly(r, ci5, ci7, second_layer_zeta2);

mlk_ct_butterfly(r, ci0, ci1, third_layer_zeta1);
mlk_ct_butterfly(r, ci2, ci3, third_layer_zeta2);
mlk_ct_butterfly(r, ci4, ci5, third_layer_zeta3);
mlk_ct_butterfly(r, ci6, ci7, third_layer_zeta4);
}
}
}

Expand All @@ -404,18 +487,14 @@ __contract__(
MLK_INTERNAL_API
void mlk_poly_ntt(mlk_poly *p)
{
unsigned layer;
int16_t *r;
mlk_assert_abs_bound(p, MLKEM_N, MLKEM_Q);
r = p->coeffs;

for (layer = 1; layer <= 7; layer++)
__loop__(
invariant(1 <= layer && layer <= 8)
invariant(array_abs_bound(r, 0, MLKEM_N, layer * MLKEM_Q)))
{
mlk_ntt_layer(r, layer);
}
mlk_ntt_3_layers(r, 1);
mlk_ntt_2_layers(r, 4);
mlk_ntt_1_layer(r, 6);
mlk_ntt_1_layer(r, 7);

/* Check the stronger bound */
mlk_assert_abs_bound(p, MLKEM_N, MLK_NTT_BOUND);
Expand Down
54 changes: 54 additions & 0 deletions proofs/cbmc/ntt_1_layer/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# SPDX-License-Identifier: Apache-2.0

include ../Makefile_params.common

HARNESS_ENTRY = harness
HARNESS_FILE = ntt_1_layer_harness

# This should be a unique identifier for this proof, and will appear on the
# Litani dashboard. It can be human-readable and contain spaces if you wish.
PROOF_UID = mlk_ntt_1_layer

DEFINES +=
INCLUDES +=

REMOVE_FUNCTION_BODY +=
UNWINDSET +=

PROOF_SOURCES += $(PROOFDIR)/$(HARNESS_FILE).c
PROJECT_SOURCES += $(SRCDIR)/mlkem/poly.c

CHECK_FUNCTION_CONTRACTS=mlk_ntt_1_layer
USE_FUNCTION_CONTRACTS=
APPLY_LOOP_CONTRACTS=on
USE_DYNAMIC_FRAMES=1

# Disable any setting of EXTERNAL_SAT_SOLVER, and choose SMT backend instead
EXTERNAL_SAT_SOLVER=
CBMCFLAGS=--bitwuzla

FUNCTION_NAME = mlk_ntt_1_layer

# If this proof is found to consume huge amounts of RAM, you can set the
# EXPENSIVE variable. With new enough versions of the proof tools, this will
# restrict the number of EXPENSIVE CBMC jobs running at once. See the
# documentation in Makefile.common under the "Job Pools" heading for details.
# EXPENSIVE = true

# This function is large enough to need...
CBMC_OBJECT_BITS = 8

# If you require access to a file-local ("static") function or object to conduct
# your proof, set the following (and do not include the original source file
# ("mlkem/poly.c") in PROJECT_SOURCES).
# REWRITTEN_SOURCES = $(PROOFDIR)/<__SOURCE_FILE_BASENAME__>.i
# include ../Makefile.common
# $(PROOFDIR)/<__SOURCE_FILE_BASENAME__>.i_SOURCE = $(SRCDIR)/mlkem/poly.c
# $(PROOFDIR)/<__SOURCE_FILE_BASENAME__>.i_FUNCTIONS = foo bar
# $(PROOFDIR)/<__SOURCE_FILE_BASENAME__>.i_OBJECTS = baz
# Care is required with variables on the left-hand side: REWRITTEN_SOURCES must
# be set before including Makefile.common, but any use of variables on the
# left-hand side requires those variables to be defined. Hence, _SOURCE,
# _FUNCTIONS, _OBJECTS is set after including Makefile.common.

include ../Makefile.common
15 changes: 15 additions & 0 deletions proofs/cbmc/ntt_1_layer/ntt_1_layer_harness.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
// Copyright (c) 2024-2025 The mlkem-native project authors

Check failure on line 1 in proofs/cbmc/ntt_1_layer/ntt_1_layer_harness.c

View workflow job for this annotation

GitHub Actions / Base / Linting (ubuntu-latest)

Missing copyright header error

proofs/cbmc/ntt_1_layer/ntt_1_layer_harness.c is missing copyright header

Check failure on line 1 in proofs/cbmc/ntt_1_layer/ntt_1_layer_harness.c

View workflow job for this annotation

GitHub Actions / Base / Linting (pqcp-arm64)

Missing copyright header error

proofs/cbmc/ntt_1_layer/ntt_1_layer_harness.c is missing copyright header
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: MIT-0

#include "poly.h"


void mlk_ntt_1_layer(int16_t *p, unsigned layer);

void harness(void)
{
int16_t *a;
unsigned layer;
mlk_ntt_1_layer(a, layer);
}
54 changes: 54 additions & 0 deletions proofs/cbmc/ntt_2_layers/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# SPDX-License-Identifier: Apache-2.0

include ../Makefile_params.common

HARNESS_ENTRY = harness
HARNESS_FILE = ntt_2_layers_harness

# This should be a unique identifier for this proof, and will appear on the
# Litani dashboard. It can be human-readable and contain spaces if you wish.
PROOF_UID = mlk_ntt_2_layers

DEFINES +=
INCLUDES +=

REMOVE_FUNCTION_BODY +=
UNWINDSET +=

PROOF_SOURCES += $(PROOFDIR)/$(HARNESS_FILE).c
PROJECT_SOURCES += $(SRCDIR)/mlkem/poly.c

CHECK_FUNCTION_CONTRACTS=mlk_ntt_2_layers
USE_FUNCTION_CONTRACTS=
APPLY_LOOP_CONTRACTS=on
USE_DYNAMIC_FRAMES=1

# Disable any setting of EXTERNAL_SAT_SOLVER, and choose SMT backend instead
EXTERNAL_SAT_SOLVER=
CBMCFLAGS=--smt2

FUNCTION_NAME = mlk_ntt_2_layers

# If this proof is found to consume huge amounts of RAM, you can set the
# EXPENSIVE variable. With new enough versions of the proof tools, this will
# restrict the number of EXPENSIVE CBMC jobs running at once. See the
# documentation in Makefile.common under the "Job Pools" heading for details.
# EXPENSIVE = true

# This function is large enough to need...
CBMC_OBJECT_BITS = 8

# If you require access to a file-local ("static") function or object to conduct
# your proof, set the following (and do not include the original source file
# ("mlkem/poly.c") in PROJECT_SOURCES).
# REWRITTEN_SOURCES = $(PROOFDIR)/<__SOURCE_FILE_BASENAME__>.i
# include ../Makefile.common
# $(PROOFDIR)/<__SOURCE_FILE_BASENAME__>.i_SOURCE = $(SRCDIR)/mlkem/poly.c
# $(PROOFDIR)/<__SOURCE_FILE_BASENAME__>.i_FUNCTIONS = foo bar
# $(PROOFDIR)/<__SOURCE_FILE_BASENAME__>.i_OBJECTS = baz
# Care is required with variables on the left-hand side: REWRITTEN_SOURCES must
# be set before including Makefile.common, but any use of variables on the
# left-hand side requires those variables to be defined. Hence, _SOURCE,
# _FUNCTIONS, _OBJECTS is set after including Makefile.common.

include ../Makefile.common
Loading
Loading