Skip to content

Commit e319849

Browse files
authored
Merge pull request #796 from pq-code-package/remove_opt_clean
Remove opt clean suffixes from AArch64 backend
2 parents 227195e + d50368d commit e319849

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

42 files changed

+303
-290
lines changed

README.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ undefined behaviour in C, including out of bounds memory accesses and integer ov
5959
all C code in [mlkem/*](mlkem) and [mlkem/fips202/*](mlkem/fips202) involved in running mlkem-native with its C backend.
6060
See [proofs/cbmc](proofs/cbmc) for details.
6161

62-
HOL-Light functional correctness proofs for the optimized AArch64 NTT [ntt_opt.S](mlkem/native/aarch64/src/ntt_opt.S) and inverse NTT [intt_opt.S](mlkem/native/aarch64/src/intt_opt.S)
62+
HOL-Light functional correctness proofs for the optimized AArch64 NTT [ntt.S](dev/aarch64_opt/src/ntt.S) and inverse NTT [intt.S](dev/aarch64_opt/src/intt.S)
6363
can be found in [proofs/hol_light/arm](proofs/hol_light/arm). These proofs were contributed by John Harrison, and are
6464
utilizing the verification infrastructure provided by [s2n-bignum](https://github.com/awslabs/s2n-bignum).
6565

@@ -80,8 +80,8 @@ offers three backends for C, AArch64 and x86_64 - if you'd like contribute new b
8080
PR.
8181

8282
Our AArch64 assembly is developed using [SLOTHY](https://github.com/slothy-optimizer/slothy): We write
83-
'clean' assembly by hand and automate micro-optimizations (e.g. see the [clean](dev/aarch64_clean/src/ntt_clean.S)
84-
vs [optimized](mlkem/native/aarch64/src/ntt_opt.S) AArch64 NTT). See [dev/README.md](dev/README.md) for more details.
83+
'clean' assembly by hand and automate micro-optimizations (e.g. see the [clean](dev/aarch64_clean/src/ntt.S)
84+
vs [optimized](dev/aarch64_opt/src/ntt.S) AArch64 NTT). See [dev/README.md](dev/README.md) for more details.
8585

8686
## How should I use mlkem-native?
8787

dev/aarch64_clean/src/arith_native_aarch64.h

Lines changed: 34 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -29,44 +29,49 @@ extern const int16_t mlk_aarch64_zetas_mulcache_native[];
2929
extern const int16_t mlk_aarch64_zetas_mulcache_twisted_native[];
3030
extern const uint8_t mlk_rej_uniform_table[];
3131

32-
#define mlk_ntt_asm_clean MLK_NAMESPACE(ntt_asm_clean)
33-
void mlk_ntt_asm_clean(int16_t *, const int16_t *, const int16_t *);
32+
#define mlk_ntt_asm MLK_NAMESPACE(ntt_asm)
33+
void mlk_ntt_asm(int16_t *, const int16_t *, const int16_t *);
3434

35-
#define mlk_intt_asm_clean MLK_NAMESPACE(intt_asm_clean)
36-
void mlk_intt_asm_clean(int16_t *, const int16_t *, const int16_t *);
35+
#define mlk_intt_asm MLK_NAMESPACE(intt_asm)
36+
void mlk_intt_asm(int16_t *, const int16_t *, const int16_t *);
3737

38-
#define mlk_rej_uniform_asm_clean MLK_NAMESPACE(rej_uniform_asm_clean)
39-
unsigned mlk_rej_uniform_asm_clean(int16_t *r, const uint8_t *buf,
40-
unsigned buflen, const uint8_t *table);
38+
#define mlk_rej_uniform_asm MLK_NAMESPACE(rej_uniform_asm)
39+
unsigned mlk_rej_uniform_asm(int16_t *r, const uint8_t *buf, unsigned buflen,
40+
const uint8_t *table);
4141

42-
#define mlk_poly_reduce_asm_clean MLK_NAMESPACE(poly_reduce_asm_clean)
43-
void mlk_poly_reduce_asm_clean(int16_t *);
42+
#define mlk_poly_reduce_asm MLK_NAMESPACE(poly_reduce_asm)
43+
void mlk_poly_reduce_asm(int16_t *);
4444

45-
#define mlk_poly_tomont_asm_clean MLK_NAMESPACE(poly_tomont_asm_clean)
46-
void mlk_poly_tomont_asm_clean(int16_t *);
45+
#define mlk_poly_tomont_asm MLK_NAMESPACE(poly_tomont_asm)
46+
void mlk_poly_tomont_asm(int16_t *);
4747

48-
#define mlk_poly_mulcache_compute_asm_clean \
49-
MLK_NAMESPACE(poly_mulcache_compute_asm_clean)
50-
void mlk_poly_mulcache_compute_asm_clean(int16_t *, const int16_t *,
51-
const int16_t *, const int16_t *);
48+
#define mlk_poly_mulcache_compute_asm MLK_NAMESPACE(poly_mulcache_compute_asm)
49+
void mlk_poly_mulcache_compute_asm(int16_t *, const int16_t *, const int16_t *,
50+
const int16_t *);
5251

5352

54-
#define mlk_poly_tobytes_asm_clean MLK_NAMESPACE(poly_tobytes_asm_clean)
55-
void mlk_poly_tobytes_asm_clean(uint8_t *r, const int16_t *a);
53+
#define mlk_poly_tobytes_asm MLK_NAMESPACE(poly_tobytes_asm)
54+
void mlk_poly_tobytes_asm(uint8_t *r, const int16_t *a);
5655

57-
#define mlk_polyvec_basemul_acc_montgomery_cached_asm_k2_clean \
58-
MLK_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_k2_clean)
59-
void mlk_polyvec_basemul_acc_montgomery_cached_asm_k2_clean(
60-
int16_t *r, const int16_t *a, const int16_t *b, const int16_t *b_cache);
56+
#define mlk_polyvec_basemul_acc_montgomery_cached_asm_k2 \
57+
MLK_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_k2)
58+
void mlk_polyvec_basemul_acc_montgomery_cached_asm_k2(int16_t *r,
59+
const int16_t *a,
60+
const int16_t *b,
61+
const int16_t *b_cache);
6162

62-
#define mlk_polyvec_basemul_acc_montgomery_cached_asm_k3_clean \
63-
MLK_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_k3_clean)
64-
void mlk_polyvec_basemul_acc_montgomery_cached_asm_k3_clean(
65-
int16_t *r, const int16_t *a, const int16_t *b, const int16_t *b_cache);
63+
#define mlk_polyvec_basemul_acc_montgomery_cached_asm_k3 \
64+
MLK_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_k3)
65+
void mlk_polyvec_basemul_acc_montgomery_cached_asm_k3(int16_t *r,
66+
const int16_t *a,
67+
const int16_t *b,
68+
const int16_t *b_cache);
6669

67-
#define mlk_polyvec_basemul_acc_montgomery_cached_asm_k4_clean \
68-
MLK_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_k4_clean)
69-
void mlk_polyvec_basemul_acc_montgomery_cached_asm_k4_clean(
70-
int16_t *r, const int16_t *a, const int16_t *b, const int16_t *b_cache);
70+
#define mlk_polyvec_basemul_acc_montgomery_cached_asm_k4 \
71+
MLK_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_k4)
72+
void mlk_polyvec_basemul_acc_montgomery_cached_asm_k4(int16_t *r,
73+
const int16_t *a,
74+
const int16_t *b,
75+
const int16_t *b_cache);
7176

7277
#endif /* MLK_DEV_AARCH64_CLEAN_SRC_ARITH_NATIVE_AARCH64_H */

dev/aarch64_clean/src/clean_impl.h

Lines changed: 13 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -26,59 +26,58 @@
2626

2727
static MLK_INLINE void mlk_ntt_native(int16_t data[MLKEM_N])
2828
{
29-
mlk_ntt_asm_clean(data, mlk_aarch64_ntt_zetas_layer12345,
30-
mlk_aarch64_ntt_zetas_layer67);
29+
mlk_ntt_asm(data, mlk_aarch64_ntt_zetas_layer12345,
30+
mlk_aarch64_ntt_zetas_layer67);
3131
}
3232

3333
static MLK_INLINE void mlk_intt_native(int16_t data[MLKEM_N])
3434
{
35-
mlk_intt_asm_clean(data, mlk_aarch64_invntt_zetas_layer12345,
36-
mlk_aarch64_invntt_zetas_layer67);
35+
mlk_intt_asm(data, mlk_aarch64_invntt_zetas_layer12345,
36+
mlk_aarch64_invntt_zetas_layer67);
3737
}
3838

3939
static MLK_INLINE void mlk_poly_reduce_native(int16_t data[MLKEM_N])
4040
{
41-
mlk_poly_reduce_asm_clean(data);
41+
mlk_poly_reduce_asm(data);
4242
}
4343

4444
static MLK_INLINE void mlk_poly_tomont_native(int16_t data[MLKEM_N])
4545
{
46-
mlk_poly_tomont_asm_clean(data);
46+
mlk_poly_tomont_asm(data);
4747
}
4848

4949
static MLK_INLINE void mlk_poly_mulcache_compute_native(
5050
int16_t x[MLKEM_N / 2], const int16_t y[MLKEM_N])
5151
{
52-
mlk_poly_mulcache_compute_asm_clean(
53-
x, y, mlk_aarch64_zetas_mulcache_native,
54-
mlk_aarch64_zetas_mulcache_twisted_native);
52+
mlk_poly_mulcache_compute_asm(x, y, mlk_aarch64_zetas_mulcache_native,
53+
mlk_aarch64_zetas_mulcache_twisted_native);
5554
}
5655

5756
static MLK_INLINE void mlk_polyvec_basemul_acc_montgomery_cached_k2_native(
5857
int16_t r[MLKEM_N], const int16_t a[2 * MLKEM_N],
5958
const int16_t b[2 * MLKEM_N], const int16_t b_cache[2 * (MLKEM_N / 2)])
6059
{
61-
mlk_polyvec_basemul_acc_montgomery_cached_asm_k2_clean(r, a, b, b_cache);
60+
mlk_polyvec_basemul_acc_montgomery_cached_asm_k2(r, a, b, b_cache);
6261
}
6362

6463
static MLK_INLINE void mlk_polyvec_basemul_acc_montgomery_cached_k3_native(
6564
int16_t r[MLKEM_N], const int16_t a[3 * MLKEM_N],
6665
const int16_t b[3 * MLKEM_N], const int16_t b_cache[3 * (MLKEM_N / 2)])
6766
{
68-
mlk_polyvec_basemul_acc_montgomery_cached_asm_k3_clean(r, a, b, b_cache);
67+
mlk_polyvec_basemul_acc_montgomery_cached_asm_k3(r, a, b, b_cache);
6968
}
7069

7170
static MLK_INLINE void mlk_polyvec_basemul_acc_montgomery_cached_k4_native(
7271
int16_t r[MLKEM_N], const int16_t a[4 * MLKEM_N],
7372
const int16_t b[4 * MLKEM_N], const int16_t b_cache[4 * (MLKEM_N / 2)])
7473
{
75-
mlk_polyvec_basemul_acc_montgomery_cached_asm_k4_clean(r, a, b, b_cache);
74+
mlk_polyvec_basemul_acc_montgomery_cached_asm_k4(r, a, b, b_cache);
7675
}
7776

7877
static MLK_INLINE void mlk_poly_tobytes_native(uint8_t r[MLKEM_POLYBYTES],
7978
const int16_t a[MLKEM_N])
8079
{
81-
mlk_poly_tobytes_asm_clean(r, a);
80+
mlk_poly_tobytes_asm(r, a);
8281
}
8382

8483
static MLK_INLINE int mlk_rej_uniform_native(int16_t *r, unsigned len,
@@ -89,7 +88,7 @@ static MLK_INLINE int mlk_rej_uniform_native(int16_t *r, unsigned len,
8988
{
9089
return -1;
9190
}
92-
return (int)mlk_rej_uniform_asm_clean(r, buf, buflen, mlk_rej_uniform_table);
91+
return (int)mlk_rej_uniform_asm(r, buf, buflen, mlk_rej_uniform_table);
9392
}
9493

9594
#endif /* MLK_ARITH_PROFILE_IMPL_H */

dev/aarch64_clean/src/intt_clean.S renamed to dev/aarch64_clean/src/intt.S

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -194,9 +194,9 @@
194194
ninv_tw .req v30
195195

196196
.text
197-
.global MLK_ASM_NAMESPACE(intt_asm_clean)
197+
.global MLK_ASM_NAMESPACE(intt_asm)
198198
.balign 4
199-
MLK_ASM_FN_SYMBOL(intt_asm_clean)
199+
MLK_ASM_FN_SYMBOL(intt_asm)
200200
push_stack
201201

202202
// Setup constants

dev/aarch64_clean/src/ntt_clean.S renamed to dev/aarch64_clean/src/ntt.S

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -166,9 +166,9 @@
166166
t3 .req v28
167167

168168
.text
169-
.global MLK_ASM_NAMESPACE(ntt_asm_clean)
169+
.global MLK_ASM_NAMESPACE(ntt_asm)
170170
.balign 4
171-
MLK_ASM_FN_SYMBOL(ntt_asm_clean)
171+
MLK_ASM_FN_SYMBOL(ntt_asm)
172172
push_stack
173173

174174
mov wtmp, #3329

dev/aarch64_clean/src/poly_mulcache_compute_asm_clean.S renamed to dev/aarch64_clean/src/poly_mulcache_compute_asm.S

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,10 +43,10 @@
4343
modulus .req v6
4444
modulus_twisted .req v7
4545

46-
.global MLK_ASM_NAMESPACE(poly_mulcache_compute_asm_clean)
46+
.global MLK_ASM_NAMESPACE(poly_mulcache_compute_asm)
4747
.text
4848
.balign 4
49-
MLK_ASM_FN_SYMBOL(poly_mulcache_compute_asm_clean)
49+
MLK_ASM_FN_SYMBOL(poly_mulcache_compute_asm)
5050
mov wtmp, #3329
5151
dup modulus.8h, wtmp
5252

dev/aarch64_clean/src/poly_reduce_asm_clean.S renamed to dev/aarch64_clean/src/poly_reduce_asm.S

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,9 +42,9 @@
4242
modulus_twisted .req v4
4343

4444
.text
45-
.global MLK_ASM_NAMESPACE(poly_reduce_asm_clean)
45+
.global MLK_ASM_NAMESPACE(poly_reduce_asm)
4646
.balign 4
47-
MLK_ASM_FN_SYMBOL(poly_reduce_asm_clean)
47+
MLK_ASM_FN_SYMBOL(poly_reduce_asm)
4848

4949
mov wtmp, #3329 // ML-KEM modulus
5050
dup modulus.8h, wtmp

dev/aarch64_clean/src/poly_tobytes_asm_clean.S renamed to dev/aarch64_clean/src/poly_tobytes_asm.S

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -24,12 +24,12 @@
2424
count .req x2
2525

2626
.text
27-
.global MLK_ASM_NAMESPACE(poly_tobytes_asm_clean)
27+
.global MLK_ASM_NAMESPACE(poly_tobytes_asm)
2828
.balign 4
29-
MLK_ASM_FN_SYMBOL(poly_tobytes_asm_clean)
29+
MLK_ASM_FN_SYMBOL(poly_tobytes_asm)
3030

3131
mov count, #16
32-
poly_tobytes_asm_clean_asm_loop_start:
32+
poly_tobytes_asm_asm_loop_start:
3333
ld2 {data0.8h, data1.8h}, [src], #32
3434

3535
// r[3 * i + 0] = (t0 >> 0);
@@ -47,7 +47,7 @@ poly_tobytes_asm_clean_asm_loop_start:
4747
st3 {out0.8b, out1.8b, out2.8b}, [dst], #24
4848

4949
subs count, count, #1
50-
cbnz count, poly_tobytes_asm_clean_asm_loop_start
50+
cbnz count, poly_tobytes_asm_asm_loop_start
5151
ret
5252

5353
.unreq data0

dev/aarch64_clean/src/poly_tomont_asm_clean.S renamed to dev/aarch64_clean/src/poly_tomont_asm.S

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,9 +37,9 @@
3737
tmp0 .req v6
3838

3939
.text
40-
.global MLK_ASM_NAMESPACE(poly_tomont_asm_clean)
40+
.global MLK_ASM_NAMESPACE(poly_tomont_asm)
4141
.balign 4
42-
MLK_ASM_FN_SYMBOL(poly_tomont_asm_clean)
42+
MLK_ASM_FN_SYMBOL(poly_tomont_asm)
4343

4444
mov wtmp, #3329 // ML-KEM modulus
4545
dup modulus.8h, wtmp

dev/aarch64_clean/src/polyvec_basemul_acc_montgomery_cached_asm_k2_clean.S renamed to dev/aarch64_clean/src/polyvec_basemul_acc_montgomery_cached_asm_k2.S

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -141,9 +141,9 @@
141141
t0 .req v28
142142

143143
.text
144-
.global MLK_ASM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_k2_clean)
144+
.global MLK_ASM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_k2)
145145
.balign 4
146-
MLK_ASM_FN_SYMBOL(polyvec_basemul_acc_montgomery_cached_asm_k2_clean)
146+
MLK_ASM_FN_SYMBOL(polyvec_basemul_acc_montgomery_cached_asm_k2)
147147
push_stack
148148

149149
mov wtmp, #3329

dev/aarch64_clean/src/polyvec_basemul_acc_montgomery_cached_asm_k3_clean.S renamed to dev/aarch64_clean/src/polyvec_basemul_acc_montgomery_cached_asm_k3.S

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -141,9 +141,9 @@
141141
t0 .req v28
142142

143143
.text
144-
.global MLK_ASM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_k3_clean)
144+
.global MLK_ASM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_k3)
145145
.balign 4
146-
MLK_ASM_FN_SYMBOL(polyvec_basemul_acc_montgomery_cached_asm_k3_clean)
146+
MLK_ASM_FN_SYMBOL(polyvec_basemul_acc_montgomery_cached_asm_k3)
147147
push_stack
148148
mov wtmp, #3329
149149
dup modulus.8h, wtmp

dev/aarch64_clean/src/polyvec_basemul_acc_montgomery_cached_asm_k4_clean.S renamed to dev/aarch64_clean/src/polyvec_basemul_acc_montgomery_cached_asm_k4.S

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -141,9 +141,9 @@
141141
t0 .req v28
142142

143143
.text
144-
.global MLK_ASM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_k4_clean)
144+
.global MLK_ASM_NAMESPACE(polyvec_basemul_acc_montgomery_cached_asm_k4)
145145
.balign 4
146-
MLK_ASM_FN_SYMBOL(polyvec_basemul_acc_montgomery_cached_asm_k4_clean)
146+
MLK_ASM_FN_SYMBOL(polyvec_basemul_acc_montgomery_cached_asm_k4)
147147
push_stack
148148
mov wtmp, #3329
149149
dup modulus.8h, wtmp

dev/aarch64_clean/src/rej_uniform_asm_clean.S renamed to dev/aarch64_clean/src/rej_uniform_asm.S

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
*/
55

66
/*************************************************
7-
* Name: mlk_rej_uniform_asm_clean
7+
* Name: mlk_rej_uniform_asm
88
*
99
* Description: Run rejection sampling on uniform random bytes to generate
1010
* uniform random integers mod q
@@ -114,9 +114,9 @@
114114
bits .req v31
115115

116116
.text
117-
.global MLK_ASM_NAMESPACE(rej_uniform_asm_clean)
117+
.global MLK_ASM_NAMESPACE(rej_uniform_asm)
118118
.balign 4
119-
MLK_ASM_FN_SYMBOL(rej_uniform_asm_clean)
119+
MLK_ASM_FN_SYMBOL(rej_uniform_asm)
120120
push_stack
121121

122122
// Load 0x1, 0x2, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80

0 commit comments

Comments
 (0)