From 0e935e01e42bf2349adee26cafcc1536efad4e82 Mon Sep 17 00:00:00 2001 From: Yan Peng Date: Fri, 14 Feb 2025 00:33:22 +0000 Subject: [PATCH 1/8] aes-xts experiments --- arm/Makefile | 5 +- arm/aes-xts/aes-xts-armv8.S | 654 ++++++++++++++++++++++++++++++++++++ 2 files changed, 658 insertions(+), 1 deletion(-) create mode 100644 arm/aes-xts/aes-xts-armv8.S diff --git a/arm/Makefile b/arm/Makefile index e1d37985d..18bf512db 100644 --- a/arm/Makefile +++ b/arm/Makefile @@ -385,7 +385,10 @@ UNOPT_OBJ = p256/unopt/bignum_montmul_p256_base.o \ fastmul/unopt/bignum_mul_8_16_base.o \ fastmul/unopt/bignum_sqr_8_16_base.o -OBJ = $(POINT_OBJ) $(BIGNUM_OBJ) +AES_XTS_OBJ = aes-xts/aes-xts-armv8.o + +# OBJ = $(POINT_OBJ) $(BIGNUM_OBJ) $(AES_XTS_OBJ) +OBJ = $(AES_XTS_OBJ) # Tutorial assembly files diff --git a/arm/aes-xts/aes-xts-armv8.S b/arm/aes-xts/aes-xts-armv8.S new file mode 100644 index 000000000..04ffb0bbe --- /dev/null +++ b/arm/aes-xts/aes-xts-armv8.S @@ -0,0 +1,654 @@ +#include "_internal_s2n_bignum.h" + +aes_hw_xts_encrypt: + // AARCH64_VALID_CALL_TARGET + cmp x2,#16 + // Original input data size bigger than 16, jump to big size processing. + b.ne .Lxts_enc_big_size + // Encrypt the iv with key2, as the first XEX iv. + ldr w6,[x4,#240] + ld1 {v0.16b},[x4],#16 + ld1 {v6.16b},[x5] + sub w6,w6,#2 + ld1 {v1.16b},[x4],#16 + +.Loop_enc_iv_enc: + aese v6.16b,v0.16b + aesmc v6.16b,v6.16b + ld1 {v0.4s},[x4],#16 + subs w6,w6,#2 + aese v6.16b,v1.16b + aesmc v6.16b,v6.16b + ld1 {v1.4s},[x4],#16 + b.gt .Loop_enc_iv_enc + + aese v6.16b,v0.16b + aesmc v6.16b,v6.16b + ld1 {v0.4s},[x4] + aese v6.16b,v1.16b + eor v6.16b,v6.16b,v0.16b + + ld1 {v0.16b},[x0] + eor v0.16b,v6.16b,v0.16b + + ldr w6,[x3,#240] + ld1 {v28.4s,v29.4s},[x3],#32 // load key schedule... + + aese v0.16b,v28.16b + aesmc v0.16b,v0.16b + ld1 {v16.4s,v17.4s},[x3],#32 // load key schedule... + aese v0.16b,v29.16b + aesmc v0.16b,v0.16b + subs w6,w6,#10 // if rounds==10, jump to aes-128-xts processing + b.eq .Lxts_128_enc +.Lxts_enc_round_loop: + aese v0.16b,v16.16b + aesmc v0.16b,v0.16b + ld1 {v16.4s},[x3],#16 // load key schedule... + aese v0.16b,v17.16b + aesmc v0.16b,v0.16b + ld1 {v17.4s},[x3],#16 // load key schedule... + subs w6,w6,#2 // bias + b.gt .Lxts_enc_round_loop +.Lxts_128_enc: + ld1 {v18.4s,v19.4s},[x3],#32 // load key schedule... + aese v0.16b,v16.16b + aesmc v0.16b,v0.16b + aese v0.16b,v17.16b + aesmc v0.16b,v0.16b + ld1 {v20.4s,v21.4s},[x3],#32 // load key schedule... + aese v0.16b,v18.16b + aesmc v0.16b,v0.16b + aese v0.16b,v19.16b + aesmc v0.16b,v0.16b + ld1 {v22.4s,v23.4s},[x3],#32 // load key schedule... + aese v0.16b,v20.16b + aesmc v0.16b,v0.16b + aese v0.16b,v21.16b + aesmc v0.16b,v0.16b + ld1 {v7.4s},[x3] + aese v0.16b,v22.16b + aesmc v0.16b,v0.16b + aese v0.16b,v23.16b + eor v0.16b,v0.16b,v7.16b + eor v0.16b,v0.16b,v6.16b + st1 {v0.16b},[x1] + b .Lxts_enc_final_abort + +.align 4 +.Lxts_enc_big_size: + // Encrypt input size > 16 bytes + stp x19,x20,[sp,#-64]! + stp x21,x22,[sp,#48] + stp d8,d9,[sp,#32] + stp d10,d11,[sp,#16] + + // tailcnt store the tail value of length%16. + and x21,x2,#0xf + and x2,x2,#-16 // len &= 0x1..110000, now divisible by 16 + subs x2,x2,#16 + mov x8,#16 + b.lo .Lxts_abort // if !(len > 16): error + csel x8,xzr,x8,eq // if (len == 16): step = 0 + + // Firstly, encrypt the iv with key2, as the first iv of XEX. 
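+ // XTS-AES (IEEE P1619) computes, for each 16-byte block j of a data
+ // unit:
+ //   T_0     = AES-Enc(key2, IV)
+ //   C_j     = AES-Enc(key1, P_j ^ T_j) ^ T_j
+ //   T_{j+1} = T_j * x  in GF(2^128) mod x^128 + x^7 + x^2 + x + 1
+ // The sequence below computes T_0 with key2 (x4); the following
+ // tweaks are then derived arithmetically in the x9 (low) / x10 (high)
+ // register pair.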
+ ldr w6,[x4,#240] + ld1 {v0.4s},[x4],#16 + ld1 {v6.16b},[x5] + sub w6,w6,#2 + ld1 {v1.4s},[x4],#16 + +.Loop_iv_enc: + aese v6.16b,v0.16b + aesmc v6.16b,v6.16b + ld1 {v0.4s},[x4],#16 + subs w6,w6,#2 + aese v6.16b,v1.16b + aesmc v6.16b,v6.16b + ld1 {v1.4s},[x4],#16 + b.gt .Loop_iv_enc + + aese v6.16b,v0.16b + aesmc v6.16b,v6.16b + ld1 {v0.4s},[x4] + aese v6.16b,v1.16b + eor v6.16b,v6.16b,v0.16b + + // The iv for second block + // x9- iv(low), x10 - iv(high) + // the five ivs stored into, v6.16b,v8.16b,v9.16b,v10.16b,v11.16b + fmov x9,d6 + fmov x10,v6.d[1] + mov w19,#0x87 + extr x22,x10,x10,#32 + extr x10,x10,x9,#63 + and w11,w19,w22,asr#31 + eor x9,x11,x9,lsl#1 + fmov d8,x9 + fmov v8.d[1],x10 + + ldr w5,[x3,#240] // next starting point + ld1 {v0.16b},[x0],x8 + + ld1 {v16.4s,v17.4s},[x3] // load key schedule... + sub w5,w5,#6 + add x7,x3,x5,lsl#4 // pointer to last 7 round keys + sub w5,w5,#2 + ld1 {v18.4s,v19.4s},[x7],#32 + ld1 {v20.4s,v21.4s},[x7],#32 + ld1 {v22.4s,v23.4s},[x7],#32 + ld1 {v7.4s},[x7] + + add x7,x3,#32 + mov w6,w5 + + // Encryption +.Lxts_enc: + ld1 {v24.16b},[x0],#16 + subs x2,x2,#32 // bias + add w6,w5,#2 + orr v3.16b,v0.16b,v0.16b + orr v1.16b,v0.16b,v0.16b + orr v28.16b,v0.16b,v0.16b + orr v27.16b,v24.16b,v24.16b + orr v29.16b,v24.16b,v24.16b + b.lo .Lxts_inner_enc_tail // when input size % 5 = 1 or 2 + // (with tail or not) + eor v0.16b,v0.16b,v6.16b // before encryption, xor with iv + eor v24.16b,v24.16b,v8.16b + + // The iv for third block + extr x22,x10,x10,#32 + extr x10,x10,x9,#63 + and w11,w19,w22,asr#31 + eor x9,x11,x9,lsl#1 + fmov d9,x9 + fmov v9.d[1],x10 + + + orr v1.16b,v24.16b,v24.16b + ld1 {v24.16b},[x0],#16 + orr v2.16b,v0.16b,v0.16b + orr v3.16b,v1.16b,v1.16b + eor v27.16b,v24.16b,v9.16b // the third block + eor v24.16b,v24.16b,v9.16b + cmp x2,#32 + b.lo .Lxts_outer_enc_tail + + // The iv for fourth block + extr x22,x10,x10,#32 + extr x10,x10,x9,#63 + and w11,w19,w22,asr#31 + eor x9,x11,x9,lsl#1 + fmov d10,x9 + fmov v10.d[1],x10 + + ld1 {v25.16b},[x0],#16 + // The iv for fifth block + extr x22,x10,x10,#32 + extr x10,x10,x9,#63 + and w11,w19,w22,asr#31 + eor x9,x11,x9,lsl#1 + fmov d11,x9 + fmov v11.d[1],x10 + + ld1 {v26.16b},[x0],#16 + eor v25.16b,v25.16b,v10.16b // the fourth block + eor v26.16b,v26.16b,v11.16b + sub x2,x2,#32 // bias + mov w6,w5 + b .Loop5x_xts_enc + +.align 4 +.Loop5x_xts_enc: + aese v0.16b,v16.16b + aesmc v0.16b,v0.16b + aese v1.16b,v16.16b + aesmc v1.16b,v1.16b + aese v24.16b,v16.16b + aesmc v24.16b,v24.16b + aese v25.16b,v16.16b + aesmc v25.16b,v25.16b + aese v26.16b,v16.16b + aesmc v26.16b,v26.16b + ld1 {v16.4s},[x7],#16 + subs w6,w6,#2 + aese v0.16b,v17.16b + aesmc v0.16b,v0.16b + aese v1.16b,v17.16b + aesmc v1.16b,v1.16b + aese v24.16b,v17.16b + aesmc v24.16b,v24.16b + aese v25.16b,v17.16b + aesmc v25.16b,v25.16b + aese v26.16b,v17.16b + aesmc v26.16b,v26.16b + ld1 {v17.4s},[x7],#16 + b.gt .Loop5x_xts_enc + + aese v0.16b,v16.16b + aesmc v0.16b,v0.16b + aese v1.16b,v16.16b + aesmc v1.16b,v1.16b + aese v24.16b,v16.16b + aesmc v24.16b,v24.16b + aese v25.16b,v16.16b + aesmc v25.16b,v25.16b + aese v26.16b,v16.16b + aesmc v26.16b,v26.16b + subs x2,x2,#0x50 // because .Lxts_enc_tail4x + + aese v0.16b,v17.16b + aesmc v0.16b,v0.16b + aese v1.16b,v17.16b + aesmc v1.16b,v1.16b + aese v24.16b,v17.16b + aesmc v24.16b,v24.16b + aese v25.16b,v17.16b + aesmc v25.16b,v25.16b + aese v26.16b,v17.16b + aesmc v26.16b,v26.16b + csel x6,xzr,x2,gt // borrow x6, w6, "gt" is not typo + mov x7,x3 + + aese v0.16b,v18.16b + aesmc v0.16b,v0.16b + aese 
v1.16b,v18.16b + aesmc v1.16b,v1.16b + aese v24.16b,v18.16b + aesmc v24.16b,v24.16b + aese v25.16b,v18.16b + aesmc v25.16b,v25.16b + aese v26.16b,v18.16b + aesmc v26.16b,v26.16b + add x0,x0,x6 // x0 is adjusted in such way that + // at exit from the loop v1.16b-v26.16b + // are loaded with last "words" + add x6,x2,#0x60 // because .Lxts_enc_tail4x + + aese v0.16b,v19.16b + aesmc v0.16b,v0.16b + aese v1.16b,v19.16b + aesmc v1.16b,v1.16b + aese v24.16b,v19.16b + aesmc v24.16b,v24.16b + aese v25.16b,v19.16b + aesmc v25.16b,v25.16b + aese v26.16b,v19.16b + aesmc v26.16b,v26.16b + + aese v0.16b,v20.16b + aesmc v0.16b,v0.16b + aese v1.16b,v20.16b + aesmc v1.16b,v1.16b + aese v24.16b,v20.16b + aesmc v24.16b,v24.16b + aese v25.16b,v20.16b + aesmc v25.16b,v25.16b + aese v26.16b,v20.16b + aesmc v26.16b,v26.16b + + aese v0.16b,v21.16b + aesmc v0.16b,v0.16b + aese v1.16b,v21.16b + aesmc v1.16b,v1.16b + aese v24.16b,v21.16b + aesmc v24.16b,v24.16b + aese v25.16b,v21.16b + aesmc v25.16b,v25.16b + aese v26.16b,v21.16b + aesmc v26.16b,v26.16b + + aese v0.16b,v22.16b + aesmc v0.16b,v0.16b + aese v1.16b,v22.16b + aesmc v1.16b,v1.16b + aese v24.16b,v22.16b + aesmc v24.16b,v24.16b + aese v25.16b,v22.16b + aesmc v25.16b,v25.16b + aese v26.16b,v22.16b + aesmc v26.16b,v26.16b + + eor v4.16b,v7.16b,v6.16b + aese v0.16b,v23.16b + // The iv for first block of one iteration + extr x22,x10,x10,#32 + extr x10,x10,x9,#63 + and w11,w19,w22,asr#31 + eor x9,x11,x9,lsl#1 + fmov d6,x9 + fmov v6.d[1],x10 + eor v5.16b,v7.16b,v8.16b + ld1 {v2.16b},[x0],#16 + aese v1.16b,v23.16b + // The iv for second block + extr x22,x10,x10,#32 + extr x10,x10,x9,#63 + and w11,w19,w22,asr#31 + eor x9,x11,x9,lsl#1 + fmov d8,x9 + fmov v8.d[1],x10 + eor v17.16b,v7.16b,v9.16b + ld1 {v3.16b},[x0],#16 + aese v24.16b,v23.16b + // The iv for third block + extr x22,x10,x10,#32 + extr x10,x10,x9,#63 + and w11,w19,w22,asr#31 + eor x9,x11,x9,lsl#1 + fmov d9,x9 + fmov v9.d[1],x10 + eor v30.16b,v7.16b,v10.16b + ld1 {v27.16b},[x0],#16 + aese v25.16b,v23.16b + // The iv for fourth block + extr x22,x10,x10,#32 + extr x10,x10,x9,#63 + and w11,w19,w22,asr#31 + eor x9,x11,x9,lsl#1 + fmov d10,x9 + fmov v10.d[1],x10 + eor v31.16b,v7.16b,v11.16b + ld1 {v28.16b},[x0],#16 + aese v26.16b,v23.16b + + // The iv for fifth block + extr x22,x10,x10,#32 + extr x10,x10,x9,#63 + and w11,w19,w22,asr #31 + eor x9,x11,x9,lsl #1 + fmov d11,x9 + fmov v11.d[1],x10 + + ld1 {v29.16b},[x0],#16 + cbz x6,.Lxts_enc_tail4x + ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0] + eor v4.16b,v4.16b,v0.16b + eor v0.16b,v2.16b,v6.16b + eor v5.16b,v5.16b,v1.16b + eor v1.16b,v3.16b,v8.16b + eor v17.16b,v17.16b,v24.16b + eor v24.16b,v27.16b,v9.16b + eor v30.16b,v30.16b,v25.16b + eor v25.16b,v28.16b,v10.16b + eor v31.16b,v31.16b,v26.16b + st1 {v4.16b},[x1],#16 + eor v26.16b,v29.16b,v11.16b + st1 {v5.16b},[x1],#16 + mov w6,w5 + st1 {v17.16b},[x1],#16 + ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1] + st1 {v30.16b},[x1],#16 + st1 {v31.16b},[x1],#16 + b.hs .Loop5x_xts_enc + + + // If left 4 blocks, borrow the five block's processing. + // This means if (x2 + 1 block) == 0, which is the case + // when input size % 5 = 4, continue processing and do + // another iteration in Loop5x_xts_enc which will exit from + // cbz x6,.Lxts_enc_tail4x. 
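+ // (At this point x2 == -0x10, which the "cmn x2,#0x10" below tests.)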
+ // Otherwise, this is the end of the loop continue processing + // 0, 1, 2 or 3 blocks (with or without tail) starting at + // Loop5x_enc_after + cmn x2,#0x10 + b.ne .Loop5x_enc_after + orr v11.16b,v10.16b,v10.16b + orr v10.16b,v9.16b,v9.16b + orr v9.16b,v8.16b,v8.16b + orr v8.16b,v6.16b,v6.16b + fmov x9,d11 + fmov x10,v11.d[1] + eor v0.16b,v6.16b,v2.16b + eor v1.16b,v8.16b,v3.16b + eor v24.16b,v27.16b,v9.16b + eor v25.16b,v28.16b,v10.16b + eor v26.16b,v29.16b,v11.16b + b.eq .Loop5x_xts_enc + +.Loop5x_enc_after: + add x2,x2,#0x50 + cbz x2,.Lxts_enc_done // no blocks left + + add w6,w5,#2 + subs x2,x2,#0x30 + b.lo .Lxts_inner_enc_tail // 1 or 2 blocks left + // (with tail or not) + + eor v0.16b,v6.16b,v27.16b // 3 blocks left + eor v1.16b,v8.16b,v28.16b + eor v24.16b,v29.16b,v9.16b + b .Lxts_outer_enc_tail + +.align 4 +.Lxts_enc_tail4x: + add x0,x0,#16 + eor v5.16b,v1.16b,v5.16b + st1 {v5.16b},[x1],#16 + eor v17.16b,v24.16b,v17.16b + st1 {v17.16b},[x1],#16 + eor v30.16b,v25.16b,v30.16b + eor v31.16b,v26.16b,v31.16b + st1 {v30.16b,v31.16b},[x1],#32 + b .Lxts_enc_done +.align 4 +.Lxts_outer_enc_tail: + aese v0.16b,v16.16b + aesmc v0.16b,v0.16b + aese v1.16b,v16.16b + aesmc v1.16b,v1.16b + aese v24.16b,v16.16b + aesmc v24.16b,v24.16b + ld1 {v16.4s},[x7],#16 + subs w6,w6,#2 + aese v0.16b,v17.16b + aesmc v0.16b,v0.16b + aese v1.16b,v17.16b + aesmc v1.16b,v1.16b + aese v24.16b,v17.16b + aesmc v24.16b,v24.16b + ld1 {v17.4s},[x7],#16 + b.gt .Lxts_outer_enc_tail + + aese v0.16b,v16.16b + aesmc v0.16b,v0.16b + aese v1.16b,v16.16b + aesmc v1.16b,v1.16b + aese v24.16b,v16.16b + aesmc v24.16b,v24.16b + eor v4.16b,v6.16b,v7.16b + subs x2,x2,#0x30 + // The iv for first block + fmov x9,d9 + fmov x10,v9.d[1] + //mov w19,#0x87 + extr x22,x10,x10,#32 + extr x10,x10,x9,#63 + and w11,w19,w22,asr#31 + eor x9,x11,x9,lsl#1 + fmov d6,x9 + fmov v6.d[1],x10 + eor v5.16b,v8.16b,v7.16b + csel x6,x2,x6,lo // x6, w6, is zero at this point + aese v0.16b,v17.16b + aesmc v0.16b,v0.16b + aese v1.16b,v17.16b + aesmc v1.16b,v1.16b + aese v24.16b,v17.16b + aesmc v24.16b,v24.16b + eor v17.16b,v9.16b,v7.16b + + add x6,x6,#0x20 + add x0,x0,x6 + mov x7,x3 + + aese v0.16b,v20.16b + aesmc v0.16b,v0.16b + aese v1.16b,v20.16b + aesmc v1.16b,v1.16b + aese v24.16b,v20.16b + aesmc v24.16b,v24.16b + aese v0.16b,v21.16b + aesmc v0.16b,v0.16b + aese v1.16b,v21.16b + aesmc v1.16b,v1.16b + aese v24.16b,v21.16b + aesmc v24.16b,v24.16b + aese v0.16b,v22.16b + aesmc v0.16b,v0.16b + aese v1.16b,v22.16b + aesmc v1.16b,v1.16b + aese v24.16b,v22.16b + aesmc v24.16b,v24.16b + aese v0.16b,v23.16b + aese v1.16b,v23.16b + aese v24.16b,v23.16b + ld1 {v27.16b},[x0],#16 + add w6,w5,#2 + ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0] + eor v4.16b,v4.16b,v0.16b + eor v5.16b,v5.16b,v1.16b + eor v24.16b,v24.16b,v17.16b + ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1] + st1 {v4.16b},[x1],#16 + st1 {v5.16b},[x1],#16 + st1 {v24.16b},[x1],#16 + cmn x2,#0x30 + b.eq .Lxts_enc_done +.Lxts_encxor_one: + orr v28.16b,v3.16b,v3.16b + orr v29.16b,v27.16b,v27.16b + nop + +.Lxts_inner_enc_tail: + cmn x2,#0x10 + eor v1.16b,v28.16b,v6.16b + eor v24.16b,v29.16b,v8.16b + b.eq .Lxts_enc_tail_loop + eor v24.16b,v29.16b,v6.16b +.Lxts_enc_tail_loop: + aese v1.16b,v16.16b + aesmc v1.16b,v1.16b + aese v24.16b,v16.16b + aesmc v24.16b,v24.16b + ld1 {v16.4s},[x7],#16 + subs w6,w6,#2 + aese v1.16b,v17.16b + aesmc v1.16b,v1.16b + aese v24.16b,v17.16b + aesmc v24.16b,v24.16b + ld1 {v17.4s},[x7],#16 + b.gt .Lxts_enc_tail_loop + + aese v1.16b,v16.16b + aesmc v1.16b,v1.16b + aese 
v24.16b,v16.16b + aesmc v24.16b,v24.16b + aese v1.16b,v17.16b + aesmc v1.16b,v1.16b + aese v24.16b,v17.16b + aesmc v24.16b,v24.16b + aese v1.16b,v20.16b + aesmc v1.16b,v1.16b + aese v24.16b,v20.16b + aesmc v24.16b,v24.16b + cmn x2,#0x20 + aese v1.16b,v21.16b + aesmc v1.16b,v1.16b + aese v24.16b,v21.16b + aesmc v24.16b,v24.16b + eor v5.16b,v6.16b,v7.16b + aese v1.16b,v22.16b + aesmc v1.16b,v1.16b + aese v24.16b,v22.16b + aesmc v24.16b,v24.16b + eor v17.16b,v8.16b,v7.16b + aese v1.16b,v23.16b + aese v24.16b,v23.16b + b.eq .Lxts_enc_one + eor v5.16b,v5.16b,v1.16b + st1 {v5.16b},[x1],#16 + eor v17.16b,v17.16b,v24.16b + orr v6.16b,v8.16b,v8.16b + st1 {v17.16b},[x1],#16 + fmov x9,d8 + fmov x10,v8.d[1] + mov w19,#0x87 + extr x22,x10,x10,#32 + extr x10,x10,x9,#63 + and w11,w19,w22,asr #31 + eor x9,x11,x9,lsl #1 + fmov d6,x9 + fmov v6.d[1],x10 + b .Lxts_enc_done + +.Lxts_enc_one: + eor v5.16b,v5.16b,v24.16b + orr v6.16b,v6.16b,v6.16b + st1 {v5.16b},[x1],#16 + fmov x9,d6 + fmov x10,v6.d[1] + mov w19,#0x87 + extr x22,x10,x10,#32 + extr x10,x10,x9,#63 + and w11,w19,w22,asr #31 + eor x9,x11,x9,lsl #1 + fmov d6,x9 + fmov v6.d[1],x10 + b .Lxts_enc_done +.align 5 +.Lxts_enc_done: + // Process the tail block with cipher stealing. + tst x21,#0xf + b.eq .Lxts_abort + + mov x20,x0 + mov x13,x1 + sub x1,x1,#16 +.composite_enc_loop: + subs x21,x21,#1 + ldrb w15,[x1,x21] + ldrb w14,[x20,x21] + strb w15,[x13,x21] + strb w14,[x1,x21] + b.gt .composite_enc_loop +.Lxts_enc_load_done: + ld1 {v26.16b},[x1] + eor v26.16b,v26.16b,v6.16b + + // Encrypt the composite block to get the last second encrypted text block + ldr w6,[x3,#240] // load key schedule... + ld1 {v0.16b},[x3],#16 + sub w6,w6,#2 + ld1 {v1.16b},[x3],#16 // load key schedule... +.Loop_final_enc: + aese v26.16b,v0.16b + aesmc v26.16b,v26.16b + ld1 {v0.4s},[x3],#16 + subs w6,w6,#2 + aese v26.16b,v1.16b + aesmc v26.16b,v26.16b + ld1 {v1.4s},[x3],#16 + b.gt .Loop_final_enc + + aese v26.16b,v0.16b + aesmc v26.16b,v26.16b + ld1 {v0.4s},[x3] + aese v26.16b,v1.16b + eor v26.16b,v26.16b,v0.16b + eor v26.16b,v26.16b,v6.16b + st1 {v26.16b},[x1] + +.Lxts_abort: + ldp x21,x22,[sp,#48] + ldp d8,d9,[sp,#32] + ldp d10,d11,[sp,#16] + ldp x19,x20,[sp],#64 +.Lxts_enc_final_abort: + ret +.size aes_hw_xts_encrypt,.-aes_hw_xts_encrypt + +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack,"",%progbits +#endif \ No newline at end of file From 0327788de473041f50cd5c11770fa1d8a72863e8 Mon Sep 17 00:00:00 2001 From: Yan Peng Date: Fri, 14 Feb 2025 01:00:34 +0000 Subject: [PATCH 2/8] More experiments --- arm/Makefile | 4 ++-- arm/aes-xts/aes-xts-armv8.S | 2 +- arm/proofs/aes-xts.ml | 8 ++++++++ 3 files changed, 11 insertions(+), 3 deletions(-) create mode 100644 arm/proofs/aes-xts.ml diff --git a/arm/Makefile b/arm/Makefile index 18bf512db..16edebe65 100644 --- a/arm/Makefile +++ b/arm/Makefile @@ -401,9 +401,9 @@ TUTORIAL_OBJ = $(TUTORIAL_PROOFS:.ml=.o) tutorial/rel_loop2.o tutorial/rel_simp2 # x18 should not be used for Apple platforms. Check this using grep. %.o : %.S - cat $< | $(PREPROCESS) | $(SPLIT) | grep -v -E '^\s+.quad\s+0x[0-9a-f]+$$' | $(ASSEMBLE) -o $@ - + cat $< | $(PREPROCESS) | $(SPLIT) | grep -v -E '^\s+.quad\s+0x[0-9a-f]+$$' | $(ASSEMBLE) -march=armv8-a+crypto -o $@ - $(OBJDUMP) $@ | ( ( ! 
grep --ignore-case -E 'w18|[^0]x18' ) || ( rm $@ ; exit 1 ) ) - cat $< | $(PREPROCESS) | $(SPLIT) | $(ASSEMBLE) -o $@ - + cat $< | $(PREPROCESS) | $(SPLIT) | $(ASSEMBLE) -march=armv8-a+crypto -o $@ - libs2nbignum.a: $(OBJ) ; ar -rc libs2nbignum.a $(OBJ) diff --git a/arm/aes-xts/aes-xts-armv8.S b/arm/aes-xts/aes-xts-armv8.S index 04ffb0bbe..fb0fd46b0 100644 --- a/arm/aes-xts/aes-xts-armv8.S +++ b/arm/aes-xts/aes-xts-armv8.S @@ -651,4 +651,4 @@ aes_hw_xts_encrypt: #if defined(__linux__) && defined(__ELF__) .section .note.GNU-stack,"",%progbits -#endif \ No newline at end of file +#endif diff --git a/arm/proofs/aes-xts.ml b/arm/proofs/aes-xts.ml new file mode 100644 index 000000000..8a245be7a --- /dev/null +++ b/arm/proofs/aes-xts.ml @@ -0,0 +1,8 @@ +(* + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT-0 + *) + +needs "arm/proofs/base.ml";; + +print_literal_from_elf "arm/aes-xts/aes-xts-armv8.o";; From 675355129dcdb4140d7fa7fb0e4045f0758b8cad Mon Sep 17 00:00:00 2001 From: Nevine Ebeid Date: Tue, 18 Mar 2025 10:28:32 -0400 Subject: [PATCH 3/8] Add Decrypt function- its instructions are all understood by the decoder. "Remove `.size` as not passing on MacOS with this build. --- arm/aes-xts/aes-xts-armv8.S | 684 +++++++++++++++++++++++++++++++++++- 1 file changed, 683 insertions(+), 1 deletion(-) diff --git a/arm/aes-xts/aes-xts-armv8.S b/arm/aes-xts/aes-xts-armv8.S index fb0fd46b0..ca34d8484 100644 --- a/arm/aes-xts/aes-xts-armv8.S +++ b/arm/aes-xts/aes-xts-armv8.S @@ -647,7 +647,689 @@ aes_hw_xts_encrypt: ldp x19,x20,[sp],#64 .Lxts_enc_final_abort: ret -.size aes_hw_xts_encrypt,.-aes_hw_xts_encrypt + +aes_hw_xts_decrypt: + cmp x2,#16 + // Original input data size bigger than 16, jump to big size processing. + b.ne Lxts_dec_big_size + // Encrypt the iv with key2, as the first XEX iv. + ldr w6,[x4,#240] + ld1 {v0.16b},[x4],#16 + ld1 {v6.16b},[x5] + sub w6,w6,#2 + ld1 {v1.16b},[x4],#16 + +Loop_dec_small_iv_enc: + aese v6.16b,v0.16b + aesmc v6.16b,v6.16b + ld1 {v0.4s},[x4],#16 + subs w6,w6,#2 + aese v6.16b,v1.16b + aesmc v6.16b,v6.16b + ld1 {v1.4s},[x4],#16 + b.gt Loop_dec_small_iv_enc + + aese v6.16b,v0.16b + aesmc v6.16b,v6.16b + ld1 {v0.4s},[x4] + aese v6.16b,v1.16b + eor v6.16b,v6.16b,v0.16b + + ld1 {v0.16b},[x0] + eor v0.16b,v6.16b,v0.16b + + ldr w6,[x3,#240] + ld1 {v28.4s,v29.4s},[x3],#32 // load key schedule... + + aesd v0.16b,v28.16b + aesimc v0.16b,v0.16b + ld1 {v16.4s,v17.4s},[x3],#32 // load key schedule... + aesd v0.16b,v29.16b + aesimc v0.16b,v0.16b + subs w6,w6,#10 // bias + b.eq Lxts_128_dec +Lxts_dec_round_loop: + aesd v0.16b,v16.16b + aesimc v0.16b,v0.16b + ld1 {v16.4s},[x3],#16 // load key schedule... + aesd v0.16b,v17.16b + aesimc v0.16b,v0.16b + ld1 {v17.4s},[x3],#16 // load key schedule... + subs w6,w6,#2 // bias + b.gt Lxts_dec_round_loop +Lxts_128_dec: + ld1 {v18.4s,v19.4s},[x3],#32 // load key schedule... + aesd v0.16b,v16.16b + aesimc v0.16b,v0.16b + aesd v0.16b,v17.16b + aesimc v0.16b,v0.16b + ld1 {v20.4s,v21.4s},[x3],#32 // load key schedule... + aesd v0.16b,v18.16b + aesimc v0.16b,v0.16b + aesd v0.16b,v19.16b + aesimc v0.16b,v0.16b + ld1 {v22.4s,v23.4s},[x3],#32 // load key schedule... 
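+ // Final rounds: on Armv8 the last decryption round omits aesimc (no
+ // InvMixColumns), so the final AddRoundKey is the explicit eor with
+ // v7 below, followed by an eor with the tweak (v6) to remove the
+ // XEX mask.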
+ aesd v0.16b,v20.16b + aesimc v0.16b,v0.16b + aesd v0.16b,v21.16b + aesimc v0.16b,v0.16b + ld1 {v7.4s},[x3] + aesd v0.16b,v22.16b + aesimc v0.16b,v0.16b + aesd v0.16b,v23.16b + eor v0.16b,v0.16b,v7.16b + eor v0.16b,v6.16b,v0.16b + st1 {v0.16b},[x1] + b Lxts_dec_final_abort +Lxts_dec_big_size: + stp x19,x20,[sp,#-64]! + stp x21,x22,[sp,#48] + stp d8,d9,[sp,#32] + stp d10,d11,[sp,#16] + + and x21,x2,#0xf + and x2,x2,#-16 + subs x2,x2,#16 + mov x8,#16 + b.lo Lxts_dec_abort + + // Encrypt the iv with key2, as the first XEX iv + ldr w6,[x4,#240] + ld1 {v0.16b},[x4],#16 + ld1 {v6.16b},[x5] + sub w6,w6,#2 + ld1 {v1.16b},[x4],#16 + +Loop_dec_iv_enc: + aese v6.16b,v0.16b + aesmc v6.16b,v6.16b + ld1 {v0.4s},[x4],#16 + subs w6,w6,#2 + aese v6.16b,v1.16b + aesmc v6.16b,v6.16b + ld1 {v1.4s},[x4],#16 + b.gt Loop_dec_iv_enc + + aese v6.16b,v0.16b + aesmc v6.16b,v6.16b + ld1 {v0.4s},[x4] + aese v6.16b,v1.16b + eor v6.16b,v6.16b,v0.16b + + // The iv for second block + // x9- iv(low), x10 - iv(high) + // the five ivs stored into, v6.16b,v8.16b,v9.16b,v10.16b,v11.16b + fmov x9,d6 + fmov x10,v6.d[1] + mov w19,#0x87 + extr x22,x10,x10,#32 + extr x10,x10,x9,#63 + and w11,w19,w22,asr #31 + eor x9,x11,x9,lsl #1 + fmov d8,x9 + fmov v8.d[1],x10 + + ldr w5,[x3,#240] // load rounds number + + // The iv for third block + extr x22,x10,x10,#32 + extr x10,x10,x9,#63 + and w11,w19,w22,asr #31 + eor x9,x11,x9,lsl #1 + fmov d9,x9 + fmov v9.d[1],x10 + + ld1 {v16.4s,v17.4s},[x3] // load key schedule... + sub w5,w5,#6 + add x7,x3,x5,lsl#4 // pointer to last 7 round keys + sub w5,w5,#2 + ld1 {v18.4s,v19.4s},[x7],#32 // load key schedule... + ld1 {v20.4s,v21.4s},[x7],#32 + ld1 {v22.4s,v23.4s},[x7],#32 + ld1 {v7.4s},[x7] + + // The iv for fourth block + extr x22,x10,x10,#32 + extr x10,x10,x9,#63 + and w11,w19,w22,asr #31 + eor x9,x11,x9,lsl #1 + fmov d10,x9 + fmov v10.d[1],x10 + + add x7,x3,#32 + mov w6,w5 + b Lxts_dec + + // Decryption +.align 5 +Lxts_dec: + tst x21,#0xf + b.eq Lxts_dec_begin + subs x2,x2,#16 + csel x8,xzr,x8,eq + ld1 {v0.16b},[x0],#16 + b.lo Lxts_done + sub x0,x0,#16 +Lxts_dec_begin: + ld1 {v0.16b},[x0],x8 + subs x2,x2,#32 // bias + add w6,w5,#2 + orr v3.16b,v0.16b,v0.16b + orr v1.16b,v0.16b,v0.16b + orr v28.16b,v0.16b,v0.16b + ld1 {v24.16b},[x0],#16 + orr v27.16b,v24.16b,v24.16b + orr v29.16b,v24.16b,v24.16b + b.lo Lxts_inner_dec_tail + eor v0.16b,v0.16b,v6.16b // before decryt, xor with iv + eor v24.16b,v24.16b,v8.16b + + orr v1.16b,v24.16b,v24.16b + ld1 {v24.16b},[x0],#16 + orr v2.16b,v0.16b,v0.16b + orr v3.16b,v1.16b,v1.16b + eor v27.16b,v24.16b,v9.16b // third block xox with third iv + eor v24.16b,v24.16b,v9.16b + cmp x2,#32 + b.lo Lxts_outer_dec_tail + + ld1 {v25.16b},[x0],#16 + + // The iv for fifth block + extr x22,x10,x10,#32 + extr x10,x10,x9,#63 + and w11,w19,w22,asr #31 + eor x9,x11,x9,lsl #1 + fmov d11,x9 + fmov v11.d[1],x10 + + ld1 {v26.16b},[x0],#16 + eor v25.16b,v25.16b,v10.16b // the fourth block + eor v26.16b,v26.16b,v11.16b + sub x2,x2,#32 // bias + mov w6,w5 + b Loop5x_xts_dec + +.align 4 +Loop5x_xts_dec: + aesd v0.16b,v16.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v16.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v16.16b + aesimc v24.16b,v24.16b + aesd v25.16b,v16.16b + aesimc v25.16b,v25.16b + aesd v26.16b,v16.16b + aesimc v26.16b,v26.16b + ld1 {v16.4s},[x7],#16 // load key schedule... 
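+ // Five blocks are kept in flight per iteration to hide aesd/aesimc
+ // latency; w6 counts the remaining middle rounds in pairs.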
+ subs w6,w6,#2 + aesd v0.16b,v17.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v17.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v17.16b + aesimc v24.16b,v24.16b + aesd v25.16b,v17.16b + aesimc v25.16b,v25.16b + aesd v26.16b,v17.16b + aesimc v26.16b,v26.16b + ld1 {v17.4s},[x7],#16 // load key schedule... + b.gt Loop5x_xts_dec + + aesd v0.16b,v16.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v16.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v16.16b + aesimc v24.16b,v24.16b + aesd v25.16b,v16.16b + aesimc v25.16b,v25.16b + aesd v26.16b,v16.16b + aesimc v26.16b,v26.16b + subs x2,x2,#0x50 // because Lxts_dec_tail4x + + aesd v0.16b,v17.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v17.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v17.16b + aesimc v24.16b,v24.16b + aesd v25.16b,v17.16b + aesimc v25.16b,v25.16b + aesd v26.16b,v17.16b + aesimc v26.16b,v26.16b + csel x6,xzr,x2,gt // borrow x6, w6, "gt" is not typo + mov x7,x3 + + aesd v0.16b,v18.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v18.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v18.16b + aesimc v24.16b,v24.16b + aesd v25.16b,v18.16b + aesimc v25.16b,v25.16b + aesd v26.16b,v18.16b + aesimc v26.16b,v26.16b + add x0,x0,x6 // x0 is adjusted in such way that + // at exit from the loop v1.16b-v26.16b + // are loaded with last "words" + add x6,x2,#0x60 // because Lxts_dec_tail4x + + aesd v0.16b,v19.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v19.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v19.16b + aesimc v24.16b,v24.16b + aesd v25.16b,v19.16b + aesimc v25.16b,v25.16b + aesd v26.16b,v19.16b + aesimc v26.16b,v26.16b + + aesd v0.16b,v20.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v20.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v20.16b + aesimc v24.16b,v24.16b + aesd v25.16b,v20.16b + aesimc v25.16b,v25.16b + aesd v26.16b,v20.16b + aesimc v26.16b,v26.16b + + aesd v0.16b,v21.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v21.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v21.16b + aesimc v24.16b,v24.16b + aesd v25.16b,v21.16b + aesimc v25.16b,v25.16b + aesd v26.16b,v21.16b + aesimc v26.16b,v26.16b + + aesd v0.16b,v22.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v22.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v22.16b + aesimc v24.16b,v24.16b + aesd v25.16b,v22.16b + aesimc v25.16b,v25.16b + aesd v26.16b,v22.16b + aesimc v26.16b,v26.16b + + eor v4.16b,v7.16b,v6.16b + aesd v0.16b,v23.16b + // The iv for first block of next iteration. 
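+ // Tweak update: multiply by x in GF(2^128) modulo
+ // x^128 + x^7 + x^2 + x + 1. With the tweak split as lo=x9, hi=x10:
+ //   carry = hi >> 63
+ //   hi    = (hi << 1) | (lo >> 63)
+ //   lo    = (lo << 1) ^ (carry ? 0x87 : 0)
+ // The branch-free sequence rotates hi so its top bit lands in bit 31
+ // of w22, smears it with asr #31, and ands it with w19 (0x87) to
+ // select the reduction constant.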
+ extr x22,x10,x10,#32 + extr x10,x10,x9,#63 + and w11,w19,w22,asr #31 + eor x9,x11,x9,lsl #1 + fmov d6,x9 + fmov v6.d[1],x10 + eor v5.16b,v7.16b,v8.16b + ld1 {v2.16b},[x0],#16 + aesd v1.16b,v23.16b + // The iv for second block + extr x22,x10,x10,#32 + extr x10,x10,x9,#63 + and w11,w19,w22,asr #31 + eor x9,x11,x9,lsl #1 + fmov d8,x9 + fmov v8.d[1],x10 + eor v17.16b,v7.16b,v9.16b + ld1 {v3.16b},[x0],#16 + aesd v24.16b,v23.16b + // The iv for third block + extr x22,x10,x10,#32 + extr x10,x10,x9,#63 + and w11,w19,w22,asr #31 + eor x9,x11,x9,lsl #1 + fmov d9,x9 + fmov v9.d[1],x10 + eor v30.16b,v7.16b,v10.16b + ld1 {v27.16b},[x0],#16 + aesd v25.16b,v23.16b + // The iv for fourth block + extr x22,x10,x10,#32 + extr x10,x10,x9,#63 + and w11,w19,w22,asr #31 + eor x9,x11,x9,lsl #1 + fmov d10,x9 + fmov v10.d[1],x10 + eor v31.16b,v7.16b,v11.16b + ld1 {v28.16b},[x0],#16 + aesd v26.16b,v23.16b + + // The iv for fifth block + extr x22,x10,x10,#32 + extr x10,x10,x9,#63 + and w11,w19,w22,asr #31 + eor x9,x11,x9,lsl #1 + fmov d11,x9 + fmov v11.d[1],x10 + + ld1 {v29.16b},[x0],#16 + cbz x6,Lxts_dec_tail4x + ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0] + eor v4.16b,v4.16b,v0.16b + eor v0.16b,v2.16b,v6.16b + eor v5.16b,v5.16b,v1.16b + eor v1.16b,v3.16b,v8.16b + eor v17.16b,v17.16b,v24.16b + eor v24.16b,v27.16b,v9.16b + eor v30.16b,v30.16b,v25.16b + eor v25.16b,v28.16b,v10.16b + eor v31.16b,v31.16b,v26.16b + st1 {v4.16b},[x1],#16 + eor v26.16b,v29.16b,v11.16b + st1 {v5.16b},[x1],#16 + mov w6,w5 + st1 {v17.16b},[x1],#16 + ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1] + st1 {v30.16b},[x1],#16 + st1 {v31.16b},[x1],#16 + b.hs Loop5x_xts_dec + + cmn x2,#0x10 + b.ne Loop5x_dec_after + // If x2(x2) equal to -0x10, the left blocks is 4. + // After specially processing, utilize the five blocks processing again. + // It will use the following IVs: v6.16b,v6.16b,v8.16b,v9.16b,v10.16b. 
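+ // Shift the tweak registers down one slot (v8<-v6, v9<-v8, v10<-v9,
+ // v11<-v10) and reseed x9/x10 from v11 so the four remaining blocks
+ // can reuse the 5x pipeline above unchanged.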
+ orr v11.16b,v10.16b,v10.16b + orr v10.16b,v9.16b,v9.16b + orr v9.16b,v8.16b,v8.16b + orr v8.16b,v6.16b,v6.16b + fmov x9,d11 + fmov x10,v11.d[1] + eor v0.16b,v6.16b,v2.16b + eor v1.16b,v8.16b,v3.16b + eor v24.16b,v27.16b,v9.16b + eor v25.16b,v28.16b,v10.16b + eor v26.16b,v29.16b,v11.16b + b.eq Loop5x_xts_dec + +Loop5x_dec_after: + add x2,x2,#0x50 + cbz x2,Lxts_done + + add w6,w5,#2 + subs x2,x2,#0x30 + b.lo Lxts_inner_dec_tail + + eor v0.16b,v6.16b,v27.16b + eor v1.16b,v8.16b,v28.16b + eor v24.16b,v29.16b,v9.16b + b Lxts_outer_dec_tail + +.align 4 +Lxts_dec_tail4x: + add x0,x0,#16 + tst x21,#0xf + eor v5.16b,v1.16b,v4.16b + st1 {v5.16b},[x1],#16 + eor v17.16b,v24.16b,v17.16b + st1 {v17.16b},[x1],#16 + eor v30.16b,v25.16b,v30.16b + eor v31.16b,v26.16b,v31.16b + st1 {v30.16b,v31.16b},[x1],#32 + + b.eq Lxts_dec_abort + ld1 {v0.4s},[x0],#16 + b Lxts_done +.align 4 +Lxts_outer_dec_tail: + aesd v0.16b,v16.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v16.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v16.16b + aesimc v24.16b,v24.16b + ld1 {v16.4s},[x7],#16 + subs w6,w6,#2 + aesd v0.16b,v17.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v17.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v17.16b + aesimc v24.16b,v24.16b + ld1 {v17.4s},[x7],#16 + b.gt Lxts_outer_dec_tail + + aesd v0.16b,v16.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v16.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v16.16b + aesimc v24.16b,v24.16b + eor v4.16b,v6.16b,v7.16b + subs x2,x2,#0x30 + // The iv for first block + fmov x9,d9 + fmov x10,v9.d[1] + mov w19,#0x87 + extr x22,x10,x10,#32 + extr x10,x10,x9,#63 + and w11,w19,w22,asr #31 + eor x9,x11,x9,lsl #1 + fmov d6,x9 + fmov v6.d[1],x10 + eor v5.16b,v8.16b,v7.16b + csel x6,x2,x6,lo // x6, w6, is zero at this point + aesd v0.16b,v17.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v17.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v17.16b + aesimc v24.16b,v24.16b + eor v17.16b,v9.16b,v7.16b + // The iv for second block + extr x22,x10,x10,#32 + extr x10,x10,x9,#63 + and w11,w19,w22,asr #31 + eor x9,x11,x9,lsl #1 + fmov d8,x9 + fmov v8.d[1],x10 + + add x6,x6,#0x20 + add x0,x0,x6 // x0 is adjusted to the last data + + mov x7,x3 + + // The iv for third block + extr x22,x10,x10,#32 + extr x10,x10,x9,#63 + and w11,w19,w22,asr #31 + eor x9,x11,x9,lsl #1 + fmov d9,x9 + fmov v9.d[1],x10 + + aesd v0.16b,v20.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v20.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v20.16b + aesimc v24.16b,v24.16b + aesd v0.16b,v21.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v21.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v21.16b + aesimc v24.16b,v24.16b + aesd v0.16b,v22.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v22.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v22.16b + aesimc v24.16b,v24.16b + ld1 {v27.16b},[x0],#16 + aesd v0.16b,v23.16b + aesd v1.16b,v23.16b + aesd v24.16b,v23.16b + ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0] + add w6,w5,#2 + eor v4.16b,v4.16b,v0.16b + eor v5.16b,v5.16b,v1.16b + eor v24.16b,v24.16b,v17.16b + ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1] + st1 {v4.16b},[x1],#16 + st1 {v5.16b},[x1],#16 + st1 {v24.16b},[x1],#16 + + cmn x2,#0x30 + add x2,x2,#0x30 + b.eq Lxts_done + sub x2,x2,#0x30 + orr v28.16b,v3.16b,v3.16b + orr v29.16b,v27.16b,v27.16b + nop + +Lxts_inner_dec_tail: + // x2 == -0x10 means two blocks left. 
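+ // cmn adds 0x10 to x2 and sets only the flags, so EQ below means
+ // exactly two blocks remain (masked with tweaks v6 and v8); otherwise
+ // a single block remains and is masked with v6 alone.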
+ cmn x2,#0x10 + eor v1.16b,v28.16b,v6.16b + eor v24.16b,v29.16b,v8.16b + b.eq Lxts_dec_tail_loop + eor v24.16b,v29.16b,v6.16b +Lxts_dec_tail_loop: + aesd v1.16b,v16.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v16.16b + aesimc v24.16b,v24.16b + ld1 {v16.4s},[x7],#16 + subs w6,w6,#2 + aesd v1.16b,v17.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v17.16b + aesimc v24.16b,v24.16b + ld1 {v17.4s},[x7],#16 + b.gt Lxts_dec_tail_loop + + aesd v1.16b,v16.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v16.16b + aesimc v24.16b,v24.16b + aesd v1.16b,v17.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v17.16b + aesimc v24.16b,v24.16b + aesd v1.16b,v20.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v20.16b + aesimc v24.16b,v24.16b + cmn x2,#0x20 + aesd v1.16b,v21.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v21.16b + aesimc v24.16b,v24.16b + eor v5.16b,v6.16b,v7.16b + aesd v1.16b,v22.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v22.16b + aesimc v24.16b,v24.16b + eor v17.16b,v8.16b,v7.16b + aesd v1.16b,v23.16b + aesd v24.16b,v23.16b + b.eq Lxts_dec_one + eor v5.16b,v5.16b,v1.16b + eor v17.16b,v17.16b,v24.16b + orr v6.16b,v9.16b,v9.16b + orr v8.16b,v10.16b,v10.16b + st1 {v5.16b},[x1],#16 + st1 {v17.16b},[x1],#16 + add x2,x2,#16 + b Lxts_done + +Lxts_dec_one: + eor v5.16b,v5.16b,v24.16b + orr v6.16b,v8.16b,v8.16b + orr v8.16b,v9.16b,v9.16b + st1 {v5.16b},[x1],#16 + add x2,x2,#32 + +Lxts_done: + tst x21,#0xf + b.eq Lxts_dec_abort + // Processing the last two blocks with cipher stealing. + mov x7,x3 + cbnz x2,Lxts_dec_1st_done + ld1 {v0.4s},[x0],#16 + + // Decrypt the last secod block to get the last plain text block +Lxts_dec_1st_done: + eor v26.16b,v0.16b,v8.16b + ldr w6,[x3,#240] + ld1 {v0.4s},[x3],#16 + sub w6,w6,#2 + ld1 {v1.4s},[x3],#16 +Loop_final_2nd_dec: + aesd v26.16b,v0.16b + aesimc v26.16b,v26.16b + ld1 {v0.4s},[x3],#16 // load key schedule... + subs w6,w6,#2 + aesd v26.16b,v1.16b + aesimc v26.16b,v26.16b + ld1 {v1.4s},[x3],#16 // load key schedule... + b.gt Loop_final_2nd_dec + + aesd v26.16b,v0.16b + aesimc v26.16b,v26.16b + ld1 {v0.4s},[x3] + aesd v26.16b,v1.16b + eor v26.16b,v26.16b,v0.16b + eor v26.16b,v26.16b,v8.16b + st1 {v26.16b},[x1] + + mov x20,x0 + add x13,x1,#16 + + // Composite the tailcnt "16 byte not aligned block" into the last second plain blocks + // to get the last encrypted block. +.composite_dec_loop: + subs x21,x21,#1 + ldrb w15,[x1,x21] + ldrb w14,[x20,x21] + strb w15,[x13,x21] + strb w14,[x1,x21] + b.gt .composite_dec_loop +Lxts_dec_load_done: + ld1 {v26.16b},[x1] + eor v26.16b,v26.16b,v6.16b + + // Decrypt the composite block to get the last second plain text block + ldr w6,[x7,#240] + ld1 {v0.16b},[x7],#16 + sub w6,w6,#2 + ld1 {v1.16b},[x7],#16 +Loop_final_dec: + aesd v26.16b,v0.16b + aesimc v26.16b,v26.16b + ld1 {v0.4s},[x7],#16 // load key schedule... + subs w6,w6,#2 + aesd v26.16b,v1.16b + aesimc v26.16b,v26.16b + ld1 {v1.4s},[x7],#16 // load key schedule... + b.gt Loop_final_dec + + aesd v26.16b,v0.16b + aesimc v26.16b,v26.16b + ld1 {v0.4s},[x7] + aesd v26.16b,v1.16b + eor v26.16b,v26.16b,v0.16b + eor v26.16b,v26.16b,v6.16b + st1 {v26.16b},[x1] + +Lxts_dec_abort: + ldp x21,x22,[sp,#48] + ldp d8,d9,[sp,#32] + ldp d10,d11,[sp,#16] + ldp x19,x20,[sp],#64 + +Lxts_dec_final_abort: + ret #if defined(__linux__) && defined(__ELF__) .section .note.GNU-stack,"",%progbits From d0b8e21b66eedd7204f6c3d3717f80831d1f84f9 Mon Sep 17 00:00:00 2001 From: Nevine Ebeid Date: Tue, 18 Mar 2025 10:41:21 -0400 Subject: [PATCH 4/8] The decrypt version is now from linux-aarch64. 
It's from AWS-LC's generated-src/linux-aarch64/crypto/fipsmodule/aesv8-armx.S It was previously taken from win-aarch64. --- arm/aes-xts/aes-xts-armv8.S | 117 ++++++++++++++++++------------------ 1 file changed, 60 insertions(+), 57 deletions(-) diff --git a/arm/aes-xts/aes-xts-armv8.S b/arm/aes-xts/aes-xts-armv8.S index ca34d8484..8c6587053 100644 --- a/arm/aes-xts/aes-xts-armv8.S +++ b/arm/aes-xts/aes-xts-armv8.S @@ -648,10 +648,13 @@ aes_hw_xts_encrypt: .Lxts_enc_final_abort: ret +# Decrypt is taken from +# https://github.com/aws/aws-lc/blob/804a11b6f965365156b0a8b6d958233e1372a2e2/generated-src/linux-aarch64/crypto/fipsmodule/aesv8-armx.S#L1475 + aes_hw_xts_decrypt: - cmp x2,#16 + cmp x2,#16 // Original input data size bigger than 16, jump to big size processing. - b.ne Lxts_dec_big_size + b.ne .Lxts_dec_big_size // Encrypt the iv with key2, as the first XEX iv. ldr w6,[x4,#240] ld1 {v0.16b},[x4],#16 @@ -659,7 +662,7 @@ aes_hw_xts_decrypt: sub w6,w6,#2 ld1 {v1.16b},[x4],#16 -Loop_dec_small_iv_enc: +.Loop_dec_small_iv_enc: aese v6.16b,v0.16b aesmc v6.16b,v6.16b ld1 {v0.4s},[x4],#16 @@ -667,7 +670,7 @@ Loop_dec_small_iv_enc: aese v6.16b,v1.16b aesmc v6.16b,v6.16b ld1 {v1.4s},[x4],#16 - b.gt Loop_dec_small_iv_enc + b.gt .Loop_dec_small_iv_enc aese v6.16b,v0.16b aesmc v6.16b,v6.16b @@ -687,8 +690,8 @@ Loop_dec_small_iv_enc: aesd v0.16b,v29.16b aesimc v0.16b,v0.16b subs w6,w6,#10 // bias - b.eq Lxts_128_dec -Lxts_dec_round_loop: + b.eq .Lxts_128_dec +.Lxts_dec_round_loop: aesd v0.16b,v16.16b aesimc v0.16b,v0.16b ld1 {v16.4s},[x3],#16 // load key schedule... @@ -696,8 +699,8 @@ Lxts_dec_round_loop: aesimc v0.16b,v0.16b ld1 {v17.4s},[x3],#16 // load key schedule... subs w6,w6,#2 // bias - b.gt Lxts_dec_round_loop -Lxts_128_dec: + b.gt .Lxts_dec_round_loop +.Lxts_128_dec: ld1 {v18.4s,v19.4s},[x3],#32 // load key schedule... aesd v0.16b,v16.16b aesimc v0.16b,v0.16b @@ -720,8 +723,8 @@ Lxts_128_dec: eor v0.16b,v0.16b,v7.16b eor v0.16b,v6.16b,v0.16b st1 {v0.16b},[x1] - b Lxts_dec_final_abort -Lxts_dec_big_size: + b .Lxts_dec_final_abort +.Lxts_dec_big_size: stp x19,x20,[sp,#-64]! 
stp x21,x22,[sp,#48] stp d8,d9,[sp,#32] @@ -731,7 +734,7 @@ Lxts_dec_big_size: and x2,x2,#-16 subs x2,x2,#16 mov x8,#16 - b.lo Lxts_dec_abort + b.lo .Lxts_dec_abort // Encrypt the iv with key2, as the first XEX iv ldr w6,[x4,#240] @@ -740,7 +743,7 @@ Lxts_dec_big_size: sub w6,w6,#2 ld1 {v1.16b},[x4],#16 -Loop_dec_iv_enc: +.Loop_dec_iv_enc: aese v6.16b,v0.16b aesmc v6.16b,v6.16b ld1 {v0.4s},[x4],#16 @@ -748,7 +751,7 @@ Loop_dec_iv_enc: aese v6.16b,v1.16b aesmc v6.16b,v6.16b ld1 {v1.4s},[x4],#16 - b.gt Loop_dec_iv_enc + b.gt .Loop_dec_iv_enc aese v6.16b,v0.16b aesmc v6.16b,v6.16b @@ -798,19 +801,19 @@ Loop_dec_iv_enc: add x7,x3,#32 mov w6,w5 - b Lxts_dec + b .Lxts_dec // Decryption .align 5 -Lxts_dec: +.Lxts_dec: tst x21,#0xf - b.eq Lxts_dec_begin + b.eq .Lxts_dec_begin subs x2,x2,#16 csel x8,xzr,x8,eq ld1 {v0.16b},[x0],#16 - b.lo Lxts_done + b.lo .Lxts_done sub x0,x0,#16 -Lxts_dec_begin: +.Lxts_dec_begin: ld1 {v0.16b},[x0],x8 subs x2,x2,#32 // bias add w6,w5,#2 @@ -820,7 +823,7 @@ Lxts_dec_begin: ld1 {v24.16b},[x0],#16 orr v27.16b,v24.16b,v24.16b orr v29.16b,v24.16b,v24.16b - b.lo Lxts_inner_dec_tail + b.lo .Lxts_inner_dec_tail eor v0.16b,v0.16b,v6.16b // before decryt, xor with iv eor v24.16b,v24.16b,v8.16b @@ -831,7 +834,7 @@ Lxts_dec_begin: eor v27.16b,v24.16b,v9.16b // third block xox with third iv eor v24.16b,v24.16b,v9.16b cmp x2,#32 - b.lo Lxts_outer_dec_tail + b.lo .Lxts_outer_dec_tail ld1 {v25.16b},[x0],#16 @@ -848,10 +851,10 @@ Lxts_dec_begin: eor v26.16b,v26.16b,v11.16b sub x2,x2,#32 // bias mov w6,w5 - b Loop5x_xts_dec + b .Loop5x_xts_dec .align 4 -Loop5x_xts_dec: +.Loop5x_xts_dec: aesd v0.16b,v16.16b aesimc v0.16b,v0.16b aesd v1.16b,v16.16b @@ -875,7 +878,7 @@ Loop5x_xts_dec: aesd v26.16b,v17.16b aesimc v26.16b,v26.16b ld1 {v17.4s},[x7],#16 // load key schedule... - b.gt Loop5x_xts_dec + b.gt .Loop5x_xts_dec aesd v0.16b,v16.16b aesimc v0.16b,v0.16b @@ -887,7 +890,7 @@ Loop5x_xts_dec: aesimc v25.16b,v25.16b aesd v26.16b,v16.16b aesimc v26.16b,v26.16b - subs x2,x2,#0x50 // because Lxts_dec_tail4x + subs x2,x2,#0x50 // because .Lxts_dec_tail4x aesd v0.16b,v17.16b aesimc v0.16b,v0.16b @@ -915,7 +918,7 @@ Loop5x_xts_dec: add x0,x0,x6 // x0 is adjusted in such way that // at exit from the loop v1.16b-v26.16b // are loaded with last "words" - add x6,x2,#0x60 // because Lxts_dec_tail4x + add x6,x2,#0x60 // because .Lxts_dec_tail4x aesd v0.16b,v19.16b aesimc v0.16b,v0.16b @@ -1013,7 +1016,7 @@ Loop5x_xts_dec: fmov v11.d[1],x10 ld1 {v29.16b},[x0],#16 - cbz x6,Lxts_dec_tail4x + cbz x6,.Lxts_dec_tail4x ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0] eor v4.16b,v4.16b,v0.16b eor v0.16b,v2.16b,v6.16b @@ -1032,10 +1035,10 @@ Loop5x_xts_dec: ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1] st1 {v30.16b},[x1],#16 st1 {v31.16b},[x1],#16 - b.hs Loop5x_xts_dec + b.hs .Loop5x_xts_dec cmn x2,#0x10 - b.ne Loop5x_dec_after + b.ne .Loop5x_dec_after // If x2(x2) equal to -0x10, the left blocks is 4. // After specially processing, utilize the five blocks processing again. // It will use the following IVs: v6.16b,v6.16b,v8.16b,v9.16b,v10.16b. 
@@ -1050,23 +1053,23 @@ Loop5x_xts_dec: eor v24.16b,v27.16b,v9.16b eor v25.16b,v28.16b,v10.16b eor v26.16b,v29.16b,v11.16b - b.eq Loop5x_xts_dec + b.eq .Loop5x_xts_dec -Loop5x_dec_after: +.Loop5x_dec_after: add x2,x2,#0x50 - cbz x2,Lxts_done + cbz x2,.Lxts_done add w6,w5,#2 subs x2,x2,#0x30 - b.lo Lxts_inner_dec_tail + b.lo .Lxts_inner_dec_tail eor v0.16b,v6.16b,v27.16b eor v1.16b,v8.16b,v28.16b eor v24.16b,v29.16b,v9.16b - b Lxts_outer_dec_tail + b .Lxts_outer_dec_tail .align 4 -Lxts_dec_tail4x: +.Lxts_dec_tail4x: add x0,x0,#16 tst x21,#0xf eor v5.16b,v1.16b,v4.16b @@ -1077,11 +1080,11 @@ Lxts_dec_tail4x: eor v31.16b,v26.16b,v31.16b st1 {v30.16b,v31.16b},[x1],#32 - b.eq Lxts_dec_abort + b.eq .Lxts_dec_abort ld1 {v0.4s},[x0],#16 - b Lxts_done + b .Lxts_done .align 4 -Lxts_outer_dec_tail: +.Lxts_outer_dec_tail: aesd v0.16b,v16.16b aesimc v0.16b,v0.16b aesd v1.16b,v16.16b @@ -1097,7 +1100,7 @@ Lxts_outer_dec_tail: aesd v24.16b,v17.16b aesimc v24.16b,v24.16b ld1 {v17.4s},[x7],#16 - b.gt Lxts_outer_dec_tail + b.gt .Lxts_outer_dec_tail aesd v0.16b,v16.16b aesimc v0.16b,v0.16b @@ -1181,20 +1184,20 @@ Lxts_outer_dec_tail: cmn x2,#0x30 add x2,x2,#0x30 - b.eq Lxts_done + b.eq .Lxts_done sub x2,x2,#0x30 orr v28.16b,v3.16b,v3.16b orr v29.16b,v27.16b,v27.16b nop -Lxts_inner_dec_tail: +.Lxts_inner_dec_tail: // x2 == -0x10 means two blocks left. cmn x2,#0x10 eor v1.16b,v28.16b,v6.16b eor v24.16b,v29.16b,v8.16b - b.eq Lxts_dec_tail_loop + b.eq .Lxts_dec_tail_loop eor v24.16b,v29.16b,v6.16b -Lxts_dec_tail_loop: +.Lxts_dec_tail_loop: aesd v1.16b,v16.16b aesimc v1.16b,v1.16b aesd v24.16b,v16.16b @@ -1206,7 +1209,7 @@ Lxts_dec_tail_loop: aesd v24.16b,v17.16b aesimc v24.16b,v24.16b ld1 {v17.4s},[x7],#16 - b.gt Lxts_dec_tail_loop + b.gt .Lxts_dec_tail_loop aesd v1.16b,v16.16b aesimc v1.16b,v1.16b @@ -1233,7 +1236,7 @@ Lxts_dec_tail_loop: eor v17.16b,v8.16b,v7.16b aesd v1.16b,v23.16b aesd v24.16b,v23.16b - b.eq Lxts_dec_one + b.eq .Lxts_dec_one eor v5.16b,v5.16b,v1.16b eor v17.16b,v17.16b,v24.16b orr v6.16b,v9.16b,v9.16b @@ -1241,31 +1244,31 @@ Lxts_dec_tail_loop: st1 {v5.16b},[x1],#16 st1 {v17.16b},[x1],#16 add x2,x2,#16 - b Lxts_done + b .Lxts_done -Lxts_dec_one: +.Lxts_dec_one: eor v5.16b,v5.16b,v24.16b orr v6.16b,v8.16b,v8.16b orr v8.16b,v9.16b,v9.16b st1 {v5.16b},[x1],#16 add x2,x2,#32 -Lxts_done: +.Lxts_done: tst x21,#0xf - b.eq Lxts_dec_abort + b.eq .Lxts_dec_abort // Processing the last two blocks with cipher stealing. mov x7,x3 - cbnz x2,Lxts_dec_1st_done + cbnz x2,.Lxts_dec_1st_done ld1 {v0.4s},[x0],#16 // Decrypt the last secod block to get the last plain text block -Lxts_dec_1st_done: +.Lxts_dec_1st_done: eor v26.16b,v0.16b,v8.16b ldr w6,[x3,#240] ld1 {v0.4s},[x3],#16 sub w6,w6,#2 ld1 {v1.4s},[x3],#16 -Loop_final_2nd_dec: +.Loop_final_2nd_dec: aesd v26.16b,v0.16b aesimc v26.16b,v26.16b ld1 {v0.4s},[x3],#16 // load key schedule... @@ -1273,7 +1276,7 @@ Loop_final_2nd_dec: aesd v26.16b,v1.16b aesimc v26.16b,v26.16b ld1 {v1.4s},[x3],#16 // load key schedule... - b.gt Loop_final_2nd_dec + b.gt .Loop_final_2nd_dec aesd v26.16b,v0.16b aesimc v26.16b,v26.16b @@ -1295,7 +1298,7 @@ Loop_final_2nd_dec: strb w15,[x13,x21] strb w14,[x1,x21] b.gt .composite_dec_loop -Lxts_dec_load_done: +.Lxts_dec_load_done: ld1 {v26.16b},[x1] eor v26.16b,v26.16b,v6.16b @@ -1304,7 +1307,7 @@ Lxts_dec_load_done: ld1 {v0.16b},[x7],#16 sub w6,w6,#2 ld1 {v1.16b},[x7],#16 -Loop_final_dec: +.Loop_final_dec: aesd v26.16b,v0.16b aesimc v26.16b,v26.16b ld1 {v0.4s},[x7],#16 // load key schedule... 
@@ -1312,7 +1315,7 @@ Loop_final_dec: aesd v26.16b,v1.16b aesimc v26.16b,v26.16b ld1 {v1.4s},[x7],#16 // load key schedule... - b.gt Loop_final_dec + b.gt .Loop_final_dec aesd v26.16b,v0.16b aesimc v26.16b,v26.16b @@ -1322,13 +1325,13 @@ Loop_final_dec: eor v26.16b,v26.16b,v6.16b st1 {v26.16b},[x1] -Lxts_dec_abort: +.Lxts_dec_abort: ldp x21,x22,[sp,#48] ldp d8,d9,[sp,#32] ldp d10,d11,[sp,#16] ldp x19,x20,[sp],#64 -Lxts_dec_final_abort: +.Lxts_dec_final_abort: ret #if defined(__linux__) && defined(__ELF__) From 71894bc432aaa8646de597eb65f0f96b08fe59c7 Mon Sep 17 00:00:00 2001 From: Nevine Ebeid Date: Thu, 20 Mar 2025 17:21:50 -0400 Subject: [PATCH 5/8] enable stopping at first error. --- arm/proofs/{aes-xts.ml => aes-xts-armv8.ml} | 2 ++ 1 file changed, 2 insertions(+) rename arm/proofs/{aes-xts.ml => aes-xts-armv8.ml} (86%) diff --git a/arm/proofs/aes-xts.ml b/arm/proofs/aes-xts-armv8.ml similarity index 86% rename from arm/proofs/aes-xts.ml rename to arm/proofs/aes-xts-armv8.ml index 8a245be7a..4b4df2eb9 100644 --- a/arm/proofs/aes-xts.ml +++ b/arm/proofs/aes-xts-armv8.ml @@ -3,6 +3,8 @@ * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT-0 *) +use_file_raise_failure := true;; + needs "arm/proofs/base.ml";; print_literal_from_elf "arm/aes-xts/aes-xts-armv8.o";; From 4d77fdb1222037bb61c19f13f5265762d1f4c9b1 Mon Sep 17 00:00:00 2001 From: Nevine Ebeid Date: Fri, 21 Mar 2025 11:41:20 -0400 Subject: [PATCH 6/8] Save decoded output to a text file. --- arm/aes-xts/aes-xts-armv8.txt | 1124 +++++++++++++++++++++++++++++++++ arm/proofs/aes-xts-armv8.ml | 5 +- 2 files changed, 1128 insertions(+), 1 deletion(-) create mode 100644 arm/aes-xts/aes-xts-armv8.txt diff --git a/arm/aes-xts/aes-xts-armv8.txt b/arm/aes-xts/aes-xts-armv8.txt new file mode 100644 index 000000000..16e1b3ad7 --- /dev/null +++ b/arm/aes-xts/aes-xts-armv8.txt @@ -0,0 +1,1124 @@ +[ + 0xf100405f; (* arm_CMP X2 (rvalue (word 16)) *) + 0x540007e1; (* arm_BNE (word 252) *) + 0xb940f086; (* arm_LDR W6 X4 (Immediate_Offset (word 240)) *) + 0x4cdf7080; (* arm_LDR Q0 X4 (Postimmediate_Offset (word 16)) *) + 0x4c4070a6; (* arm_LDR Q6 X5 No_Offset *) + 0x510008c6; (* arm_SUB W6 W6 (rvalue (word 2)) *) + 0x4cdf7081; (* arm_LDR Q1 X4 (Postimmediate_Offset (word 16)) *) + 0x4e284806; (* arm_AESE Q6 Q0 *) + 0x4e2868c6; (* arm_AESMC Q6 Q6 *) + 0x4cdf7880; (* arm_LDR Q0 X4 (Postimmediate_Offset (word 16)) *) + 0x710008c6; (* arm_SUBS W6 W6 (rvalue (word 2)) *) + 0x4e284826; (* arm_AESE Q6 Q1 *) + 0x4e2868c6; (* arm_AESMC Q6 Q6 *) + 0x4cdf7881; (* arm_LDR Q1 X4 (Postimmediate_Offset (word 16)) *) + 0x54ffff2c; (* arm_BGT (word 2097124) *) + 0x4e284806; (* arm_AESE Q6 Q0 *) + 0x4e2868c6; (* arm_AESMC Q6 Q6 *) + 0x4c407880; (* arm_LDR Q0 X4 No_Offset *) + 0x4e284826; (* arm_AESE Q6 Q1 *) + 0x6e201cc6; (* arm_EOR_VEC Q6 Q6 Q0 128 *) + 0x4c407000; (* arm_LDR Q0 X0 No_Offset *) + 0x6e201cc0; (* arm_EOR_VEC Q0 Q6 Q0 128 *) + 0xb940f066; (* arm_LDR W6 X3 (Immediate_Offset (word 240)) *) + 0x4cdfa87c; (* arm_ldstp_2q true (word 28) X3 (Postimmediate_Offset (word 32)) *) + 0x4e284b80; (* arm_AESE Q0 Q28 *) + 0x4e286800; (* arm_AESMC Q0 Q0 *) + 0x4cdfa870; (* arm_ldstp_2q true (word 16) X3 (Postimmediate_Offset (word 32)) *) + 0x4e284ba0; (* arm_AESE Q0 Q29 *) + 0x4e286800; (* arm_AESMC Q0 Q0 *) + 0x710028c6; (* arm_SUBS W6 W6 (rvalue (word 10)) *) + 0x54000120; (* arm_BEQ (word 36) *) + 0x4e284a00; (* arm_AESE Q0 Q16 *) + 0x4e286800; (* arm_AESMC Q0 Q0 *) + 0x4cdf7870; (* arm_LDR Q16 X3 (Postimmediate_Offset (word 16)) *) + 0x4e284a20; (* arm_AESE Q0 Q17 
*) + 0x4e286800; (* arm_AESMC Q0 Q0 *) + 0x4cdf7871; (* arm_LDR Q17 X3 (Postimmediate_Offset (word 16)) *) + 0x710008c6; (* arm_SUBS W6 W6 (rvalue (word 2)) *) + 0x54ffff2c; (* arm_BGT (word 2097124) *) + 0x4cdfa872; (* arm_ldstp_2q true (word 18) X3 (Postimmediate_Offset (word 32)) *) + 0x4e284a00; (* arm_AESE Q0 Q16 *) + 0x4e286800; (* arm_AESMC Q0 Q0 *) + 0x4e284a20; (* arm_AESE Q0 Q17 *) + 0x4e286800; (* arm_AESMC Q0 Q0 *) + 0x4cdfa874; (* arm_ldstp_2q true (word 20) X3 (Postimmediate_Offset (word 32)) *) + 0x4e284a40; (* arm_AESE Q0 Q18 *) + 0x4e286800; (* arm_AESMC Q0 Q0 *) + 0x4e284a60; (* arm_AESE Q0 Q19 *) + 0x4e286800; (* arm_AESMC Q0 Q0 *) + 0x4cdfa876; (* arm_ldstp_2q true (word 22) X3 (Postimmediate_Offset (word 32)) *) + 0x4e284a80; (* arm_AESE Q0 Q20 *) + 0x4e286800; (* arm_AESMC Q0 Q0 *) + 0x4e284aa0; (* arm_AESE Q0 Q21 *) + 0x4e286800; (* arm_AESMC Q0 Q0 *) + 0x4c407867; (* arm_LDR Q7 X3 No_Offset *) + 0x4e284ac0; (* arm_AESE Q0 Q22 *) + 0x4e286800; (* arm_AESMC Q0 Q0 *) + 0x4e284ae0; (* arm_AESE Q0 Q23 *) + 0x6e271c00; (* arm_EOR_VEC Q0 Q0 Q7 128 *) + 0x6e261c00; (* arm_EOR_VEC Q0 Q0 Q6 128 *) + 0x4c007020; (* arm_STR Q0 X1 No_Offset *) + 0x140001e7; (* arm_B (word 1948) *) + 0xd503201f; (* arm_NOP *) + 0xd503201f; (* arm_NOP *) + 0xa9bc53f3; (* arm_STP X19 X20 SP (Preimmediate_Offset (iword (-- &64))) *) + 0xa9035bf5; (* arm_STP X21 X22 SP (Immediate_Offset (iword (&48))) *) + 0x6d0227e8; (* arm_STP D8 D9 SP (Immediate_Offset (iword (&32))) *) + 0x6d012fea; (* arm_STP D10 D11 SP (Immediate_Offset (iword (&16))) *) + 0x92400c55; (* arm_AND X21 X2 (rvalue (word 15)) *) + 0x927cec42; (* arm_AND X2 X2 (rvalue (word 18446744073709551600)) *) + 0xf1004042; (* arm_SUBS X2 X2 (rvalue (word 16)) *) + 0xd2800208; (* arm_MOV X8 (rvalue (word 16)) *) + 0x54003b03; (* arm_BCC (word 1888) *) + 0x9a8803e8; (* arm_CSEL X8 XZR X8 Condition_EQ *) + 0xb940f086; (* arm_LDR W6 X4 (Immediate_Offset (word 240)) *) + 0x4cdf7880; (* arm_LDR Q0 X4 (Postimmediate_Offset (word 16)) *) + 0x4c4070a6; (* arm_LDR Q6 X5 No_Offset *) + 0x510008c6; (* arm_SUB W6 W6 (rvalue (word 2)) *) + 0x4cdf7881; (* arm_LDR Q1 X4 (Postimmediate_Offset (word 16)) *) + 0x4e284806; (* arm_AESE Q6 Q0 *) + 0x4e2868c6; (* arm_AESMC Q6 Q6 *) + 0x4cdf7880; (* arm_LDR Q0 X4 (Postimmediate_Offset (word 16)) *) + 0x710008c6; (* arm_SUBS W6 W6 (rvalue (word 2)) *) + 0x4e284826; (* arm_AESE Q6 Q1 *) + 0x4e2868c6; (* arm_AESMC Q6 Q6 *) + 0x4cdf7881; (* arm_LDR Q1 X4 (Postimmediate_Offset (word 16)) *) + 0x54ffff2c; (* arm_BGT (word 2097124) *) + 0x4e284806; (* arm_AESE Q6 Q0 *) + 0x4e2868c6; (* arm_AESMC Q6 Q6 *) + 0x4c407880; (* arm_LDR Q0 X4 No_Offset *) + 0x4e284826; (* arm_AESE Q6 Q1 *) + 0x6e201cc6; (* arm_EOR_VEC Q6 Q6 Q0 128 *) + 0x9e6600c9; (* arm_FMOV_FtoI X9 Q6 0 *) + 0x9eae00ca; (* arm_FMOV_FtoI X10 Q6 1 *) + 0x528010f3; (* arm_MOV W19 (rvalue (word 135)) *) + 0x93ca8156; (* arm_ROR X22 X10 32 *) + 0x93c9fd4a; (* arm_EXTR X10 X10 X9 63 *) + 0x0a967e6b; (* arm_AND W11 W19 (Shiftedreg W22 ASR 31) *) + 0xca090569; (* arm_EOR X9 X11 (Shiftedreg X9 LSL 1) *) + 0x9e670128; (* arm_FMOV_ItoF Q8 X9 0 *) + 0x9eaf0148; (* arm_FMOV_ItoF Q8 X10 1 *) + 0xb940f065; (* arm_LDR W5 X3 (Immediate_Offset (word 240)) *) + 0x4cc87000; (* arm_LDR Q0 X0 (Register_Offset X8) *) + 0x4c40a870; (* arm_ldstp_2q true (word 16) X3 No_Offset *) + 0x510018a5; (* arm_SUB W5 W5 (rvalue (word 6)) *) + 0x8b051067; (* arm_ADD X7 X3 (Shiftedreg X5 LSL 4) *) + 0x510008a5; (* arm_SUB W5 W5 (rvalue (word 2)) *) + 0x4cdfa8f2; (* arm_ldstp_2q true (word 18) X7 
(Postimmediate_Offset (word 32)) *) + 0x4cdfa8f4; (* arm_ldstp_2q true (word 20) X7 (Postimmediate_Offset (word 32)) *) + 0x4cdfa8f6; (* arm_ldstp_2q true (word 22) X7 (Postimmediate_Offset (word 32)) *) + 0x4c4078e7; (* arm_LDR Q7 X7 No_Offset *) + 0x91008067; (* arm_ADD X7 X3 (rvalue (word 32)) *) + 0x2a0503e6; (* arm_MOV W6 W5 *) + 0x4cdf7018; (* arm_LDR Q24 X0 (Postimmediate_Offset (word 16)) *) + 0xf1008042; (* arm_SUBS X2 X2 (rvalue (word 32)) *) + 0x110008a6; (* arm_ADD W6 W5 (rvalue (word 2)) *) + 0x4ea01c03; (* arm_MOV_VEC Q3 Q0 128 *) + 0x4ea01c01; (* arm_MOV_VEC Q1 Q0 128 *) + 0x4ea01c1c; (* arm_MOV_VEC Q28 Q0 128 *) + 0x4eb81f1b; (* arm_MOV_VEC Q27 Q24 128 *) + 0x4eb81f1d; (* arm_MOV_VEC Q29 Q24 128 *) + 0x540027e3; (* arm_BCC (word 1276) *) + 0x6e261c00; (* arm_EOR_VEC Q0 Q0 Q6 128 *) + 0x6e281f18; (* arm_EOR_VEC Q24 Q24 Q8 128 *) + 0x93ca8156; (* arm_ROR X22 X10 32 *) + 0x93c9fd4a; (* arm_EXTR X10 X10 X9 63 *) + 0x0a967e6b; (* arm_AND W11 W19 (Shiftedreg W22 ASR 31) *) + 0xca090569; (* arm_EOR X9 X11 (Shiftedreg X9 LSL 1) *) + 0x9e670129; (* arm_FMOV_ItoF Q9 X9 0 *) + 0x9eaf0149; (* arm_FMOV_ItoF Q9 X10 1 *) + 0x4eb81f01; (* arm_MOV_VEC Q1 Q24 128 *) + 0x4cdf7018; (* arm_LDR Q24 X0 (Postimmediate_Offset (word 16)) *) + 0x4ea01c02; (* arm_MOV_VEC Q2 Q0 128 *) + 0x4ea11c23; (* arm_MOV_VEC Q3 Q1 128 *) + 0x6e291f1b; (* arm_EOR_VEC Q27 Q24 Q9 128 *) + 0x6e291f18; (* arm_EOR_VEC Q24 Q24 Q9 128 *) + 0xf100805f; (* arm_CMP X2 (rvalue (word 32)) *) + 0x54001be3; (* arm_BCC (word 892) *) + 0x93ca8156; (* arm_ROR X22 X10 32 *) + 0x93c9fd4a; (* arm_EXTR X10 X10 X9 63 *) + 0x0a967e6b; (* arm_AND W11 W19 (Shiftedreg W22 ASR 31) *) + 0xca090569; (* arm_EOR X9 X11 (Shiftedreg X9 LSL 1) *) + 0x9e67012a; (* arm_FMOV_ItoF Q10 X9 0 *) + 0x9eaf014a; (* arm_FMOV_ItoF Q10 X10 1 *) + 0x4cdf7019; (* arm_LDR Q25 X0 (Postimmediate_Offset (word 16)) *) + 0x93ca8156; (* arm_ROR X22 X10 32 *) + 0x93c9fd4a; (* arm_EXTR X10 X10 X9 63 *) + 0x0a967e6b; (* arm_AND W11 W19 (Shiftedreg W22 ASR 31) *) + 0xca090569; (* arm_EOR X9 X11 (Shiftedreg X9 LSL 1) *) + 0x9e67012b; (* arm_FMOV_ItoF Q11 X9 0 *) + 0x9eaf014b; (* arm_FMOV_ItoF Q11 X10 1 *) + 0x4cdf701a; (* arm_LDR Q26 X0 (Postimmediate_Offset (word 16)) *) + 0x6e2a1f39; (* arm_EOR_VEC Q25 Q25 Q10 128 *) + 0x6e2b1f5a; (* arm_EOR_VEC Q26 Q26 Q11 128 *) + 0xd1008042; (* arm_SUB X2 X2 (rvalue (word 32)) *) + 0x2a0503e6; (* arm_MOV W6 W5 *) + 0x14000004; (* arm_B (word 16) *) + 0xd503201f; (* arm_NOP *) + 0xd503201f; (* arm_NOP *) + 0xd503201f; (* arm_NOP *) + 0x4e284a00; (* arm_AESE Q0 Q16 *) + 0x4e286800; (* arm_AESMC Q0 Q0 *) + 0x4e284a01; (* arm_AESE Q1 Q16 *) + 0x4e286821; (* arm_AESMC Q1 Q1 *) + 0x4e284a18; (* arm_AESE Q24 Q16 *) + 0x4e286b18; (* arm_AESMC Q24 Q24 *) + 0x4e284a19; (* arm_AESE Q25 Q16 *) + 0x4e286b39; (* arm_AESMC Q25 Q25 *) + 0x4e284a1a; (* arm_AESE Q26 Q16 *) + 0x4e286b5a; (* arm_AESMC Q26 Q26 *) + 0x4cdf78f0; (* arm_LDR Q16 X7 (Postimmediate_Offset (word 16)) *) + 0x710008c6; (* arm_SUBS W6 W6 (rvalue (word 2)) *) + 0x4e284a20; (* arm_AESE Q0 Q17 *) + 0x4e286800; (* arm_AESMC Q0 Q0 *) + 0x4e284a21; (* arm_AESE Q1 Q17 *) + 0x4e286821; (* arm_AESMC Q1 Q1 *) + 0x4e284a38; (* arm_AESE Q24 Q17 *) + 0x4e286b18; (* arm_AESMC Q24 Q24 *) + 0x4e284a39; (* arm_AESE Q25 Q17 *) + 0x4e286b39; (* arm_AESMC Q25 Q25 *) + 0x4e284a3a; (* arm_AESE Q26 Q17 *) + 0x4e286b5a; (* arm_AESMC Q26 Q26 *) + 0x4cdf78f1; (* arm_LDR Q17 X7 (Postimmediate_Offset (word 16)) *) + 0x54fffd2c; (* arm_BGT (word 2097060) *) + 0x4e284a00; (* arm_AESE Q0 Q16 *) + 0x4e286800; (* 
arm_AESMC Q0 Q0 *) + 0x4e284a01; (* arm_AESE Q1 Q16 *) + 0x4e286821; (* arm_AESMC Q1 Q1 *) + 0x4e284a18; (* arm_AESE Q24 Q16 *) + 0x4e286b18; (* arm_AESMC Q24 Q24 *) + 0x4e284a19; (* arm_AESE Q25 Q16 *) + 0x4e286b39; (* arm_AESMC Q25 Q25 *) + 0x4e284a1a; (* arm_AESE Q26 Q16 *) + 0x4e286b5a; (* arm_AESMC Q26 Q26 *) + 0xf1014042; (* arm_SUBS X2 X2 (rvalue (word 80)) *) + 0x4e284a20; (* arm_AESE Q0 Q17 *) + 0x4e286800; (* arm_AESMC Q0 Q0 *) + 0x4e284a21; (* arm_AESE Q1 Q17 *) + 0x4e286821; (* arm_AESMC Q1 Q1 *) + 0x4e284a38; (* arm_AESE Q24 Q17 *) + 0x4e286b18; (* arm_AESMC Q24 Q24 *) + 0x4e284a39; (* arm_AESE Q25 Q17 *) + 0x4e286b39; (* arm_AESMC Q25 Q25 *) + 0x4e284a3a; (* arm_AESE Q26 Q17 *) + 0x4e286b5a; (* arm_AESMC Q26 Q26 *) + 0x9a82c3e6; (* arm_CSEL X6 XZR X2 Condition_GT *) + 0xaa0303e7; (* arm_MOV X7 X3 *) + 0x4e284a40; (* arm_AESE Q0 Q18 *) + 0x4e286800; (* arm_AESMC Q0 Q0 *) + 0x4e284a41; (* arm_AESE Q1 Q18 *) + 0x4e286821; (* arm_AESMC Q1 Q1 *) + 0x4e284a58; (* arm_AESE Q24 Q18 *) + 0x4e286b18; (* arm_AESMC Q24 Q24 *) + 0x4e284a59; (* arm_AESE Q25 Q18 *) + 0x4e286b39; (* arm_AESMC Q25 Q25 *) + 0x4e284a5a; (* arm_AESE Q26 Q18 *) + 0x4e286b5a; (* arm_AESMC Q26 Q26 *) + 0x8b060000; (* arm_ADD X0 X0 X6 *) + 0x91018046; (* arm_ADD X6 X2 (rvalue (word 96)) *) + 0x4e284a60; (* arm_AESE Q0 Q19 *) + 0x4e286800; (* arm_AESMC Q0 Q0 *) + 0x4e284a61; (* arm_AESE Q1 Q19 *) + 0x4e286821; (* arm_AESMC Q1 Q1 *) + 0x4e284a78; (* arm_AESE Q24 Q19 *) + 0x4e286b18; (* arm_AESMC Q24 Q24 *) + 0x4e284a79; (* arm_AESE Q25 Q19 *) + 0x4e286b39; (* arm_AESMC Q25 Q25 *) + 0x4e284a7a; (* arm_AESE Q26 Q19 *) + 0x4e286b5a; (* arm_AESMC Q26 Q26 *) + 0x4e284a80; (* arm_AESE Q0 Q20 *) + 0x4e286800; (* arm_AESMC Q0 Q0 *) + 0x4e284a81; (* arm_AESE Q1 Q20 *) + 0x4e286821; (* arm_AESMC Q1 Q1 *) + 0x4e284a98; (* arm_AESE Q24 Q20 *) + 0x4e286b18; (* arm_AESMC Q24 Q24 *) + 0x4e284a99; (* arm_AESE Q25 Q20 *) + 0x4e286b39; (* arm_AESMC Q25 Q25 *) + 0x4e284a9a; (* arm_AESE Q26 Q20 *) + 0x4e286b5a; (* arm_AESMC Q26 Q26 *) + 0x4e284aa0; (* arm_AESE Q0 Q21 *) + 0x4e286800; (* arm_AESMC Q0 Q0 *) + 0x4e284aa1; (* arm_AESE Q1 Q21 *) + 0x4e286821; (* arm_AESMC Q1 Q1 *) + 0x4e284ab8; (* arm_AESE Q24 Q21 *) + 0x4e286b18; (* arm_AESMC Q24 Q24 *) + 0x4e284ab9; (* arm_AESE Q25 Q21 *) + 0x4e286b39; (* arm_AESMC Q25 Q25 *) + 0x4e284aba; (* arm_AESE Q26 Q21 *) + 0x4e286b5a; (* arm_AESMC Q26 Q26 *) + 0x4e284ac0; (* arm_AESE Q0 Q22 *) + 0x4e286800; (* arm_AESMC Q0 Q0 *) + 0x4e284ac1; (* arm_AESE Q1 Q22 *) + 0x4e286821; (* arm_AESMC Q1 Q1 *) + 0x4e284ad8; (* arm_AESE Q24 Q22 *) + 0x4e286b18; (* arm_AESMC Q24 Q24 *) + 0x4e284ad9; (* arm_AESE Q25 Q22 *) + 0x4e286b39; (* arm_AESMC Q25 Q25 *) + 0x4e284ada; (* arm_AESE Q26 Q22 *) + 0x4e286b5a; (* arm_AESMC Q26 Q26 *) + 0x6e261ce4; (* arm_EOR_VEC Q4 Q7 Q6 128 *) + 0x4e284ae0; (* arm_AESE Q0 Q23 *) + 0x93ca8156; (* arm_ROR X22 X10 32 *) + 0x93c9fd4a; (* arm_EXTR X10 X10 X9 63 *) + 0x0a967e6b; (* arm_AND W11 W19 (Shiftedreg W22 ASR 31) *) + 0xca090569; (* arm_EOR X9 X11 (Shiftedreg X9 LSL 1) *) + 0x9e670126; (* arm_FMOV_ItoF Q6 X9 0 *) + 0x9eaf0146; (* arm_FMOV_ItoF Q6 X10 1 *) + 0x6e281ce5; (* arm_EOR_VEC Q5 Q7 Q8 128 *) + 0x4cdf7002; (* arm_LDR Q2 X0 (Postimmediate_Offset (word 16)) *) + 0x4e284ae1; (* arm_AESE Q1 Q23 *) + 0x93ca8156; (* arm_ROR X22 X10 32 *) + 0x93c9fd4a; (* arm_EXTR X10 X10 X9 63 *) + 0x0a967e6b; (* arm_AND W11 W19 (Shiftedreg W22 ASR 31) *) + 0xca090569; (* arm_EOR X9 X11 (Shiftedreg X9 LSL 1) *) + 0x9e670128; (* arm_FMOV_ItoF Q8 X9 0 *) + 0x9eaf0148; (* arm_FMOV_ItoF Q8 X10 1 
*) + 0x6e291cf1; (* arm_EOR_VEC Q17 Q7 Q9 128 *) + 0x4cdf7003; (* arm_LDR Q3 X0 (Postimmediate_Offset (word 16)) *) + 0x4e284af8; (* arm_AESE Q24 Q23 *) + 0x93ca8156; (* arm_ROR X22 X10 32 *) + 0x93c9fd4a; (* arm_EXTR X10 X10 X9 63 *) + 0x0a967e6b; (* arm_AND W11 W19 (Shiftedreg W22 ASR 31) *) + 0xca090569; (* arm_EOR X9 X11 (Shiftedreg X9 LSL 1) *) + 0x9e670129; (* arm_FMOV_ItoF Q9 X9 0 *) + 0x9eaf0149; (* arm_FMOV_ItoF Q9 X10 1 *) + 0x6e2a1cfe; (* arm_EOR_VEC Q30 Q7 Q10 128 *) + 0x4cdf701b; (* arm_LDR Q27 X0 (Postimmediate_Offset (word 16)) *) + 0x4e284af9; (* arm_AESE Q25 Q23 *) + 0x93ca8156; (* arm_ROR X22 X10 32 *) + 0x93c9fd4a; (* arm_EXTR X10 X10 X9 63 *) + 0x0a967e6b; (* arm_AND W11 W19 (Shiftedreg W22 ASR 31) *) + 0xca090569; (* arm_EOR X9 X11 (Shiftedreg X9 LSL 1) *) + 0x9e67012a; (* arm_FMOV_ItoF Q10 X9 0 *) + 0x9eaf014a; (* arm_FMOV_ItoF Q10 X10 1 *) + 0x6e2b1cff; (* arm_EOR_VEC Q31 Q7 Q11 128 *) + 0x4cdf701c; (* arm_LDR Q28 X0 (Postimmediate_Offset (word 16)) *) + 0x4e284afa; (* arm_AESE Q26 Q23 *) + 0x93ca8156; (* arm_ROR X22 X10 32 *) + 0x93c9fd4a; (* arm_EXTR X10 X10 X9 63 *) + 0x0a967e6b; (* arm_AND W11 W19 (Shiftedreg W22 ASR 31) *) + 0xca090569; (* arm_EOR X9 X11 (Shiftedreg X9 LSL 1) *) + 0x9e67012b; (* arm_FMOV_ItoF Q11 X9 0 *) + 0x9eaf014b; (* arm_FMOV_ItoF Q11 X10 1 *) + 0x4cdf701d; (* arm_LDR Q29 X0 (Postimmediate_Offset (word 16)) *) + 0xb4000586; (* arm_CBZ X6 (word 176) *) + 0x4cdf78f0; (* arm_LDR Q16 X7 (Postimmediate_Offset (word 16)) *) + 0x6e201c84; (* arm_EOR_VEC Q4 Q4 Q0 128 *) + 0x6e261c40; (* arm_EOR_VEC Q0 Q2 Q6 128 *) + 0x6e211ca5; (* arm_EOR_VEC Q5 Q5 Q1 128 *) + 0x6e281c61; (* arm_EOR_VEC Q1 Q3 Q8 128 *) + 0x6e381e31; (* arm_EOR_VEC Q17 Q17 Q24 128 *) + 0x6e291f78; (* arm_EOR_VEC Q24 Q27 Q9 128 *) + 0x6e391fde; (* arm_EOR_VEC Q30 Q30 Q25 128 *) + 0x6e2a1f99; (* arm_EOR_VEC Q25 Q28 Q10 128 *) + 0x6e3a1fff; (* arm_EOR_VEC Q31 Q31 Q26 128 *) + 0x4c9f7024; (* arm_STR Q4 X1 (Postimmediate_Offset (word 16)) *) + 0x6e2b1fba; (* arm_EOR_VEC Q26 Q29 Q11 128 *) + 0x4c9f7025; (* arm_STR Q5 X1 (Postimmediate_Offset (word 16)) *) + 0x2a0503e6; (* arm_MOV W6 W5 *) + 0x4c9f7031; (* arm_STR Q17 X1 (Postimmediate_Offset (word 16)) *) + 0x4cdf78f1; (* arm_LDR Q17 X7 (Postimmediate_Offset (word 16)) *) + 0x4c9f703e; (* arm_STR Q30 X1 (Postimmediate_Offset (word 16)) *) + 0x4c9f703f; (* arm_STR Q31 X1 (Postimmediate_Offset (word 16)) *) + 0x54ffeba2; (* arm_BCS (word 2096500) *) + 0xb100405f; (* arm_CMN X2 (rvalue (word 16)) *) + 0x540001a1; (* arm_BNE (word 52) *) + 0x4eaa1d4b; (* arm_MOV_VEC Q11 Q10 128 *) + 0x4ea91d2a; (* arm_MOV_VEC Q10 Q9 128 *) + 0x4ea81d09; (* arm_MOV_VEC Q9 Q8 128 *) + 0x4ea61cc8; (* arm_MOV_VEC Q8 Q6 128 *) + 0x9e660169; (* arm_FMOV_FtoI X9 Q11 0 *) + 0x9eae016a; (* arm_FMOV_FtoI X10 Q11 1 *) + 0x6e221cc0; (* arm_EOR_VEC Q0 Q6 Q2 128 *) + 0x6e231d01; (* arm_EOR_VEC Q1 Q8 Q3 128 *) + 0x6e291f78; (* arm_EOR_VEC Q24 Q27 Q9 128 *) + 0x6e2a1f99; (* arm_EOR_VEC Q25 Q28 Q10 128 *) + 0x6e2b1fba; (* arm_EOR_VEC Q26 Q29 Q11 128 *) + 0x54ffe9e0; (* arm_BEQ (word 2096444) *) + 0x91014042; (* arm_ADD X2 X2 (rvalue (word 80)) *) + 0xb40015a2; (* arm_CBZ X2 (word 692) *) + 0x110008a6; (* arm_ADD W6 W5 (rvalue (word 2)) *) + 0xf100c042; (* arm_SUBS X2 X2 (rvalue (word 48)) *) + 0x54000c43; (* arm_BCC (word 392) *) + 0x6e3b1cc0; (* arm_EOR_VEC Q0 Q6 Q27 128 *) + 0x6e3c1d01; (* arm_EOR_VEC Q1 Q8 Q28 128 *) + 0x6e291fb8; (* arm_EOR_VEC Q24 Q29 Q9 128 *) + 0x1400000e; (* arm_B (word 56) *) + 0xd503201f; (* arm_NOP *) + 0x91004000; (* arm_ADD X0 X0 (rvalue (word 
16)) *) + 0x6e251c25; (* arm_EOR_VEC Q5 Q1 Q5 128 *) + 0x4c9f7025; (* arm_STR Q5 X1 (Postimmediate_Offset (word 16)) *) + 0x6e311f11; (* arm_EOR_VEC Q17 Q24 Q17 128 *) + 0x4c9f7031; (* arm_STR Q17 X1 (Postimmediate_Offset (word 16)) *) + 0x6e3e1f3e; (* arm_EOR_VEC Q30 Q25 Q30 128 *) + 0x6e3f1f5f; (* arm_EOR_VEC Q31 Q26 Q31 128 *) + 0x4c9fa03e; (* arm_ldstp_2q false (word 30) X1 (Postimmediate_Offset (word 32)) *) + 0x1400009c; (* arm_B (word 624) *) + 0xd503201f; (* arm_NOP *) + 0xd503201f; (* arm_NOP *) + 0xd503201f; (* arm_NOP *) + 0x4e284a00; (* arm_AESE Q0 Q16 *) + 0x4e286800; (* arm_AESMC Q0 Q0 *) + 0x4e284a01; (* arm_AESE Q1 Q16 *) + 0x4e286821; (* arm_AESMC Q1 Q1 *) + 0x4e284a18; (* arm_AESE Q24 Q16 *) + 0x4e286b18; (* arm_AESMC Q24 Q24 *) + 0x4cdf78f0; (* arm_LDR Q16 X7 (Postimmediate_Offset (word 16)) *) + 0x710008c6; (* arm_SUBS W6 W6 (rvalue (word 2)) *) + 0x4e284a20; (* arm_AESE Q0 Q17 *) + 0x4e286800; (* arm_AESMC Q0 Q0 *) + 0x4e284a21; (* arm_AESE Q1 Q17 *) + 0x4e286821; (* arm_AESMC Q1 Q1 *) + 0x4e284a38; (* arm_AESE Q24 Q17 *) + 0x4e286b18; (* arm_AESMC Q24 Q24 *) + 0x4cdf78f1; (* arm_LDR Q17 X7 (Postimmediate_Offset (word 16)) *) + 0x54fffe2c; (* arm_BGT (word 2097092) *) + 0x4e284a00; (* arm_AESE Q0 Q16 *) + 0x4e286800; (* arm_AESMC Q0 Q0 *) + 0x4e284a01; (* arm_AESE Q1 Q16 *) + 0x4e286821; (* arm_AESMC Q1 Q1 *) + 0x4e284a18; (* arm_AESE Q24 Q16 *) + 0x4e286b18; (* arm_AESMC Q24 Q24 *) + 0x6e271cc4; (* arm_EOR_VEC Q4 Q6 Q7 128 *) + 0xf100c042; (* arm_SUBS X2 X2 (rvalue (word 48)) *) + 0x9e660129; (* arm_FMOV_FtoI X9 Q9 0 *) + 0x9eae012a; (* arm_FMOV_FtoI X10 Q9 1 *) + 0x93ca8156; (* arm_ROR X22 X10 32 *) + 0x93c9fd4a; (* arm_EXTR X10 X10 X9 63 *) + 0x0a967e6b; (* arm_AND W11 W19 (Shiftedreg W22 ASR 31) *) + 0xca090569; (* arm_EOR X9 X11 (Shiftedreg X9 LSL 1) *) + 0x9e670126; (* arm_FMOV_ItoF Q6 X9 0 *) + 0x9eaf0146; (* arm_FMOV_ItoF Q6 X10 1 *) + 0x6e271d05; (* arm_EOR_VEC Q5 Q8 Q7 128 *) + 0x9a863046; (* arm_CSEL X6 X2 X6 Condition_CC *) + 0x4e284a20; (* arm_AESE Q0 Q17 *) + 0x4e286800; (* arm_AESMC Q0 Q0 *) + 0x4e284a21; (* arm_AESE Q1 Q17 *) + 0x4e286821; (* arm_AESMC Q1 Q1 *) + 0x4e284a38; (* arm_AESE Q24 Q17 *) + 0x4e286b18; (* arm_AESMC Q24 Q24 *) + 0x6e271d31; (* arm_EOR_VEC Q17 Q9 Q7 128 *) + 0x910080c6; (* arm_ADD X6 X6 (rvalue (word 32)) *) + 0x8b060000; (* arm_ADD X0 X0 X6 *) + 0xaa0303e7; (* arm_MOV X7 X3 *) + 0x4e284a80; (* arm_AESE Q0 Q20 *) + 0x4e286800; (* arm_AESMC Q0 Q0 *) + 0x4e284a81; (* arm_AESE Q1 Q20 *) + 0x4e286821; (* arm_AESMC Q1 Q1 *) + 0x4e284a98; (* arm_AESE Q24 Q20 *) + 0x4e286b18; (* arm_AESMC Q24 Q24 *) + 0x4e284aa0; (* arm_AESE Q0 Q21 *) + 0x4e286800; (* arm_AESMC Q0 Q0 *) + 0x4e284aa1; (* arm_AESE Q1 Q21 *) + 0x4e286821; (* arm_AESMC Q1 Q1 *) + 0x4e284ab8; (* arm_AESE Q24 Q21 *) + 0x4e286b18; (* arm_AESMC Q24 Q24 *) + 0x4e284ac0; (* arm_AESE Q0 Q22 *) + 0x4e286800; (* arm_AESMC Q0 Q0 *) + 0x4e284ac1; (* arm_AESE Q1 Q22 *) + 0x4e286821; (* arm_AESMC Q1 Q1 *) + 0x4e284ad8; (* arm_AESE Q24 Q22 *) + 0x4e286b18; (* arm_AESMC Q24 Q24 *) + 0x4e284ae0; (* arm_AESE Q0 Q23 *) + 0x4e284ae1; (* arm_AESE Q1 Q23 *) + 0x4e284af8; (* arm_AESE Q24 Q23 *) + 0x4cdf701b; (* arm_LDR Q27 X0 (Postimmediate_Offset (word 16)) *) + 0x110008a6; (* arm_ADD W6 W5 (rvalue (word 2)) *) + 0x4cdf78f0; (* arm_LDR Q16 X7 (Postimmediate_Offset (word 16)) *) + 0x6e201c84; (* arm_EOR_VEC Q4 Q4 Q0 128 *) + 0x6e211ca5; (* arm_EOR_VEC Q5 Q5 Q1 128 *) + 0x6e311f18; (* arm_EOR_VEC Q24 Q24 Q17 128 *) + 0x4cdf78f1; (* arm_LDR Q17 X7 (Postimmediate_Offset (word 16)) *) + 0x4c9f7024; 
(* arm_STR Q4 X1 (Postimmediate_Offset (word 16)) *) + 0x4c9f7025; (* arm_STR Q5 X1 (Postimmediate_Offset (word 16)) *) + 0x4c9f7038; (* arm_STR Q24 X1 (Postimmediate_Offset (word 16)) *) + 0xb100c05f; (* arm_CMN X2 (rvalue (word 48)) *) + 0x54000980; (* arm_BEQ (word 304) *) + 0x4ea31c7c; (* arm_MOV_VEC Q28 Q3 128 *) + 0x4ebb1f7d; (* arm_MOV_VEC Q29 Q27 128 *) + 0xd503201f; (* arm_NOP *) + 0xb100405f; (* arm_CMN X2 (rvalue (word 16)) *) + 0x6e261f81; (* arm_EOR_VEC Q1 Q28 Q6 128 *) + 0x6e281fb8; (* arm_EOR_VEC Q24 Q29 Q8 128 *) + 0x54000040; (* arm_BEQ (word 8) *) + 0x6e261fb8; (* arm_EOR_VEC Q24 Q29 Q6 128 *) + 0x4e284a01; (* arm_AESE Q1 Q16 *) + 0x4e286821; (* arm_AESMC Q1 Q1 *) + 0x4e284a18; (* arm_AESE Q24 Q16 *) + 0x4e286b18; (* arm_AESMC Q24 Q24 *) + 0x4cdf78f0; (* arm_LDR Q16 X7 (Postimmediate_Offset (word 16)) *) + 0x710008c6; (* arm_SUBS W6 W6 (rvalue (word 2)) *) + 0x4e284a21; (* arm_AESE Q1 Q17 *) + 0x4e286821; (* arm_AESMC Q1 Q1 *) + 0x4e284a38; (* arm_AESE Q24 Q17 *) + 0x4e286b18; (* arm_AESMC Q24 Q24 *) + 0x4cdf78f1; (* arm_LDR Q17 X7 (Postimmediate_Offset (word 16)) *) + 0x54fffeac; (* arm_BGT (word 2097108) *) + 0x4e284a01; (* arm_AESE Q1 Q16 *) + 0x4e286821; (* arm_AESMC Q1 Q1 *) + 0x4e284a18; (* arm_AESE Q24 Q16 *) + 0x4e286b18; (* arm_AESMC Q24 Q24 *) + 0x4e284a21; (* arm_AESE Q1 Q17 *) + 0x4e286821; (* arm_AESMC Q1 Q1 *) + 0x4e284a38; (* arm_AESE Q24 Q17 *) + 0x4e286b18; (* arm_AESMC Q24 Q24 *) + 0x4e284a81; (* arm_AESE Q1 Q20 *) + 0x4e286821; (* arm_AESMC Q1 Q1 *) + 0x4e284a98; (* arm_AESE Q24 Q20 *) + 0x4e286b18; (* arm_AESMC Q24 Q24 *) + 0xb100805f; (* arm_CMN X2 (rvalue (word 32)) *) + 0x4e284aa1; (* arm_AESE Q1 Q21 *) + 0x4e286821; (* arm_AESMC Q1 Q1 *) + 0x4e284ab8; (* arm_AESE Q24 Q21 *) + 0x4e286b18; (* arm_AESMC Q24 Q24 *) + 0x6e271cc5; (* arm_EOR_VEC Q5 Q6 Q7 128 *) + 0x4e284ac1; (* arm_AESE Q1 Q22 *) + 0x4e286821; (* arm_AESMC Q1 Q1 *) + 0x4e284ad8; (* arm_AESE Q24 Q22 *) + 0x4e286b18; (* arm_AESMC Q24 Q24 *) + 0x6e271d11; (* arm_EOR_VEC Q17 Q8 Q7 128 *) + 0x4e284ae1; (* arm_AESE Q1 Q23 *) + 0x4e284af8; (* arm_AESE Q24 Q23 *) + 0x54000200; (* arm_BEQ (word 64) *) + 0x6e211ca5; (* arm_EOR_VEC Q5 Q5 Q1 128 *) + 0x4c9f7025; (* arm_STR Q5 X1 (Postimmediate_Offset (word 16)) *) + 0x6e381e31; (* arm_EOR_VEC Q17 Q17 Q24 128 *) + 0x4ea81d06; (* arm_MOV_VEC Q6 Q8 128 *) + 0x4c9f7031; (* arm_STR Q17 X1 (Postimmediate_Offset (word 16)) *) + 0x9e660109; (* arm_FMOV_FtoI X9 Q8 0 *) + 0x9eae010a; (* arm_FMOV_FtoI X10 Q8 1 *) + 0x528010f3; (* arm_MOV W19 (rvalue (word 135)) *) + 0x93ca8156; (* arm_ROR X22 X10 32 *) + 0x93c9fd4a; (* arm_EXTR X10 X10 X9 63 *) + 0x0a967e6b; (* arm_AND W11 W19 (Shiftedreg W22 ASR 31) *) + 0xca090569; (* arm_EOR X9 X11 (Shiftedreg X9 LSL 1) *) + 0x9e670126; (* arm_FMOV_ItoF Q6 X9 0 *) + 0x9eaf0146; (* arm_FMOV_ItoF Q6 X10 1 *) + 0x1400000f; (* arm_B (word 60) *) + 0x6e381ca5; (* arm_EOR_VEC Q5 Q5 Q24 128 *) + 0x4ea61cc6; (* arm_MOV_VEC Q6 Q6 128 *) + 0x4c9f7025; (* arm_STR Q5 X1 (Postimmediate_Offset (word 16)) *) + 0x9e6600c9; (* arm_FMOV_FtoI X9 Q6 0 *) + 0x9eae00ca; (* arm_FMOV_FtoI X10 Q6 1 *) + 0x528010f3; (* arm_MOV W19 (rvalue (word 135)) *) + 0x93ca8156; (* arm_ROR X22 X10 32 *) + 0x93c9fd4a; (* arm_EXTR X10 X10 X9 63 *) + 0x0a967e6b; (* arm_AND W11 W19 (Shiftedreg W22 ASR 31) *) + 0xca090569; (* arm_EOR X9 X11 (Shiftedreg X9 LSL 1) *) + 0x9e670126; (* arm_FMOV_ItoF Q6 X9 0 *) + 0x9eaf0146; (* arm_FMOV_ItoF Q6 X10 1 *) + 0x14000002; (* arm_B (word 8) *) + 0xd503201f; (* arm_NOP *) + 0xf2400ebf; (* arm_TST X21 (rvalue (word 15)) *) + 
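+ (* X21 holds len mod 16; the TST/B.EQ pair below skips the tail code when
+    the input is block-aligned. Otherwise the LDRB/STRB byte loop that
+    follows performs ciphertext stealing: the leading bytes of the last
+    full ciphertext block are moved out to the partial-block position,
+    the remaining plaintext bytes are spliced into that block in their
+    place, and the combined block is encrypted once more under key1 and
+    the final tweak. *)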
0x540003e0; (* arm_BEQ (word 124) *) + 0xaa0003f4; (* arm_MOV X20 X0 *) + 0xaa0103ed; (* arm_MOV X13 X1 *) + 0xd1004021; (* arm_SUB X1 X1 (rvalue (word 16)) *) + 0xf10006b5; (* arm_SUBS X21 X21 (rvalue (word 1)) *) + 0x3875682f; (* arm_LDRB W15 X1 (Register_Offset X21) *) + 0x38756a8e; (* arm_LDRB W14 X20 (Register_Offset X21) *) + 0x383569af; (* arm_STRB W15 X13 (Register_Offset X21) *) + 0x3835682e; (* arm_STRB W14 X1 (Register_Offset X21) *) + 0x54ffff6c; (* arm_BGT (word 2097132) *) + 0x4c40703a; (* arm_LDR Q26 X1 No_Offset *) + 0x6e261f5a; (* arm_EOR_VEC Q26 Q26 Q6 128 *) + 0xb940f066; (* arm_LDR W6 X3 (Immediate_Offset (word 240)) *) + 0x4cdf7060; (* arm_LDR Q0 X3 (Postimmediate_Offset (word 16)) *) + 0x510008c6; (* arm_SUB W6 W6 (rvalue (word 2)) *) + 0x4cdf7061; (* arm_LDR Q1 X3 (Postimmediate_Offset (word 16)) *) + 0x4e28481a; (* arm_AESE Q26 Q0 *) + 0x4e286b5a; (* arm_AESMC Q26 Q26 *) + 0x4cdf7860; (* arm_LDR Q0 X3 (Postimmediate_Offset (word 16)) *) + 0x710008c6; (* arm_SUBS W6 W6 (rvalue (word 2)) *) + 0x4e28483a; (* arm_AESE Q26 Q1 *) + 0x4e286b5a; (* arm_AESMC Q26 Q26 *) + 0x4cdf7861; (* arm_LDR Q1 X3 (Postimmediate_Offset (word 16)) *) + 0x54ffff2c; (* arm_BGT (word 2097124) *) + 0x4e28481a; (* arm_AESE Q26 Q0 *) + 0x4e286b5a; (* arm_AESMC Q26 Q26 *) + 0x4c407860; (* arm_LDR Q0 X3 No_Offset *) + 0x4e28483a; (* arm_AESE Q26 Q1 *) + 0x6e201f5a; (* arm_EOR_VEC Q26 Q26 Q0 128 *) + 0x6e261f5a; (* arm_EOR_VEC Q26 Q26 Q6 128 *) + 0x4c00703a; (* arm_STR Q26 X1 No_Offset *) + 0xa9435bf5; (* arm_LDP X21 X22 SP (Immediate_Offset (iword (&48))) *) + 0x6d4227e8; (* arm_LDP D8 D9 SP (Immediate_Offset (iword (&32))) *) + 0x6d412fea; (* arm_LDP D10 D11 SP (Immediate_Offset (iword (&16))) *) + 0xa8c453f3; (* arm_LDP X19 X20 SP (Postimmediate_Offset (iword (&64))) *) + 0xd65f03c0; (* arm_RET X30 *) + 0xf100405f; (* arm_CMP X2 (rvalue (word 16)) *) + 0x540007a1; (* arm_BNE (word 244) *) + 0xb940f086; (* arm_LDR W6 X4 (Immediate_Offset (word 240)) *) + 0x4cdf7080; (* arm_LDR Q0 X4 (Postimmediate_Offset (word 16)) *) + 0x4c4070a6; (* arm_LDR Q6 X5 No_Offset *) + 0x510008c6; (* arm_SUB W6 W6 (rvalue (word 2)) *) + 0x4cdf7081; (* arm_LDR Q1 X4 (Postimmediate_Offset (word 16)) *) + 0x4e284806; (* arm_AESE Q6 Q0 *) + 0x4e2868c6; (* arm_AESMC Q6 Q6 *) + 0x4cdf7880; (* arm_LDR Q0 X4 (Postimmediate_Offset (word 16)) *) + 0x710008c6; (* arm_SUBS W6 W6 (rvalue (word 2)) *) + 0x4e284826; (* arm_AESE Q6 Q1 *) + 0x4e2868c6; (* arm_AESMC Q6 Q6 *) + 0x4cdf7881; (* arm_LDR Q1 X4 (Postimmediate_Offset (word 16)) *) + 0x54ffff2c; (* arm_BGT (word 2097124) *) + 0x4e284806; (* arm_AESE Q6 Q0 *) + 0x4e2868c6; (* arm_AESMC Q6 Q6 *) + 0x4c407880; (* arm_LDR Q0 X4 No_Offset *) + 0x4e284826; (* arm_AESE Q6 Q1 *) + 0x6e201cc6; (* arm_EOR_VEC Q6 Q6 Q0 128 *) + 0x4c407000; (* arm_LDR Q0 X0 No_Offset *) + 0x6e201cc0; (* arm_EOR_VEC Q0 Q6 Q0 128 *) + 0xb940f066; (* arm_LDR W6 X3 (Immediate_Offset (word 240)) *) + 0x4cdfa87c; (* arm_ldstp_2q true (word 28) X3 (Postimmediate_Offset (word 32)) *) + 0x4e285b80; (* arm_AESD Q0 Q28 *) + 0x4e287800; (* arm_AESIMC Q0 Q0 *) + 0x4cdfa870; (* arm_ldstp_2q true (word 16) X3 (Postimmediate_Offset (word 32)) *) + 0x4e285ba0; (* arm_AESD Q0 Q29 *) + 0x4e287800; (* arm_AESIMC Q0 Q0 *) + 0x710028c6; (* arm_SUBS W6 W6 (rvalue (word 10)) *) + 0x54000120; (* arm_BEQ (word 36) *) + 0x4e285a00; (* arm_AESD Q0 Q16 *) + 0x4e287800; (* arm_AESIMC Q0 Q0 *) + 0x4cdf7870; (* arm_LDR Q16 X3 (Postimmediate_Offset (word 16)) *) + 0x4e285a20; (* arm_AESD Q0 Q17 *) + 0x4e287800; (* arm_AESIMC Q0 Q0 *) + 
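+ (* Decrypt side, len == 16 fast path: the tweak is produced by encrypting
+    the IV under key2 and XORed into the single input block, which then
+    runs through the AESD/AESIMC round chain of key1; rounds beyond the
+    AES-128 count are peeled off two at a time in this loop. *)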
0x4cdf7871; (* arm_LDR Q17 X3 (Postimmediate_Offset (word 16)) *) + 0x710008c6; (* arm_SUBS W6 W6 (rvalue (word 2)) *) + 0x54ffff2c; (* arm_BGT (word 2097124) *) + 0x4cdfa872; (* arm_ldstp_2q true (word 18) X3 (Postimmediate_Offset (word 32)) *) + 0x4e285a00; (* arm_AESD Q0 Q16 *) + 0x4e287800; (* arm_AESIMC Q0 Q0 *) + 0x4e285a20; (* arm_AESD Q0 Q17 *) + 0x4e287800; (* arm_AESIMC Q0 Q0 *) + 0x4cdfa874; (* arm_ldstp_2q true (word 20) X3 (Postimmediate_Offset (word 32)) *) + 0x4e285a40; (* arm_AESD Q0 Q18 *) + 0x4e287800; (* arm_AESIMC Q0 Q0 *) + 0x4e285a60; (* arm_AESD Q0 Q19 *) + 0x4e287800; (* arm_AESIMC Q0 Q0 *) + 0x4cdfa876; (* arm_ldstp_2q true (word 22) X3 (Postimmediate_Offset (word 32)) *) + 0x4e285a80; (* arm_AESD Q0 Q20 *) + 0x4e287800; (* arm_AESIMC Q0 Q0 *) + 0x4e285aa0; (* arm_AESD Q0 Q21 *) + 0x4e287800; (* arm_AESIMC Q0 Q0 *) + 0x4c407867; (* arm_LDR Q7 X3 No_Offset *) + 0x4e285ac0; (* arm_AESD Q0 Q22 *) + 0x4e287800; (* arm_AESIMC Q0 Q0 *) + 0x4e285ae0; (* arm_AESD Q0 Q23 *) + 0x6e271c00; (* arm_EOR_VEC Q0 Q0 Q7 128 *) + 0x6e201cc0; (* arm_EOR_VEC Q0 Q6 Q0 128 *) + 0x4c007020; (* arm_STR Q0 X1 No_Offset *) + 0x140001ff; (* arm_B (word 2044) *) + 0xa9bc53f3; (* arm_STP X19 X20 SP (Preimmediate_Offset (iword (-- &64))) *) + 0xa9035bf5; (* arm_STP X21 X22 SP (Immediate_Offset (iword (&48))) *) + 0x6d0227e8; (* arm_STP D8 D9 SP (Immediate_Offset (iword (&32))) *) + 0x6d012fea; (* arm_STP D10 D11 SP (Immediate_Offset (iword (&16))) *) + 0x92400c55; (* arm_AND X21 X2 (rvalue (word 15)) *) + 0x927cec42; (* arm_AND X2 X2 (rvalue (word 18446744073709551600)) *) + 0xf1004042; (* arm_SUBS X2 X2 (rvalue (word 16)) *) + 0xd2800208; (* arm_MOV X8 (rvalue (word 16)) *) + 0x54003e43; (* arm_BCC (word 1992) *) + 0xb940f086; (* arm_LDR W6 X4 (Immediate_Offset (word 240)) *) + 0x4cdf7080; (* arm_LDR Q0 X4 (Postimmediate_Offset (word 16)) *) + 0x4c4070a6; (* arm_LDR Q6 X5 No_Offset *) + 0x510008c6; (* arm_SUB W6 W6 (rvalue (word 2)) *) + 0x4cdf7081; (* arm_LDR Q1 X4 (Postimmediate_Offset (word 16)) *) + 0x4e284806; (* arm_AESE Q6 Q0 *) + 0x4e2868c6; (* arm_AESMC Q6 Q6 *) + 0x4cdf7880; (* arm_LDR Q0 X4 (Postimmediate_Offset (word 16)) *) + 0x710008c6; (* arm_SUBS W6 W6 (rvalue (word 2)) *) + 0x4e284826; (* arm_AESE Q6 Q1 *) + 0x4e2868c6; (* arm_AESMC Q6 Q6 *) + 0x4cdf7881; (* arm_LDR Q1 X4 (Postimmediate_Offset (word 16)) *) + 0x54ffff2c; (* arm_BGT (word 2097124) *) + 0x4e284806; (* arm_AESE Q6 Q0 *) + 0x4e2868c6; (* arm_AESMC Q6 Q6 *) + 0x4c407880; (* arm_LDR Q0 X4 No_Offset *) + 0x4e284826; (* arm_AESE Q6 Q1 *) + 0x6e201cc6; (* arm_EOR_VEC Q6 Q6 Q0 128 *) + 0x9e6600c9; (* arm_FMOV_FtoI X9 Q6 0 *) + 0x9eae00ca; (* arm_FMOV_FtoI X10 Q6 1 *) + 0x528010f3; (* arm_MOV W19 (rvalue (word 135)) *) + 0x93ca8156; (* arm_ROR X22 X10 32 *) + 0x93c9fd4a; (* arm_EXTR X10 X10 X9 63 *) + 0x0a967e6b; (* arm_AND W11 W19 (Shiftedreg W22 ASR 31) *) + 0xca090569; (* arm_EOR X9 X11 (Shiftedreg X9 LSL 1) *) + 0x9e670128; (* arm_FMOV_ItoF Q8 X9 0 *) + 0x9eaf0148; (* arm_FMOV_ItoF Q8 X10 1 *) + 0xb940f065; (* arm_LDR W5 X3 (Immediate_Offset (word 240)) *) + 0x93ca8156; (* arm_ROR X22 X10 32 *) + 0x93c9fd4a; (* arm_EXTR X10 X10 X9 63 *) + 0x0a967e6b; (* arm_AND W11 W19 (Shiftedreg W22 ASR 31) *) + 0xca090569; (* arm_EOR X9 X11 (Shiftedreg X9 LSL 1) *) + 0x9e670129; (* arm_FMOV_ItoF Q9 X9 0 *) + 0x9eaf0149; (* arm_FMOV_ItoF Q9 X10 1 *) + 0x4c40a870; (* arm_ldstp_2q true (word 16) X3 No_Offset *) + 0x510018a5; (* arm_SUB W5 W5 (rvalue (word 6)) *) + 0x8b051067; (* arm_ADD X7 X3 (Shiftedreg X5 LSL 4) *) + 0x510008a5; (* 
arm_SUB W5 W5 (rvalue (word 2)) *) + 0x4cdfa8f2; (* arm_ldstp_2q true (word 18) X7 (Postimmediate_Offset (word 32)) *) + 0x4cdfa8f4; (* arm_ldstp_2q true (word 20) X7 (Postimmediate_Offset (word 32)) *) + 0x4cdfa8f6; (* arm_ldstp_2q true (word 22) X7 (Postimmediate_Offset (word 32)) *) + 0x4c4078e7; (* arm_LDR Q7 X7 No_Offset *) + 0x93ca8156; (* arm_ROR X22 X10 32 *) + 0x93c9fd4a; (* arm_EXTR X10 X10 X9 63 *) + 0x0a967e6b; (* arm_AND W11 W19 (Shiftedreg W22 ASR 31) *) + 0xca090569; (* arm_EOR X9 X11 (Shiftedreg X9 LSL 1) *) + 0x9e67012a; (* arm_FMOV_ItoF Q10 X9 0 *) + 0x9eaf014a; (* arm_FMOV_ItoF Q10 X10 1 *) + 0x91008067; (* arm_ADD X7 X3 (rvalue (word 32)) *) + 0x2a0503e6; (* arm_MOV W6 W5 *) + 0x14000002; (* arm_B (word 8) *) + 0xd503201f; (* arm_NOP *) + 0xf2400ebf; (* arm_TST X21 (rvalue (word 15)) *) + 0x540000c0; (* arm_BEQ (word 24) *) + 0xf1004042; (* arm_SUBS X2 X2 (rvalue (word 16)) *) + 0x9a8803e8; (* arm_CSEL X8 XZR X8 Condition_EQ *) + 0x4cdf7000; (* arm_LDR Q0 X0 (Postimmediate_Offset (word 16)) *) + 0x54003043; (* arm_BCC (word 1544) *) + 0xd1004000; (* arm_SUB X0 X0 (rvalue (word 16)) *) + 0x4cc87000; (* arm_LDR Q0 X0 (Register_Offset X8) *) + 0xf1008042; (* arm_SUBS X2 X2 (rvalue (word 32)) *) + 0x110008a6; (* arm_ADD W6 W5 (rvalue (word 2)) *) + 0x4ea01c03; (* arm_MOV_VEC Q3 Q0 128 *) + 0x4ea01c01; (* arm_MOV_VEC Q1 Q0 128 *) + 0x4ea01c1c; (* arm_MOV_VEC Q28 Q0 128 *) + 0x4cdf7018; (* arm_LDR Q24 X0 (Postimmediate_Offset (word 16)) *) + 0x4eb81f1b; (* arm_MOV_VEC Q27 Q24 128 *) + 0x4eb81f1d; (* arm_MOV_VEC Q29 Q24 128 *) + 0x540027e3; (* arm_BCC (word 1276) *) + 0x6e261c00; (* arm_EOR_VEC Q0 Q0 Q6 128 *) + 0x6e281f18; (* arm_EOR_VEC Q24 Q24 Q8 128 *) + 0x4eb81f01; (* arm_MOV_VEC Q1 Q24 128 *) + 0x4cdf7018; (* arm_LDR Q24 X0 (Postimmediate_Offset (word 16)) *) + 0x4ea01c02; (* arm_MOV_VEC Q2 Q0 128 *) + 0x4ea11c23; (* arm_MOV_VEC Q3 Q1 128 *) + 0x6e291f1b; (* arm_EOR_VEC Q27 Q24 Q9 128 *) + 0x6e291f18; (* arm_EOR_VEC Q24 Q24 Q9 128 *) + 0xf100805f; (* arm_CMP X2 (rvalue (word 32)) *) + 0x54001ac3; (* arm_BCC (word 856) *) + 0x4cdf7019; (* arm_LDR Q25 X0 (Postimmediate_Offset (word 16)) *) + 0x93ca8156; (* arm_ROR X22 X10 32 *) + 0x93c9fd4a; (* arm_EXTR X10 X10 X9 63 *) + 0x0a967e6b; (* arm_AND W11 W19 (Shiftedreg W22 ASR 31) *) + 0xca090569; (* arm_EOR X9 X11 (Shiftedreg X9 LSL 1) *) + 0x9e67012b; (* arm_FMOV_ItoF Q11 X9 0 *) + 0x9eaf014b; (* arm_FMOV_ItoF Q11 X10 1 *) + 0x4cdf701a; (* arm_LDR Q26 X0 (Postimmediate_Offset (word 16)) *) + 0x6e2a1f39; (* arm_EOR_VEC Q25 Q25 Q10 128 *) + 0x6e2b1f5a; (* arm_EOR_VEC Q26 Q26 Q11 128 *) + 0xd1008042; (* arm_SUB X2 X2 (rvalue (word 32)) *) + 0x2a0503e6; (* arm_MOV W6 W5 *) + 0x14000001; (* arm_B (word 4) *) + 0x4e285a00; (* arm_AESD Q0 Q16 *) + 0x4e287800; (* arm_AESIMC Q0 Q0 *) + 0x4e285a01; (* arm_AESD Q1 Q16 *) + 0x4e287821; (* arm_AESIMC Q1 Q1 *) + 0x4e285a18; (* arm_AESD Q24 Q16 *) + 0x4e287b18; (* arm_AESIMC Q24 Q24 *) + 0x4e285a19; (* arm_AESD Q25 Q16 *) + 0x4e287b39; (* arm_AESIMC Q25 Q25 *) + 0x4e285a1a; (* arm_AESD Q26 Q16 *) + 0x4e287b5a; (* arm_AESIMC Q26 Q26 *) + 0x4cdf78f0; (* arm_LDR Q16 X7 (Postimmediate_Offset (word 16)) *) + 0x710008c6; (* arm_SUBS W6 W6 (rvalue (word 2)) *) + 0x4e285a20; (* arm_AESD Q0 Q17 *) + 0x4e287800; (* arm_AESIMC Q0 Q0 *) + 0x4e285a21; (* arm_AESD Q1 Q17 *) + 0x4e287821; (* arm_AESIMC Q1 Q1 *) + 0x4e285a38; (* arm_AESD Q24 Q17 *) + 0x4e287b18; (* arm_AESIMC Q24 Q24 *) + 0x4e285a39; (* arm_AESD Q25 Q17 *) + 0x4e287b39; (* arm_AESIMC Q25 Q25 *) + 0x4e285a3a; (* arm_AESD Q26 Q17 *) + 
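+ (* Main 5x decrypt loop: five blocks are kept in flight (Q0, Q1, Q24,
+    Q25, Q26) so that each AESD result feeds its AESIMC while the other
+    four pairs issue, keeping the crypto pipeline busy between rounds. *)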
0x4e287b5a; (* arm_AESIMC Q26 Q26 *) + 0x4cdf78f1; (* arm_LDR Q17 X7 (Postimmediate_Offset (word 16)) *) + 0x54fffd2c; (* arm_BGT (word 2097060) *) + 0x4e285a00; (* arm_AESD Q0 Q16 *) + 0x4e287800; (* arm_AESIMC Q0 Q0 *) + 0x4e285a01; (* arm_AESD Q1 Q16 *) + 0x4e287821; (* arm_AESIMC Q1 Q1 *) + 0x4e285a18; (* arm_AESD Q24 Q16 *) + 0x4e287b18; (* arm_AESIMC Q24 Q24 *) + 0x4e285a19; (* arm_AESD Q25 Q16 *) + 0x4e287b39; (* arm_AESIMC Q25 Q25 *) + 0x4e285a1a; (* arm_AESD Q26 Q16 *) + 0x4e287b5a; (* arm_AESIMC Q26 Q26 *) + 0xf1014042; (* arm_SUBS X2 X2 (rvalue (word 80)) *) + 0x4e285a20; (* arm_AESD Q0 Q17 *) + 0x4e287800; (* arm_AESIMC Q0 Q0 *) + 0x4e285a21; (* arm_AESD Q1 Q17 *) + 0x4e287821; (* arm_AESIMC Q1 Q1 *) + 0x4e285a38; (* arm_AESD Q24 Q17 *) + 0x4e287b18; (* arm_AESIMC Q24 Q24 *) + 0x4e285a39; (* arm_AESD Q25 Q17 *) + 0x4e287b39; (* arm_AESIMC Q25 Q25 *) + 0x4e285a3a; (* arm_AESD Q26 Q17 *) + 0x4e287b5a; (* arm_AESIMC Q26 Q26 *) + 0x9a82c3e6; (* arm_CSEL X6 XZR X2 Condition_GT *) + 0xaa0303e7; (* arm_MOV X7 X3 *) + 0x4e285a40; (* arm_AESD Q0 Q18 *) + 0x4e287800; (* arm_AESIMC Q0 Q0 *) + 0x4e285a41; (* arm_AESD Q1 Q18 *) + 0x4e287821; (* arm_AESIMC Q1 Q1 *) + 0x4e285a58; (* arm_AESD Q24 Q18 *) + 0x4e287b18; (* arm_AESIMC Q24 Q24 *) + 0x4e285a59; (* arm_AESD Q25 Q18 *) + 0x4e287b39; (* arm_AESIMC Q25 Q25 *) + 0x4e285a5a; (* arm_AESD Q26 Q18 *) + 0x4e287b5a; (* arm_AESIMC Q26 Q26 *) + 0x8b060000; (* arm_ADD X0 X0 X6 *) + 0x91018046; (* arm_ADD X6 X2 (rvalue (word 96)) *) + 0x4e285a60; (* arm_AESD Q0 Q19 *) + 0x4e287800; (* arm_AESIMC Q0 Q0 *) + 0x4e285a61; (* arm_AESD Q1 Q19 *) + 0x4e287821; (* arm_AESIMC Q1 Q1 *) + 0x4e285a78; (* arm_AESD Q24 Q19 *) + 0x4e287b18; (* arm_AESIMC Q24 Q24 *) + 0x4e285a79; (* arm_AESD Q25 Q19 *) + 0x4e287b39; (* arm_AESIMC Q25 Q25 *) + 0x4e285a7a; (* arm_AESD Q26 Q19 *) + 0x4e287b5a; (* arm_AESIMC Q26 Q26 *) + 0x4e285a80; (* arm_AESD Q0 Q20 *) + 0x4e287800; (* arm_AESIMC Q0 Q0 *) + 0x4e285a81; (* arm_AESD Q1 Q20 *) + 0x4e287821; (* arm_AESIMC Q1 Q1 *) + 0x4e285a98; (* arm_AESD Q24 Q20 *) + 0x4e287b18; (* arm_AESIMC Q24 Q24 *) + 0x4e285a99; (* arm_AESD Q25 Q20 *) + 0x4e287b39; (* arm_AESIMC Q25 Q25 *) + 0x4e285a9a; (* arm_AESD Q26 Q20 *) + 0x4e287b5a; (* arm_AESIMC Q26 Q26 *) + 0x4e285aa0; (* arm_AESD Q0 Q21 *) + 0x4e287800; (* arm_AESIMC Q0 Q0 *) + 0x4e285aa1; (* arm_AESD Q1 Q21 *) + 0x4e287821; (* arm_AESIMC Q1 Q1 *) + 0x4e285ab8; (* arm_AESD Q24 Q21 *) + 0x4e287b18; (* arm_AESIMC Q24 Q24 *) + 0x4e285ab9; (* arm_AESD Q25 Q21 *) + 0x4e287b39; (* arm_AESIMC Q25 Q25 *) + 0x4e285aba; (* arm_AESD Q26 Q21 *) + 0x4e287b5a; (* arm_AESIMC Q26 Q26 *) + 0x4e285ac0; (* arm_AESD Q0 Q22 *) + 0x4e287800; (* arm_AESIMC Q0 Q0 *) + 0x4e285ac1; (* arm_AESD Q1 Q22 *) + 0x4e287821; (* arm_AESIMC Q1 Q1 *) + 0x4e285ad8; (* arm_AESD Q24 Q22 *) + 0x4e287b18; (* arm_AESIMC Q24 Q24 *) + 0x4e285ad9; (* arm_AESD Q25 Q22 *) + 0x4e287b39; (* arm_AESIMC Q25 Q25 *) + 0x4e285ada; (* arm_AESD Q26 Q22 *) + 0x4e287b5a; (* arm_AESIMC Q26 Q26 *) + 0x6e261ce4; (* arm_EOR_VEC Q4 Q7 Q6 128 *) + 0x4e285ae0; (* arm_AESD Q0 Q23 *) + 0x93ca8156; (* arm_ROR X22 X10 32 *) + 0x93c9fd4a; (* arm_EXTR X10 X10 X9 63 *) + 0x0a967e6b; (* arm_AND W11 W19 (Shiftedreg W22 ASR 31) *) + 0xca090569; (* arm_EOR X9 X11 (Shiftedreg X9 LSL 1) *) + 0x9e670126; (* arm_FMOV_ItoF Q6 X9 0 *) + 0x9eaf0146; (* arm_FMOV_ItoF Q6 X10 1 *) + 0x6e281ce5; (* arm_EOR_VEC Q5 Q7 Q8 128 *) + 0x4cdf7002; (* arm_LDR Q2 X0 (Postimmediate_Offset (word 16)) *) + 0x4e285ae1; (* arm_AESD Q1 Q23 *) + 0x93ca8156; (* arm_ROR X22 X10 32 *) + 
0x93c9fd4a; (* arm_EXTR X10 X10 X9 63 *) + 0x0a967e6b; (* arm_AND W11 W19 (Shiftedreg W22 ASR 31) *) + 0xca090569; (* arm_EOR X9 X11 (Shiftedreg X9 LSL 1) *) + 0x9e670128; (* arm_FMOV_ItoF Q8 X9 0 *) + 0x9eaf0148; (* arm_FMOV_ItoF Q8 X10 1 *) + 0x6e291cf1; (* arm_EOR_VEC Q17 Q7 Q9 128 *) + 0x4cdf7003; (* arm_LDR Q3 X0 (Postimmediate_Offset (word 16)) *) + 0x4e285af8; (* arm_AESD Q24 Q23 *) + 0x93ca8156; (* arm_ROR X22 X10 32 *) + 0x93c9fd4a; (* arm_EXTR X10 X10 X9 63 *) + 0x0a967e6b; (* arm_AND W11 W19 (Shiftedreg W22 ASR 31) *) + 0xca090569; (* arm_EOR X9 X11 (Shiftedreg X9 LSL 1) *) + 0x9e670129; (* arm_FMOV_ItoF Q9 X9 0 *) + 0x9eaf0149; (* arm_FMOV_ItoF Q9 X10 1 *) + 0x6e2a1cfe; (* arm_EOR_VEC Q30 Q7 Q10 128 *) + 0x4cdf701b; (* arm_LDR Q27 X0 (Postimmediate_Offset (word 16)) *) + 0x4e285af9; (* arm_AESD Q25 Q23 *) + 0x93ca8156; (* arm_ROR X22 X10 32 *) + 0x93c9fd4a; (* arm_EXTR X10 X10 X9 63 *) + 0x0a967e6b; (* arm_AND W11 W19 (Shiftedreg W22 ASR 31) *) + 0xca090569; (* arm_EOR X9 X11 (Shiftedreg X9 LSL 1) *) + 0x9e67012a; (* arm_FMOV_ItoF Q10 X9 0 *) + 0x9eaf014a; (* arm_FMOV_ItoF Q10 X10 1 *) + 0x6e2b1cff; (* arm_EOR_VEC Q31 Q7 Q11 128 *) + 0x4cdf701c; (* arm_LDR Q28 X0 (Postimmediate_Offset (word 16)) *) + 0x4e285afa; (* arm_AESD Q26 Q23 *) + 0x93ca8156; (* arm_ROR X22 X10 32 *) + 0x93c9fd4a; (* arm_EXTR X10 X10 X9 63 *) + 0x0a967e6b; (* arm_AND W11 W19 (Shiftedreg W22 ASR 31) *) + 0xca090569; (* arm_EOR X9 X11 (Shiftedreg X9 LSL 1) *) + 0x9e67012b; (* arm_FMOV_ItoF Q11 X9 0 *) + 0x9eaf014b; (* arm_FMOV_ItoF Q11 X10 1 *) + 0x4cdf701d; (* arm_LDR Q29 X0 (Postimmediate_Offset (word 16)) *) + 0xb4000586; (* arm_CBZ X6 (word 176) *) + 0x4cdf78f0; (* arm_LDR Q16 X7 (Postimmediate_Offset (word 16)) *) + 0x6e201c84; (* arm_EOR_VEC Q4 Q4 Q0 128 *) + 0x6e261c40; (* arm_EOR_VEC Q0 Q2 Q6 128 *) + 0x6e211ca5; (* arm_EOR_VEC Q5 Q5 Q1 128 *) + 0x6e281c61; (* arm_EOR_VEC Q1 Q3 Q8 128 *) + 0x6e381e31; (* arm_EOR_VEC Q17 Q17 Q24 128 *) + 0x6e291f78; (* arm_EOR_VEC Q24 Q27 Q9 128 *) + 0x6e391fde; (* arm_EOR_VEC Q30 Q30 Q25 128 *) + 0x6e2a1f99; (* arm_EOR_VEC Q25 Q28 Q10 128 *) + 0x6e3a1fff; (* arm_EOR_VEC Q31 Q31 Q26 128 *) + 0x4c9f7024; (* arm_STR Q4 X1 (Postimmediate_Offset (word 16)) *) + 0x6e2b1fba; (* arm_EOR_VEC Q26 Q29 Q11 128 *) + 0x4c9f7025; (* arm_STR Q5 X1 (Postimmediate_Offset (word 16)) *) + 0x2a0503e6; (* arm_MOV W6 W5 *) + 0x4c9f7031; (* arm_STR Q17 X1 (Postimmediate_Offset (word 16)) *) + 0x4cdf78f1; (* arm_LDR Q17 X7 (Postimmediate_Offset (word 16)) *) + 0x4c9f703e; (* arm_STR Q30 X1 (Postimmediate_Offset (word 16)) *) + 0x4c9f703f; (* arm_STR Q31 X1 (Postimmediate_Offset (word 16)) *) + 0x54ffeba2; (* arm_BCS (word 2096500) *) + 0xb100405f; (* arm_CMN X2 (rvalue (word 16)) *) + 0x540001a1; (* arm_BNE (word 52) *) + 0x4eaa1d4b; (* arm_MOV_VEC Q11 Q10 128 *) + 0x4ea91d2a; (* arm_MOV_VEC Q10 Q9 128 *) + 0x4ea81d09; (* arm_MOV_VEC Q9 Q8 128 *) + 0x4ea61cc8; (* arm_MOV_VEC Q8 Q6 128 *) + 0x9e660169; (* arm_FMOV_FtoI X9 Q11 0 *) + 0x9eae016a; (* arm_FMOV_FtoI X10 Q11 1 *) + 0x6e221cc0; (* arm_EOR_VEC Q0 Q6 Q2 128 *) + 0x6e231d01; (* arm_EOR_VEC Q1 Q8 Q3 128 *) + 0x6e291f78; (* arm_EOR_VEC Q24 Q27 Q9 128 *) + 0x6e2a1f99; (* arm_EOR_VEC Q25 Q28 Q10 128 *) + 0x6e2b1fba; (* arm_EOR_VEC Q26 Q29 Q11 128 *) + 0x54ffe9e0; (* arm_BEQ (word 2096444) *) + 0x91014042; (* arm_ADD X2 X2 (rvalue (word 80)) *) + 0xb4001582; (* arm_CBZ X2 (word 688) *) + 0x110008a6; (* arm_ADD W6 W5 (rvalue (word 2)) *) + 0xf100c042; (* arm_SUBS X2 X2 (rvalue (word 48)) *) + 0x54000e23; (* arm_BCC (word 452) *) + 
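+ (* The recurring ROR/EXTR/AND/EOR/FMOV groups above advance the XTS tweak
+    between blocks: multiplication by x in GF(2^128) with the reduction
+    polynomial x^128 + x^7 + x^2 + x + 1, i.e. the constant 0x87 held in
+    W19, applied to the X9 (low) / X10 (high) halves. A minimal OCaml model
+    of one doubling step, assuming lo/hi are the two 64-bit tweak halves
+    (the names are illustrative, not from the decoded code):
+
+      let xts_double (lo, hi) =
+        let carry = Int64.shift_right_logical hi 63 in
+        let hi' = Int64.logor (Int64.shift_left hi 1)
+                              (Int64.shift_right_logical lo 63) in
+        let lo' = Int64.logxor (Int64.shift_left lo 1)
+                               (if carry = 1L then 0x87L else 0L) in
+        (lo', hi')
+ *)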
0x6e3b1cc0; (* arm_EOR_VEC Q0 Q6 Q27 128 *) + 0x6e3c1d01; (* arm_EOR_VEC Q1 Q8 Q28 128 *) + 0x6e291fb8; (* arm_EOR_VEC Q24 Q29 Q9 128 *) + 0x1400000e; (* arm_B (word 56) *) + 0xd503201f; (* arm_NOP *) + 0x91004000; (* arm_ADD X0 X0 (rvalue (word 16)) *) + 0xf2400ebf; (* arm_TST X21 (rvalue (word 15)) *) + 0x6e241c25; (* arm_EOR_VEC Q5 Q1 Q4 128 *) + 0x4c9f7025; (* arm_STR Q5 X1 (Postimmediate_Offset (word 16)) *) + 0x6e311f11; (* arm_EOR_VEC Q17 Q24 Q17 128 *) + 0x4c9f7031; (* arm_STR Q17 X1 (Postimmediate_Offset (word 16)) *) + 0x6e3e1f3e; (* arm_EOR_VEC Q30 Q25 Q30 128 *) + 0x6e3f1f5f; (* arm_EOR_VEC Q31 Q26 Q31 128 *) + 0x4c9fa03e; (* arm_ldstp_2q false (word 30) X1 (Postimmediate_Offset (word 32)) *) + 0x54001a00; (* arm_BEQ (word 832) *) + 0x4cdf7800; (* arm_LDR Q0 X0 (Postimmediate_Offset (word 16)) *) + 0x14000098; (* arm_B (word 608) *) + 0x4e285a00; (* arm_AESD Q0 Q16 *) + 0x4e287800; (* arm_AESIMC Q0 Q0 *) + 0x4e285a01; (* arm_AESD Q1 Q16 *) + 0x4e287821; (* arm_AESIMC Q1 Q1 *) + 0x4e285a18; (* arm_AESD Q24 Q16 *) + 0x4e287b18; (* arm_AESIMC Q24 Q24 *) + 0x4cdf78f0; (* arm_LDR Q16 X7 (Postimmediate_Offset (word 16)) *) + 0x710008c6; (* arm_SUBS W6 W6 (rvalue (word 2)) *) + 0x4e285a20; (* arm_AESD Q0 Q17 *) + 0x4e287800; (* arm_AESIMC Q0 Q0 *) + 0x4e285a21; (* arm_AESD Q1 Q17 *) + 0x4e287821; (* arm_AESIMC Q1 Q1 *) + 0x4e285a38; (* arm_AESD Q24 Q17 *) + 0x4e287b18; (* arm_AESIMC Q24 Q24 *) + 0x4cdf78f1; (* arm_LDR Q17 X7 (Postimmediate_Offset (word 16)) *) + 0x54fffe2c; (* arm_BGT (word 2097092) *) + 0x4e285a00; (* arm_AESD Q0 Q16 *) + 0x4e287800; (* arm_AESIMC Q0 Q0 *) + 0x4e285a01; (* arm_AESD Q1 Q16 *) + 0x4e287821; (* arm_AESIMC Q1 Q1 *) + 0x4e285a18; (* arm_AESD Q24 Q16 *) + 0x4e287b18; (* arm_AESIMC Q24 Q24 *) + 0x6e271cc4; (* arm_EOR_VEC Q4 Q6 Q7 128 *) + 0xf100c042; (* arm_SUBS X2 X2 (rvalue (word 48)) *) + 0x9e660129; (* arm_FMOV_FtoI X9 Q9 0 *) + 0x9eae012a; (* arm_FMOV_FtoI X10 Q9 1 *) + 0x528010f3; (* arm_MOV W19 (rvalue (word 135)) *) + 0x93ca8156; (* arm_ROR X22 X10 32 *) + 0x93c9fd4a; (* arm_EXTR X10 X10 X9 63 *) + 0x0a967e6b; (* arm_AND W11 W19 (Shiftedreg W22 ASR 31) *) + 0xca090569; (* arm_EOR X9 X11 (Shiftedreg X9 LSL 1) *) + 0x9e670126; (* arm_FMOV_ItoF Q6 X9 0 *) + 0x9eaf0146; (* arm_FMOV_ItoF Q6 X10 1 *) + 0x6e271d05; (* arm_EOR_VEC Q5 Q8 Q7 128 *) + 0x9a863046; (* arm_CSEL X6 X2 X6 Condition_CC *) + 0x4e285a20; (* arm_AESD Q0 Q17 *) + 0x4e287800; (* arm_AESIMC Q0 Q0 *) + 0x4e285a21; (* arm_AESD Q1 Q17 *) + 0x4e287821; (* arm_AESIMC Q1 Q1 *) + 0x4e285a38; (* arm_AESD Q24 Q17 *) + 0x4e287b18; (* arm_AESIMC Q24 Q24 *) + 0x6e271d31; (* arm_EOR_VEC Q17 Q9 Q7 128 *) + 0x93ca8156; (* arm_ROR X22 X10 32 *) + 0x93c9fd4a; (* arm_EXTR X10 X10 X9 63 *) + 0x0a967e6b; (* arm_AND W11 W19 (Shiftedreg W22 ASR 31) *) + 0xca090569; (* arm_EOR X9 X11 (Shiftedreg X9 LSL 1) *) + 0x9e670128; (* arm_FMOV_ItoF Q8 X9 0 *) + 0x9eaf0148; (* arm_FMOV_ItoF Q8 X10 1 *) + 0x910080c6; (* arm_ADD X6 X6 (rvalue (word 32)) *) + 0x8b060000; (* arm_ADD X0 X0 X6 *) + 0xaa0303e7; (* arm_MOV X7 X3 *) + 0x93ca8156; (* arm_ROR X22 X10 32 *) + 0x93c9fd4a; (* arm_EXTR X10 X10 X9 63 *) + 0x0a967e6b; (* arm_AND W11 W19 (Shiftedreg W22 ASR 31) *) + 0xca090569; (* arm_EOR X9 X11 (Shiftedreg X9 LSL 1) *) + 0x9e670129; (* arm_FMOV_ItoF Q9 X9 0 *) + 0x9eaf0149; (* arm_FMOV_ItoF Q9 X10 1 *) + 0x4e285a80; (* arm_AESD Q0 Q20 *) + 0x4e287800; (* arm_AESIMC Q0 Q0 *) + 0x4e285a81; (* arm_AESD Q1 Q20 *) + 0x4e287821; (* arm_AESIMC Q1 Q1 *) + 0x4e285a98; (* arm_AESD Q24 Q20 *) + 0x4e287b18; (* arm_AESIMC Q24 Q24 *) + 
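+ (* Outer decrypt tail: at most three blocks remain here, run through the
+    same round schedule while the next tweaks (Q6, Q8, Q9) are recomputed
+    on the integer side in parallel with the AESD rounds. *)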
0x4e285aa0; (* arm_AESD Q0 Q21 *) + 0x4e287800; (* arm_AESIMC Q0 Q0 *) + 0x4e285aa1; (* arm_AESD Q1 Q21 *) + 0x4e287821; (* arm_AESIMC Q1 Q1 *) + 0x4e285ab8; (* arm_AESD Q24 Q21 *) + 0x4e287b18; (* arm_AESIMC Q24 Q24 *) + 0x4e285ac0; (* arm_AESD Q0 Q22 *) + 0x4e287800; (* arm_AESIMC Q0 Q0 *) + 0x4e285ac1; (* arm_AESD Q1 Q22 *) + 0x4e287821; (* arm_AESIMC Q1 Q1 *) + 0x4e285ad8; (* arm_AESD Q24 Q22 *) + 0x4e287b18; (* arm_AESIMC Q24 Q24 *) + 0x4cdf701b; (* arm_LDR Q27 X0 (Postimmediate_Offset (word 16)) *) + 0x4e285ae0; (* arm_AESD Q0 Q23 *) + 0x4e285ae1; (* arm_AESD Q1 Q23 *) + 0x4e285af8; (* arm_AESD Q24 Q23 *) + 0x4cdf78f0; (* arm_LDR Q16 X7 (Postimmediate_Offset (word 16)) *) + 0x110008a6; (* arm_ADD W6 W5 (rvalue (word 2)) *) + 0x6e201c84; (* arm_EOR_VEC Q4 Q4 Q0 128 *) + 0x6e211ca5; (* arm_EOR_VEC Q5 Q5 Q1 128 *) + 0x6e311f18; (* arm_EOR_VEC Q24 Q24 Q17 128 *) + 0x4cdf78f1; (* arm_LDR Q17 X7 (Postimmediate_Offset (word 16)) *) + 0x4c9f7024; (* arm_STR Q4 X1 (Postimmediate_Offset (word 16)) *) + 0x4c9f7025; (* arm_STR Q5 X1 (Postimmediate_Offset (word 16)) *) + 0x4c9f7038; (* arm_STR Q24 X1 (Postimmediate_Offset (word 16)) *) + 0xb100c05f; (* arm_CMN X2 (rvalue (word 48)) *) + 0x9100c042; (* arm_ADD X2 X2 (rvalue (word 48)) *) + 0x540007a0; (* arm_BEQ (word 244) *) + 0xd100c042; (* arm_SUB X2 X2 (rvalue (word 48)) *) + 0x4ea31c7c; (* arm_MOV_VEC Q28 Q3 128 *) + 0x4ebb1f7d; (* arm_MOV_VEC Q29 Q27 128 *) + 0xd503201f; (* arm_NOP *) + 0xb100405f; (* arm_CMN X2 (rvalue (word 16)) *) + 0x6e261f81; (* arm_EOR_VEC Q1 Q28 Q6 128 *) + 0x6e281fb8; (* arm_EOR_VEC Q24 Q29 Q8 128 *) + 0x54000040; (* arm_BEQ (word 8) *) + 0x6e261fb8; (* arm_EOR_VEC Q24 Q29 Q6 128 *) + 0x4e285a01; (* arm_AESD Q1 Q16 *) + 0x4e287821; (* arm_AESIMC Q1 Q1 *) + 0x4e285a18; (* arm_AESD Q24 Q16 *) + 0x4e287b18; (* arm_AESIMC Q24 Q24 *) + 0x4cdf78f0; (* arm_LDR Q16 X7 (Postimmediate_Offset (word 16)) *) + 0x710008c6; (* arm_SUBS W6 W6 (rvalue (word 2)) *) + 0x4e285a21; (* arm_AESD Q1 Q17 *) + 0x4e287821; (* arm_AESIMC Q1 Q1 *) + 0x4e285a38; (* arm_AESD Q24 Q17 *) + 0x4e287b18; (* arm_AESIMC Q24 Q24 *) + 0x4cdf78f1; (* arm_LDR Q17 X7 (Postimmediate_Offset (word 16)) *) + 0x54fffeac; (* arm_BGT (word 2097108) *) + 0x4e285a01; (* arm_AESD Q1 Q16 *) + 0x4e287821; (* arm_AESIMC Q1 Q1 *) + 0x4e285a18; (* arm_AESD Q24 Q16 *) + 0x4e287b18; (* arm_AESIMC Q24 Q24 *) + 0x4e285a21; (* arm_AESD Q1 Q17 *) + 0x4e287821; (* arm_AESIMC Q1 Q1 *) + 0x4e285a38; (* arm_AESD Q24 Q17 *) + 0x4e287b18; (* arm_AESIMC Q24 Q24 *) + 0x4e285a81; (* arm_AESD Q1 Q20 *) + 0x4e287821; (* arm_AESIMC Q1 Q1 *) + 0x4e285a98; (* arm_AESD Q24 Q20 *) + 0x4e287b18; (* arm_AESIMC Q24 Q24 *) + 0xb100805f; (* arm_CMN X2 (rvalue (word 32)) *) + 0x4e285aa1; (* arm_AESD Q1 Q21 *) + 0x4e287821; (* arm_AESIMC Q1 Q1 *) + 0x4e285ab8; (* arm_AESD Q24 Q21 *) + 0x4e287b18; (* arm_AESIMC Q24 Q24 *) + 0x6e271cc5; (* arm_EOR_VEC Q5 Q6 Q7 128 *) + 0x4e285ac1; (* arm_AESD Q1 Q22 *) + 0x4e287821; (* arm_AESIMC Q1 Q1 *) + 0x4e285ad8; (* arm_AESD Q24 Q22 *) + 0x4e287b18; (* arm_AESIMC Q24 Q24 *) + 0x6e271d11; (* arm_EOR_VEC Q17 Q8 Q7 128 *) + 0x4e285ae1; (* arm_AESD Q1 Q23 *) + 0x4e285af8; (* arm_AESD Q24 Q23 *) + 0x54000120; (* arm_BEQ (word 36) *) + 0x6e211ca5; (* arm_EOR_VEC Q5 Q5 Q1 128 *) + 0x6e381e31; (* arm_EOR_VEC Q17 Q17 Q24 128 *) + 0x4ea91d26; (* arm_MOV_VEC Q6 Q9 128 *) + 0x4eaa1d48; (* arm_MOV_VEC Q8 Q10 128 *) + 0x4c9f7025; (* arm_STR Q5 X1 (Postimmediate_Offset (word 16)) *) + 0x4c9f7031; (* arm_STR Q17 X1 (Postimmediate_Offset (word 16)) *) + 0x91004042; (* arm_ADD X2 X2 
(rvalue (word 16)) *) + 0x14000006; (* arm_B (word 24) *) + 0x6e381ca5; (* arm_EOR_VEC Q5 Q5 Q24 128 *) + 0x4ea81d06; (* arm_MOV_VEC Q6 Q8 128 *) + 0x4ea91d28; (* arm_MOV_VEC Q8 Q9 128 *) + 0x4c9f7025; (* arm_STR Q5 X1 (Postimmediate_Offset (word 16)) *) + 0x91008042; (* arm_ADD X2 X2 (rvalue (word 32)) *) + 0xf2400ebf; (* arm_TST X21 (rvalue (word 15)) *) + 0x540006a0; (* arm_BEQ (word 212) *) + 0xaa0303e7; (* arm_MOV X7 X3 *) + 0xb5000042; (* arm_CBNZ X2 (word 8) *) + 0x4cdf7800; (* arm_LDR Q0 X0 (Postimmediate_Offset (word 16)) *) + 0x6e281c1a; (* arm_EOR_VEC Q26 Q0 Q8 128 *) + 0xb940f066; (* arm_LDR W6 X3 (Immediate_Offset (word 240)) *) + 0x4cdf7860; (* arm_LDR Q0 X3 (Postimmediate_Offset (word 16)) *) + 0x510008c6; (* arm_SUB W6 W6 (rvalue (word 2)) *) + 0x4cdf7861; (* arm_LDR Q1 X3 (Postimmediate_Offset (word 16)) *) + 0x4e28581a; (* arm_AESD Q26 Q0 *) + 0x4e287b5a; (* arm_AESIMC Q26 Q26 *) + 0x4cdf7860; (* arm_LDR Q0 X3 (Postimmediate_Offset (word 16)) *) + 0x710008c6; (* arm_SUBS W6 W6 (rvalue (word 2)) *) + 0x4e28583a; (* arm_AESD Q26 Q1 *) + 0x4e287b5a; (* arm_AESIMC Q26 Q26 *) + 0x4cdf7861; (* arm_LDR Q1 X3 (Postimmediate_Offset (word 16)) *) + 0x54ffff2c; (* arm_BGT (word 2097124) *) + 0x4e28581a; (* arm_AESD Q26 Q0 *) + 0x4e287b5a; (* arm_AESIMC Q26 Q26 *) + 0x4c407860; (* arm_LDR Q0 X3 No_Offset *) + 0x4e28583a; (* arm_AESD Q26 Q1 *) + 0x6e201f5a; (* arm_EOR_VEC Q26 Q26 Q0 128 *) + 0x6e281f5a; (* arm_EOR_VEC Q26 Q26 Q8 128 *) + 0x4c00703a; (* arm_STR Q26 X1 No_Offset *) + 0xaa0003f4; (* arm_MOV X20 X0 *) + 0x9100402d; (* arm_ADD X13 X1 (rvalue (word 16)) *) + 0xf10006b5; (* arm_SUBS X21 X21 (rvalue (word 1)) *) + 0x3875682f; (* arm_LDRB W15 X1 (Register_Offset X21) *) + 0x38756a8e; (* arm_LDRB W14 X20 (Register_Offset X21) *) + 0x383569af; (* arm_STRB W15 X13 (Register_Offset X21) *) + 0x3835682e; (* arm_STRB W14 X1 (Register_Offset X21) *) + 0x54ffff6c; (* arm_BGT (word 2097132) *) + 0x4c40703a; (* arm_LDR Q26 X1 No_Offset *) + 0x6e261f5a; (* arm_EOR_VEC Q26 Q26 Q6 128 *) + 0xb940f0e6; (* arm_LDR W6 X7 (Immediate_Offset (word 240)) *) + 0x4cdf70e0; (* arm_LDR Q0 X7 (Postimmediate_Offset (word 16)) *) + 0x510008c6; (* arm_SUB W6 W6 (rvalue (word 2)) *) + 0x4cdf70e1; (* arm_LDR Q1 X7 (Postimmediate_Offset (word 16)) *) + 0x4e28581a; (* arm_AESD Q26 Q0 *) + 0x4e287b5a; (* arm_AESIMC Q26 Q26 *) + 0x4cdf78e0; (* arm_LDR Q0 X7 (Postimmediate_Offset (word 16)) *) + 0x710008c6; (* arm_SUBS W6 W6 (rvalue (word 2)) *) + 0x4e28583a; (* arm_AESD Q26 Q1 *) + 0x4e287b5a; (* arm_AESIMC Q26 Q26 *) + 0x4cdf78e1; (* arm_LDR Q1 X7 (Postimmediate_Offset (word 16)) *) + 0x54ffff2c; (* arm_BGT (word 2097124) *) + 0x4e28581a; (* arm_AESD Q26 Q0 *) + 0x4e287b5a; (* arm_AESIMC Q26 Q26 *) + 0x4c4078e0; (* arm_LDR Q0 X7 No_Offset *) + 0x4e28583a; (* arm_AESD Q26 Q1 *) + 0x6e201f5a; (* arm_EOR_VEC Q26 Q26 Q0 128 *) + 0x6e261f5a; (* arm_EOR_VEC Q26 Q26 Q6 128 *) + 0x4c00703a; (* arm_STR Q26 X1 No_Offset *) + 0xa9435bf5; (* arm_LDP X21 X22 SP (Immediate_Offset (iword (&48))) *) + 0x6d4227e8; (* arm_LDP D8 D9 SP (Immediate_Offset (iword (&32))) *) + 0x6d412fea; (* arm_LDP D10 D11 SP (Immediate_Offset (iword (&16))) *) + 0xa8c453f3; (* arm_LDP X19 X20 SP (Postimmediate_Offset (iword (&64))) *) + 0xd65f03c0 (* arm_RET X30 *) +];; diff --git a/arm/proofs/aes-xts-armv8.ml b/arm/proofs/aes-xts-armv8.ml index 4b4df2eb9..1a181bbbd 100644 --- a/arm/proofs/aes-xts-armv8.ml +++ b/arm/proofs/aes-xts-armv8.ml @@ -7,4 +7,7 @@ use_file_raise_failure := true;; needs "arm/proofs/base.ml";; -print_literal_from_elf 
"arm/aes-xts/aes-xts-armv8.o";; +(* print_literal_from_elf "arm/aes-xts/aes-xts-armv8.o";; *) +save_literal_from_elf "arm/aes-xts/aes-xts-armv8.txt" "arm/aes-xts/aes-xts-armv8.o";; + +(* let aes_xts_armv8 = define_assert_from_elf "aes_xts_armv8" "arm/aes-xts/aes-xts-armv8.o" ..*) From 75d747d7a3aa3ae2f7f86df28098bd8730a26310 Mon Sep 17 00:00:00 2001 From: Nevine Ebeid Date: Tue, 25 Mar 2025 11:34:49 -0400 Subject: [PATCH 7/8] Update decoded output after adding post-index register offset to the decoder. --- arm/aes-xts/aes-xts-armv8.txt | 4 ++-- arm/proofs/aes-xts-armv8.ml | 9 +++++++++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/arm/aes-xts/aes-xts-armv8.txt b/arm/aes-xts/aes-xts-armv8.txt index 16e1b3ad7..191dcb8fb 100644 --- a/arm/aes-xts/aes-xts-armv8.txt +++ b/arm/aes-xts/aes-xts-armv8.txt @@ -101,7 +101,7 @@ 0x9e670128; (* arm_FMOV_ItoF Q8 X9 0 *) 0x9eaf0148; (* arm_FMOV_ItoF Q8 X10 1 *) 0xb940f065; (* arm_LDR W5 X3 (Immediate_Offset (word 240)) *) - 0x4cc87000; (* arm_LDR Q0 X0 (Register_Offset X8) *) + 0x4cc87000; (* arm_LDR Q0 X0 (Postreg_Offset X8) *) 0x4c40a870; (* arm_ldstp_2q true (word 16) X3 No_Offset *) 0x510018a5; (* arm_SUB W5 W5 (rvalue (word 6)) *) 0x8b051067; (* arm_ADD X7 X3 (Shiftedreg X5 LSL 4) *) @@ -678,7 +678,7 @@ 0x4cdf7000; (* arm_LDR Q0 X0 (Postimmediate_Offset (word 16)) *) 0x54003043; (* arm_BCC (word 1544) *) 0xd1004000; (* arm_SUB X0 X0 (rvalue (word 16)) *) - 0x4cc87000; (* arm_LDR Q0 X0 (Register_Offset X8) *) + 0x4cc87000; (* arm_LDR Q0 X0 (Postreg_Offset X8) *) 0xf1008042; (* arm_SUBS X2 X2 (rvalue (word 32)) *) 0x110008a6; (* arm_ADD W6 W5 (rvalue (word 2)) *) 0x4ea01c03; (* arm_MOV_VEC Q3 Q0 128 *) diff --git a/arm/proofs/aes-xts-armv8.ml b/arm/proofs/aes-xts-armv8.ml index 1a181bbbd..cbba0da7d 100644 --- a/arm/proofs/aes-xts-armv8.ml +++ b/arm/proofs/aes-xts-armv8.ml @@ -11,3 +11,12 @@ needs "arm/proofs/base.ml";; save_literal_from_elf "arm/aes-xts/aes-xts-armv8.txt" "arm/aes-xts/aes-xts-armv8.o";; (* let aes_xts_armv8 = define_assert_from_elf "aes_xts_armv8" "arm/aes-xts/aes-xts-armv8.o" ..*) + +(* Missing instructions that were added in PR#211 +4c4070a6 10: 4c4070a6 ld1.16b { v6 }, [x5] +4cdfa87c 5c: 4cdfa87c ld1.4s { v28, v29 }, [x3], #32 +d503201f f8: d503201f nop +4cc87000 198: 4cc87000 ld1.16b { v0 }, [x0], x8 +4c40a870 19c: 4c40a870 ld1.4s { v16, v17 }, [x3] +3875682f 818: 3875682f ldrb w15, [x1, x21] +*) \ No newline at end of file From 842ec348569e2d89fbfa19f135b0d7ce1c55b5b5 Mon Sep 17 00:00:00 2001 From: Nevine Ebeid Date: Fri, 4 Apr 2025 10:47:24 -0400 Subject: [PATCH 8/8] New decoded output after shuffling around some instructions. In some places, only comments are different after the decoder was updated. --- arm/aes-xts/aes-xts-armv8.S | 864 ++++++++++++++++++++++++++++++---- arm/aes-xts/aes-xts-armv8.txt | 132 +++--- 2 files changed, 844 insertions(+), 152 deletions(-) diff --git a/arm/aes-xts/aes-xts-armv8.S b/arm/aes-xts/aes-xts-armv8.S index 8c6587053..25701e691 100644 --- a/arm/aes-xts/aes-xts-armv8.S +++ b/arm/aes-xts/aes-xts-armv8.S @@ -1,10 +1,12 @@ #include "_internal_s2n_bignum.h" -aes_hw_xts_encrypt: - // AARCH64_VALID_CALL_TARGET +# The following xts encrypt is from MacBook M3 build folder +# after moving around some instructions +_aes_hw_xts_encrypt: + # AARCH64_VALID_CALL_TARGET cmp x2,#16 // Original input data size bigger than 16, jump to big size processing. - b.ne .Lxts_enc_big_size + b.ne Lxts_enc_big_size // Encrypt the iv with key2, as the first XEX iv. 
ldr w6,[x4,#240] ld1 {v0.16b},[x4],#16 @@ -12,7 +14,7 @@ aes_hw_xts_encrypt: sub w6,w6,#2 ld1 {v1.16b},[x4],#16 -.Loop_enc_iv_enc: +Loop_enc_iv_enc: aese v6.16b,v0.16b aesmc v6.16b,v6.16b ld1 {v0.4s},[x4],#16 @@ -20,7 +22,7 @@ aes_hw_xts_encrypt: aese v6.16b,v1.16b aesmc v6.16b,v6.16b ld1 {v1.4s},[x4],#16 - b.gt .Loop_enc_iv_enc + b.gt Loop_enc_iv_enc aese v6.16b,v0.16b aesmc v6.16b,v6.16b @@ -39,9 +41,9 @@ aes_hw_xts_encrypt: ld1 {v16.4s,v17.4s},[x3],#32 // load key schedule... aese v0.16b,v29.16b aesmc v0.16b,v0.16b - subs w6,w6,#10 // if rounds==10, jump to aes-128-xts processing - b.eq .Lxts_128_enc -.Lxts_enc_round_loop: + subs w6,w6,#10 //// if rounds==10, jump to aes-128-xts processing +// b.eq .Lxts_128_enc +Lxts_enc_round_loop: aese v0.16b,v16.16b aesmc v0.16b,v0.16b ld1 {v16.4s},[x3],#16 // load key schedule... @@ -49,8 +51,8 @@ aes_hw_xts_encrypt: aesmc v0.16b,v0.16b ld1 {v17.4s},[x3],#16 // load key schedule... subs w6,w6,#2 // bias - b.gt .Lxts_enc_round_loop -.Lxts_128_enc: + b.gt Lxts_enc_round_loop +//.Lxts_128_enc: ld1 {v18.4s,v19.4s},[x3],#32 // load key schedule... aese v0.16b,v16.16b aesmc v0.16b,v0.16b @@ -73,10 +75,10 @@ aes_hw_xts_encrypt: eor v0.16b,v0.16b,v7.16b eor v0.16b,v0.16b,v6.16b st1 {v0.16b},[x1] - b .Lxts_enc_final_abort + b Lxts_enc_final_abort .align 4 -.Lxts_enc_big_size: +Lxts_enc_big_size: // Encrypt input size > 16 bytes stp x19,x20,[sp,#-64]! stp x21,x22,[sp,#48] @@ -88,7 +90,7 @@ aes_hw_xts_encrypt: and x2,x2,#-16 // len &= 0x1..110000, now divisible by 16 subs x2,x2,#16 mov x8,#16 - b.lo .Lxts_abort // if !(len > 16): error + b.lo Lxts_abort // if !(len > 16): error csel x8,xzr,x8,eq // if (len == 16): step = 0 // Firstly, encrypt the iv with key2, as the first iv of XEX. @@ -98,7 +100,7 @@ aes_hw_xts_encrypt: sub w6,w6,#2 ld1 {v1.4s},[x4],#16 -.Loop_iv_enc: +Loop_iv_enc: aese v6.16b,v0.16b aesmc v6.16b,v6.16b ld1 {v0.4s},[x4],#16 @@ -106,7 +108,7 @@ aes_hw_xts_encrypt: aese v6.16b,v1.16b aesmc v6.16b,v6.16b ld1 {v1.4s},[x4],#16 - b.gt .Loop_iv_enc + b.gt Loop_iv_enc aese v6.16b,v0.16b aesmc v6.16b,v6.16b @@ -133,17 +135,17 @@ aes_hw_xts_encrypt: ld1 {v16.4s,v17.4s},[x3] // load key schedule... 
sub w5,w5,#6 add x7,x3,x5,lsl#4 // pointer to last 7 round keys - sub w5,w5,#2 ld1 {v18.4s,v19.4s},[x7],#32 ld1 {v20.4s,v21.4s},[x7],#32 ld1 {v22.4s,v23.4s},[x7],#32 ld1 {v7.4s},[x7] + sub w5,w5,#2 add x7,x3,#32 mov w6,w5 // Encryption -.Lxts_enc: +Lxts_enc: ld1 {v24.16b},[x0],#16 subs x2,x2,#32 // bias add w6,w5,#2 @@ -152,7 +154,7 @@ aes_hw_xts_encrypt: orr v28.16b,v0.16b,v0.16b orr v27.16b,v24.16b,v24.16b orr v29.16b,v24.16b,v24.16b - b.lo .Lxts_inner_enc_tail // when input size % 5 = 1 or 2 + b.lo Lxts_inner_enc_tail // when input size % 5 = 1 or 2 // (with tail or not) eor v0.16b,v0.16b,v6.16b // before encryption, xor with iv eor v24.16b,v24.16b,v8.16b @@ -173,7 +175,7 @@ aes_hw_xts_encrypt: eor v27.16b,v24.16b,v9.16b // the third block eor v24.16b,v24.16b,v9.16b cmp x2,#32 - b.lo .Lxts_outer_enc_tail + b.lo Lxts_outer_enc_tail // The iv for fourth block extr x22,x10,x10,#32 @@ -197,10 +199,10 @@ aes_hw_xts_encrypt: eor v26.16b,v26.16b,v11.16b sub x2,x2,#32 // bias mov w6,w5 - b .Loop5x_xts_enc + // b .Loop5x_xts_enc .align 4 -.Loop5x_xts_enc: +Loop5x_xts_enc: aese v0.16b,v16.16b aesmc v0.16b,v0.16b aese v1.16b,v16.16b @@ -224,7 +226,7 @@ aes_hw_xts_encrypt: aese v26.16b,v17.16b aesmc v26.16b,v26.16b ld1 {v17.4s},[x7],#16 - b.gt .Loop5x_xts_enc + b.gt Loop5x_xts_enc aese v0.16b,v16.16b aesmc v0.16b,v0.16b @@ -236,7 +238,7 @@ aes_hw_xts_encrypt: aesmc v25.16b,v25.16b aese v26.16b,v16.16b aesmc v26.16b,v26.16b - subs x2,x2,#0x50 // because .Lxts_enc_tail4x + subs x2,x2,#0x50 // because Lxts_enc_tail4x aese v0.16b,v17.16b aesmc v0.16b,v0.16b @@ -264,7 +266,7 @@ aes_hw_xts_encrypt: add x0,x0,x6 // x0 is adjusted in such way that // at exit from the loop v1.16b-v26.16b // are loaded with last "words" - add x6,x2,#0x60 // because .Lxts_enc_tail4x + add x6,x2,#0x60 // because Lxts_enc_tail4x aese v0.16b,v19.16b aesmc v0.16b,v0.16b @@ -310,8 +312,14 @@ aes_hw_xts_encrypt: aese v26.16b,v22.16b aesmc v26.16b,v26.16b - eor v4.16b,v7.16b,v6.16b aese v0.16b,v23.16b + aese v1.16b,v23.16b + aese v24.16b,v23.16b + aese v25.16b,v23.16b + aese v26.16b,v23.16b + + eor v4.16b,v7.16b,v6.16b + // aese v0.16b,v23.16b // The iv for first block of one iteration extr x22,x10,x10,#32 extr x10,x10,x9,#63 @@ -321,7 +329,7 @@ aes_hw_xts_encrypt: fmov v6.d[1],x10 eor v5.16b,v7.16b,v8.16b ld1 {v2.16b},[x0],#16 - aese v1.16b,v23.16b + // aese v1.16b,v23.16b // The iv for second block extr x22,x10,x10,#32 extr x10,x10,x9,#63 @@ -331,7 +339,7 @@ aes_hw_xts_encrypt: fmov v8.d[1],x10 eor v17.16b,v7.16b,v9.16b ld1 {v3.16b},[x0],#16 - aese v24.16b,v23.16b + // aese v24.16b,v23.16b // The iv for third block extr x22,x10,x10,#32 extr x10,x10,x9,#63 @@ -341,7 +349,7 @@ aes_hw_xts_encrypt: fmov v9.d[1],x10 eor v30.16b,v7.16b,v10.16b ld1 {v27.16b},[x0],#16 - aese v25.16b,v23.16b + // aese v25.16b,v23.16b // The iv for fourth block extr x22,x10,x10,#32 extr x10,x10,x9,#63 @@ -351,7 +359,7 @@ aes_hw_xts_encrypt: fmov v10.d[1],x10 eor v31.16b,v7.16b,v11.16b ld1 {v28.16b},[x0],#16 - aese v26.16b,v23.16b + // aese v26.16b,v23.16b // The iv for fifth block extr x22,x10,x10,#32 @@ -362,8 +370,8 @@ aes_hw_xts_encrypt: fmov v11.d[1],x10 ld1 {v29.16b},[x0],#16 - cbz x6,.Lxts_enc_tail4x - ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0] + cbz x6,Lxts_enc_tail4x +// vld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0] eor v4.16b,v4.16b,v0.16b eor v0.16b,v2.16b,v6.16b eor v5.16b,v5.16b,v1.16b @@ -373,15 +381,19 @@ aes_hw_xts_encrypt: eor v30.16b,v30.16b,v25.16b eor v25.16b,v28.16b,v10.16b eor v31.16b,v31.16b,v26.16b - st1 {v4.16b},[x1],#16 + 
//vst1 {v4.16b},[x1],#16 eor v26.16b,v29.16b,v11.16b - st1 {v5.16b},[x1],#16 + //vst1 {v5.16b},[x1],#16 mov w6,w5 + st1 {v4.16b,v5.16b},[x1],#32 st1 {v17.16b},[x1],#16 +// vld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1] +// vst1 {v30.16b},[x1],#16 +// vst1 {v31.16b},[x1],#16 + st1 {v30.16b,v31.16b},[x1],#32 + ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0] ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1] - st1 {v30.16b},[x1],#16 - st1 {v31.16b},[x1],#16 - b.hs .Loop5x_xts_enc + b.hs Loop5x_xts_enc // If left 4 blocks, borrow the five block's processing. @@ -393,7 +405,7 @@ aes_hw_xts_encrypt: // 0, 1, 2 or 3 blocks (with or without tail) starting at // Loop5x_enc_after cmn x2,#0x10 - b.ne .Loop5x_enc_after + b.ne Loop5x_enc_after orr v11.16b,v10.16b,v10.16b orr v10.16b,v9.16b,v9.16b orr v9.16b,v8.16b,v8.16b @@ -405,24 +417,24 @@ aes_hw_xts_encrypt: eor v24.16b,v27.16b,v9.16b eor v25.16b,v28.16b,v10.16b eor v26.16b,v29.16b,v11.16b - b.eq .Loop5x_xts_enc + b.eq Loop5x_xts_enc -.Loop5x_enc_after: +Loop5x_enc_after: add x2,x2,#0x50 - cbz x2,.Lxts_enc_done // no blocks left + cbz x2,Lxts_enc_done // no blocks left add w6,w5,#2 subs x2,x2,#0x30 - b.lo .Lxts_inner_enc_tail // 1 or 2 blocks left + b.lo Lxts_inner_enc_tail // 1 or 2 blocks left // (with tail or not) eor v0.16b,v6.16b,v27.16b // 3 blocks left eor v1.16b,v8.16b,v28.16b eor v24.16b,v29.16b,v9.16b - b .Lxts_outer_enc_tail + b Lxts_outer_enc_tail .align 4 -.Lxts_enc_tail4x: +Lxts_enc_tail4x: add x0,x0,#16 eor v5.16b,v1.16b,v5.16b st1 {v5.16b},[x1],#16 @@ -431,9 +443,9 @@ aes_hw_xts_encrypt: eor v30.16b,v25.16b,v30.16b eor v31.16b,v26.16b,v31.16b st1 {v30.16b,v31.16b},[x1],#32 - b .Lxts_enc_done + b Lxts_enc_done .align 4 -.Lxts_outer_enc_tail: +Lxts_outer_enc_tail: aese v0.16b,v16.16b aesmc v0.16b,v0.16b aese v1.16b,v16.16b @@ -449,7 +461,7 @@ aes_hw_xts_encrypt: aese v24.16b,v17.16b aesmc v24.16b,v24.16b ld1 {v17.4s},[x7],#16 - b.gt .Lxts_outer_enc_tail + b.gt Lxts_outer_enc_tail aese v0.16b,v16.16b aesmc v0.16b,v0.16b @@ -458,31 +470,12 @@ aes_hw_xts_encrypt: aese v24.16b,v16.16b aesmc v24.16b,v24.16b eor v4.16b,v6.16b,v7.16b - subs x2,x2,#0x30 - // The iv for first block - fmov x9,d9 - fmov x10,v9.d[1] - //mov w19,#0x87 - extr x22,x10,x10,#32 - extr x10,x10,x9,#63 - and w11,w19,w22,asr#31 - eor x9,x11,x9,lsl#1 - fmov d6,x9 - fmov v6.d[1],x10 - eor v5.16b,v8.16b,v7.16b - csel x6,x2,x6,lo // x6, w6, is zero at this point aese v0.16b,v17.16b aesmc v0.16b,v0.16b aese v1.16b,v17.16b aesmc v1.16b,v1.16b aese v24.16b,v17.16b aesmc v24.16b,v24.16b - eor v17.16b,v9.16b,v7.16b - - add x6,x6,#0x20 - add x0,x0,x6 - mov x7,x3 - aese v0.16b,v20.16b aesmc v0.16b,v0.16b aese v1.16b,v20.16b @@ -504,30 +497,80 @@ aes_hw_xts_encrypt: aese v0.16b,v23.16b aese v1.16b,v23.16b aese v24.16b,v23.16b + + eor v17.16b,v9.16b,v7.16b + subs x2,x2,#0x30 + // The iv for first block + fmov x9,d9 + fmov x10,v9.d[1] + //mov w19,#0x87 + extr x22,x10,x10,#32 + extr x10,x10,x9,#63 + and w11,w19,w22,asr#31 + eor x9,x11,x9,lsl#1 + fmov d6,x9 + fmov v6.d[1],x10 + eor v5.16b,v8.16b,v7.16b + csel x6,x2,x6,lo // x6, w6, is zero at this point +// aese v0.16b,v17.16b +// aesmc v0.16b,v0.16b +// aese v1.16b,v17.16b +// aesmc v1.16b,v1.16b +// aese v24.16b,v17.16b +// aesmc v24.16b,v24.16b +// veor v17.16b,v9.16b,v7.16b + + add x6,x6,#0x20 + add x0,x0,x6 + mov x7,x3 + +// aese v0.16b,v20.16b +// aesmc v0.16b,v0.16b +// aese v1.16b,v20.16b +// aesmc v1.16b,v1.16b +// aese v24.16b,v20.16b +// aesmc v24.16b,v24.16b +// aese v0.16b,v21.16b +// aesmc v0.16b,v0.16b +// aese v1.16b,v21.16b 
+// aesmc v1.16b,v1.16b +// aese v24.16b,v21.16b +// aesmc v24.16b,v24.16b +// aese v0.16b,v22.16b +// aesmc v0.16b,v0.16b +// aese v1.16b,v22.16b +// aesmc v1.16b,v1.16b +// aese v24.16b,v22.16b +// aesmc v24.16b,v24.16b +// aese v0.16b,v23.16b +// aese v1.16b,v23.16b +// aese v24.16b,v23.16b ld1 {v27.16b},[x0],#16 add w6,w5,#2 - ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0] +// vld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0] eor v4.16b,v4.16b,v0.16b eor v5.16b,v5.16b,v1.16b eor v24.16b,v24.16b,v17.16b - ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1] - st1 {v4.16b},[x1],#16 - st1 {v5.16b},[x1],#16 +// vld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1] + ld1 {v16.4s,v17.4s},[x7],#32 +// vst1 {v4.16b},[x1],#16 +// vst1 {v5.16b},[x1],#16 + st1 {v4.16b,v5.16b},[x1],#32 st1 {v24.16b},[x1],#16 cmn x2,#0x30 - b.eq .Lxts_enc_done -.Lxts_encxor_one: + b.eq Lxts_enc_done +Lxts_encxor_one: orr v28.16b,v3.16b,v3.16b orr v29.16b,v27.16b,v27.16b nop -.Lxts_inner_enc_tail: +Lxts_inner_enc_tail: cmn x2,#0x10 eor v1.16b,v28.16b,v6.16b eor v24.16b,v29.16b,v8.16b - b.eq .Lxts_enc_tail_loop + b.eq Lxts_enc_tail_loop eor v24.16b,v29.16b,v6.16b -.Lxts_enc_tail_loop: +Lxts_enc_tail_loop: aese v1.16b,v16.16b aesmc v1.16b,v1.16b aese v24.16b,v16.16b @@ -539,7 +582,7 @@ aes_hw_xts_encrypt: aese v24.16b,v17.16b aesmc v24.16b,v24.16b ld1 {v17.4s},[x7],#16 - b.gt .Lxts_enc_tail_loop + b.gt Lxts_enc_tail_loop aese v1.16b,v16.16b aesmc v1.16b,v1.16b @@ -566,7 +609,7 @@ aes_hw_xts_encrypt: eor v17.16b,v8.16b,v7.16b aese v1.16b,v23.16b aese v24.16b,v23.16b - b.eq .Lxts_enc_one + b.eq Lxts_enc_one eor v5.16b,v5.16b,v1.16b st1 {v5.16b},[x1],#16 eor v17.16b,v17.16b,v24.16b @@ -581,9 +624,9 @@ aes_hw_xts_encrypt: eor x9,x11,x9,lsl #1 fmov d6,x9 fmov v6.d[1],x10 - b .Lxts_enc_done + b Lxts_enc_done -.Lxts_enc_one: +Lxts_enc_one: eor v5.16b,v5.16b,v24.16b orr v6.16b,v6.16b,v6.16b st1 {v5.16b},[x1],#16 @@ -596,12 +639,12 @@ aes_hw_xts_encrypt: eor x9,x11,x9,lsl #1 fmov d6,x9 fmov v6.d[1],x10 - b .Lxts_enc_done + b Lxts_enc_done .align 5 -.Lxts_enc_done: +Lxts_enc_done: // Process the tail block with cipher stealing. tst x21,#0xf - b.eq .Lxts_abort + b.eq Lxts_abort mov x20,x0 mov x13,x1 @@ -613,7 +656,7 @@ aes_hw_xts_encrypt: strb w15,[x13,x21] strb w14,[x1,x21] b.gt .composite_enc_loop -.Lxts_enc_load_done: +Lxts_enc_load_done: ld1 {v26.16b},[x1] eor v26.16b,v26.16b,v6.16b @@ -622,7 +665,7 @@ aes_hw_xts_encrypt: ld1 {v0.16b},[x3],#16 sub w6,w6,#2 ld1 {v1.16b},[x3],#16 // load key schedule... -.Loop_final_enc: +Loop_final_enc: aese v26.16b,v0.16b aesmc v26.16b,v26.16b ld1 {v0.4s},[x3],#16 @@ -630,7 +673,7 @@ aes_hw_xts_encrypt: aese v26.16b,v1.16b aesmc v26.16b,v26.16b ld1 {v1.4s},[x3],#16 - b.gt .Loop_final_enc + b.gt Loop_final_enc aese v26.16b,v0.16b aesmc v26.16b,v26.16b @@ -640,14 +683,663 @@ aes_hw_xts_encrypt: eor v26.16b,v26.16b,v6.16b st1 {v26.16b},[x1] -.Lxts_abort: +Lxts_abort: ldp x21,x22,[sp,#48] ldp d8,d9,[sp,#32] ldp d10,d11,[sp,#16] ldp x19,x20,[sp],#64 -.Lxts_enc_final_abort: +Lxts_enc_final_abort: ret +## aes_hw_xts_encrypt: +## // AARCH64_VALID_CALL_TARGET +## cmp x2,#16 +## // Original input data size bigger than 16, jump to big size processing. +## b.ne .Lxts_enc_big_size +## // Encrypt the iv with key2, as the first XEX iv. 
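The `.composite_enc_loop` retained above performs the byte swap of XTS ciphertext stealing: the first `tailcnt` bytes of the last full ciphertext block are copied out as the final partial block, and the trailing plaintext bytes are spliced into that block before it is encrypted once more. A rough C equivalent, with hypothetical pointer names standing in for x1 (last output block), x20 (input tail) and x13 (output tail):

#include <stddef.h>
#include <stdint.h>

static void xts_steal_swap(uint8_t *last_block, const uint8_t *in_tail,
                           uint8_t *out_tail, size_t tailcnt)
{
    for (size_t i = 0; i < tailcnt; i++) {
        uint8_t c = last_block[i];   /* ldrb w15,[x1,x21]  */
        uint8_t p = in_tail[i];      /* ldrb w14,[x20,x21] */
        out_tail[i] = c;             /* strb w15,[x13,x21] */
        last_block[i] = p;           /* strb w14,[x1,x21]  */
    }
}

The assembly walks the index downward from `tailcnt - 1`; the direction is immaterial, since each byte position is visited exactly once.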
 # Decrypt is taken from
 # https://github.com/aws/aws-lc/blob/804a11b6f965365156b0a8b6d958233e1372a2e2/generated-src/linux-aarch64/crypto/fipsmodule/aesv8-armx.S#L1475
diff --git a/arm/aes-xts/aes-xts-armv8.txt b/arm/aes-xts/aes-xts-armv8.txt
index 191dcb8fb..78438bffb 100644
--- a/arm/aes-xts/aes-xts-armv8.txt
+++ b/arm/aes-xts/aes-xts-armv8.txt
@@ -22,14 +22,13 @@
 0x4c407000; (* arm_LDR Q0 X0 No_Offset *)
 0x6e201cc0; (* arm_EOR_VEC Q0 Q6 Q0 128 *)
 0xb940f066; (* arm_LDR W6 X3 (Immediate_Offset (word 240)) *)
- 0x4cdfa87c; (* arm_ldstp_2q true (word 28) X3 (Postimmediate_Offset (word 32)) *)
+ 0x4cdfa87c; (* arm_LDP Q28 Q29 X3 (Postimmediate_Offset (word 32)) *)
 0x4e284b80; (* arm_AESE Q0 Q28 *)
 0x4e286800; (* arm_AESMC Q0 Q0 *)
- 0x4cdfa870; (* arm_ldstp_2q true (word 16) X3 (Postimmediate_Offset (word 32)) *)
+ 0x4cdfa870; (* arm_LDP Q16 Q17 X3 (Postimmediate_Offset (word 32)) *)
 0x4e284ba0; (* arm_AESE Q0 Q29 *)
 0x4e286800; (* arm_AESMC Q0 Q0 *)
 0x710028c6; (* arm_SUBS W6 W6 (rvalue (word 10)) *)
- 0x54000120; (* arm_BEQ (word 36) *)
 0x4e284a00; (* arm_AESE Q0 Q16 *)
 0x4e286800; (* arm_AESMC Q0 Q0 *)
 0x4cdf7870; (* arm_LDR Q16 X3 (Postimmediate_Offset (word 16)) *)
@@ -38,17 +37,17 @@
 0x4cdf7871; (* arm_LDR Q17 X3 (Postimmediate_Offset (word 16)) *)
 0x710008c6; (* arm_SUBS W6 W6 (rvalue (word 2)) *)
 0x54ffff2c; (* arm_BGT (word 2097124) *)
- 0x4cdfa872; (* arm_ldstp_2q true (word 18) X3 (Postimmediate_Offset (word 32)) *)
+ 0x4cdfa872; (* arm_LDP Q18 Q19 X3 (Postimmediate_Offset (word 32)) *)
 0x4e284a00; (* arm_AESE Q0 Q16 *)
 0x4e286800; (* arm_AESMC Q0 Q0 *)
 0x4e284a20; (* arm_AESE Q0 Q17 *)
 0x4e286800; (* arm_AESMC Q0 Q0 *)
- 0x4cdfa874; (* arm_ldstp_2q true (word 20) X3 (Postimmediate_Offset (word 32)) *)
+ 0x4cdfa874; (* arm_LDP Q20 Q21 X3 (Postimmediate_Offset (word 32)) *)
 0x4e284a40; (* arm_AESE Q0 Q18 *)
 0x4e286800; (* arm_AESMC Q0 Q0 *)
 0x4e284a60; (* arm_AESE Q0 Q19 *)
 0x4e286800; (* arm_AESMC Q0 Q0 *)
- 0x4cdfa876; (* arm_ldstp_2q true (word 22) X3 (Postimmediate_Offset (word 32)) *)
+ 0x4cdfa876; (* arm_LDP Q22 Q23 X3 (Postimmediate_Offset (word 32)) *)
 0x4e284a80; (* arm_AESE Q0 Q20 *)
 0x4e286800; (* arm_AESMC Q0 Q0 *)
 0x4e284aa0; (* arm_AESE Q0 Q21 *)
@@ -60,7 +59,8 @@
 0x6e271c00; (* arm_EOR_VEC Q0 Q0 Q7 128 *)
 0x6e261c00; (* arm_EOR_VEC Q0 Q0 Q6 128 *)
 0x4c007020; (* arm_STR Q0 X1 No_Offset *)
- 0x140001e7; (* arm_B (word 1948) *)
+ 0x140001e8; (* arm_B (word 1952) *)
+ 0xd503201f; (* arm_NOP *)
 0xd503201f; (* arm_NOP *)
 0xd503201f; (* arm_NOP *)
 0xa9bc53f3; (* arm_STP X19 X20 SP (Preimmediate_Offset (iword (-- &64))) *)
@@ -102,14 +102,14 @@
 0x9eaf0148; (* arm_FMOV_ItoF Q8 X10 1 *)
 0xb940f065; (* arm_LDR W5 X3 (Immediate_Offset (word 240)) *)
 0x4cc87000; (* arm_LDR Q0 X0 (Postreg_Offset X8) *)
- 0x4c40a870; (* arm_ldstp_2q true (word 16) X3 No_Offset *)
+ 0x4c40a870; (* arm_LDP Q16 Q17 X3 No_Offset *)
 0x510018a5; (* arm_SUB W5 W5 (rvalue (word 6)) *)
 0x8b051067; (* arm_ADD X7 X3 (Shiftedreg X5 LSL 4) *)
- 0x510008a5; (* arm_SUB W5 W5 (rvalue (word 2)) *)
- 0x4cdfa8f2; (* arm_ldstp_2q true (word 18) X7 (Postimmediate_Offset (word 32)) *)
- 0x4cdfa8f4; (* arm_ldstp_2q true (word 20) X7 (Postimmediate_Offset (word 32)) *)
- 0x4cdfa8f6; (* arm_ldstp_2q true (word 22) X7 (Postimmediate_Offset (word 32)) *)
+ 0x4cdfa8f2; (* arm_LDP Q18 Q19 X7 (Postimmediate_Offset (word 32)) *)
+ 0x4cdfa8f4; (* arm_LDP Q20 Q21 X7 (Postimmediate_Offset (word 32)) *)
+ 0x4cdfa8f6; (* arm_LDP Q22 Q23 X7 (Postimmediate_Offset (word 32)) *)
 0x4c4078e7; (* arm_LDR Q7 X7 No_Offset *)
+ 0x510008a5; (* arm_SUB W5 W5 (rvalue (word 2)) *)
 0x91008067; (* arm_ADD X7 X3 (rvalue (word 32)) *)
 0x2a0503e6; (* arm_MOV W6 W5 *)
 0x4cdf7018; (* arm_LDR Q24 X0 (Postimmediate_Offset (word 16)) *)
@@ -120,7 +120,7 @@
 0x4ea01c1c; (* arm_MOV_VEC Q28 Q0 128 *)
 0x4eb81f1b; (* arm_MOV_VEC Q27 Q24 128 *)
 0x4eb81f1d; (* arm_MOV_VEC Q29 Q24 128 *)
- 0x540027e3; (* arm_BCC (word 1276) *)
+ 0x54002723; (* arm_BCC (word 1252) *)
 0x6e261c00; (* arm_EOR_VEC Q0 Q0 Q6 128 *)
 0x6e281f18; (* arm_EOR_VEC Q24 Q24 Q8 128 *)
 0x93ca8156; (* arm_ROR X22 X10 32 *)
@@ -136,7 +136,7 @@
 0x6e291f1b; (* arm_EOR_VEC Q27 Q24 Q9 128 *)
 0x6e291f18; (* arm_EOR_VEC Q24 Q24 Q9 128 *)
 0xf100805f; (* arm_CMP X2 (rvalue (word 32)) *)
- 0x54001be3; (* arm_BCC (word 892) *)
+ 0x54001b63; (* arm_BCC (word 876) *)
 0x93ca8156; (* arm_ROR X22 X10 32 *)
 0x93c9fd4a; (* arm_EXTR X10 X10 X9 63 *)
 0x0a967e6b; (* arm_AND W11 W19 (Shiftedreg W22 ASR 31) *)
@@ -155,10 +155,6 @@
 0x6e2b1f5a; (* arm_EOR_VEC Q26 Q26 Q11 128 *)
 0xd1008042; (* arm_SUB X2 X2 (rvalue (word 32)) *)
 0x2a0503e6; (* arm_MOV W6 W5 *)
- 0x14000004; (* arm_B (word 16) *)
- 0xd503201f; (* arm_NOP *)
- 0xd503201f; (* arm_NOP *)
- 0xd503201f; (* arm_NOP *)
 0x4e284a00; (* arm_AESE Q0 Q16 *)
 0x4e286800; (* arm_AESMC Q0 Q0 *)
 0x4e284a01; (* arm_AESE Q1 Q16 *)
@@ -258,8 +254,12 @@
 0x4e286b39; (* arm_AESMC Q25 Q25 *)
 0x4e284ada; (* arm_AESE Q26 Q22 *)
 0x4e286b5a; (* arm_AESMC Q26 Q26 *)
- 0x6e261ce4; (* arm_EOR_VEC Q4 Q7 Q6 128 *)
 0x4e284ae0; (* arm_AESE Q0 Q23 *)
+ 0x4e284ae1; (* arm_AESE Q1 Q23 *)
+ 0x4e284af8; (* arm_AESE Q24 Q23 *)
+ 0x4e284af9; (* arm_AESE Q25 Q23 *)
+ 0x4e284afa; (* arm_AESE Q26 Q23 *)
+ 0x6e261ce4; (* arm_EOR_VEC Q4 Q7 Q6 128 *)
 0x93ca8156; (* arm_ROR X22 X10 32 *)
 0x93c9fd4a; (* arm_EXTR X10 X10 X9 63 *)
 0x0a967e6b; (* arm_AND W11 W19 (Shiftedreg W22 ASR 31) *)
@@ -268,7 +268,6 @@
 0x9eaf0146; (* arm_FMOV_ItoF Q6 X10 1 *)
 0x6e281ce5; (* arm_EOR_VEC Q5 Q7 Q8 128 *)
 0x4cdf7002; (* arm_LDR Q2 X0 (Postimmediate_Offset (word 16)) *)
- 0x4e284ae1; (* arm_AESE Q1 Q23 *)
 0x93ca8156; (* arm_ROR X22 X10 32 *)
 0x93c9fd4a; (* arm_EXTR X10 X10 X9 63 *)
 0x0a967e6b; (* arm_AND W11 W19 (Shiftedreg W22 ASR 31) *)
@@ -277,7 +276,6 @@
 0x9eaf0148; (* arm_FMOV_ItoF Q8 X10 1 *)
 0x6e291cf1; (* arm_EOR_VEC Q17 Q7 Q9 128 *)
 0x4cdf7003; (* arm_LDR Q3 X0 (Postimmediate_Offset (word 16)) *)
- 0x4e284af8; (* arm_AESE Q24 Q23 *)
 0x93ca8156; (* arm_ROR X22 X10 32 *)
 0x93c9fd4a; (* arm_EXTR X10 X10 X9 63 *)
 0x0a967e6b; (* arm_AND W11 W19 (Shiftedreg W22 ASR 31) *)
@@ -286,7 +284,6 @@
 0x9eaf0149; (* arm_FMOV_ItoF Q9 X10 1 *)
 0x6e2a1cfe; (* arm_EOR_VEC Q30 Q7 Q10 128 *)
 0x4cdf701b; (* arm_LDR Q27 X0 (Postimmediate_Offset (word 16)) *)
- 0x4e284af9; (* arm_AESE Q25 Q23 *)
 0x93ca8156; (* arm_ROR X22 X10 32 *)
 0x93c9fd4a; (* arm_EXTR X10 X10 X9 63 *)
 0x0a967e6b; (* arm_AND W11 W19 (Shiftedreg W22 ASR 31) *)
@@ -295,7 +292,6 @@
 0x9eaf014a; (* arm_FMOV_ItoF Q10 X10 1 *)
 0x6e2b1cff; (* arm_EOR_VEC Q31 Q7 Q11 128 *)
 0x4cdf701c; (* arm_LDR Q28 X0 (Postimmediate_Offset (word 16)) *)
- 0x4e284afa; (* arm_AESE Q26 Q23 *)
 0x93ca8156; (* arm_ROR X22 X10 32 *)
 0x93c9fd4a; (* arm_EXTR X10 X10 X9 63 *)
 0x0a967e6b; (* arm_AND W11 W19 (Shiftedreg W22 ASR 31) *)
@@ -304,7 +300,6 @@
 0x9eaf014b; (* arm_FMOV_ItoF Q11 X10 1 *)
 0x4cdf701d; (* arm_LDR Q29 X0 (Postimmediate_Offset (word 16)) *)
 0xb4000586; (* arm_CBZ X6 (word 176) *)
- 0x4cdf78f0; (* arm_LDR Q16 X7 (Postimmediate_Offset (word 16)) *)
 0x6e201c84; (* arm_EOR_VEC Q4 Q4 Q0 128 *)
 0x6e261c40; (* arm_EOR_VEC Q0 Q2 Q6 128 *)
 0x6e211ca5; (* arm_EOR_VEC Q5 Q5 Q1 128 *)
@@ -314,15 +309,14 @@
 0x6e391fde; (* arm_EOR_VEC Q30 Q30 Q25 128 *)
 0x6e2a1f99; (* arm_EOR_VEC Q25 Q28 Q10 128 *)
 0x6e3a1fff; (* arm_EOR_VEC Q31 Q31 Q26 128 *)
- 0x4c9f7024; (* arm_STR Q4 X1 (Postimmediate_Offset (word 16)) *)
 0x6e2b1fba; (* arm_EOR_VEC Q26 Q29 Q11 128 *)
- 0x4c9f7025; (* arm_STR Q5 X1 (Postimmediate_Offset (word 16)) *)
 0x2a0503e6; (* arm_MOV W6 W5 *)
+ 0x4c9fa024; (* arm_STP Q4 Q5 X1 (Postimmediate_Offset (word 32)) *)
 0x4c9f7031; (* arm_STR Q17 X1 (Postimmediate_Offset (word 16)) *)
+ 0x4c9fa03e; (* arm_STP Q30 Q31 X1 (Postimmediate_Offset (word 32)) *)
+ 0x4cdf78f0; (* arm_LDR Q16 X7 (Postimmediate_Offset (word 16)) *)
 0x4cdf78f1; (* arm_LDR Q17 X7 (Postimmediate_Offset (word 16)) *)
- 0x4c9f703e; (* arm_STR Q30 X1 (Postimmediate_Offset (word 16)) *)
- 0x4c9f703f; (* arm_STR Q31 X1 (Postimmediate_Offset (word 16)) *)
- 0x54ffeba2; (* arm_BCS (word 2096500) *)
+ 0x54ffebe2; (* arm_BCS (word 2096508) *)
 0xb100405f; (* arm_CMN X2 (rvalue (word 16)) *)
 0x540001a1; (* arm_BNE (word 52) *)
 0x4eaa1d4b; (* arm_MOV_VEC Q11 Q10 128 *)
@@ -336,16 +330,18 @@
 0x6e291f78; (* arm_EOR_VEC Q24 Q27 Q9 128 *)
 0x6e2a1f99; (* arm_EOR_VEC Q25 Q28 Q10 128 *)
 0x6e2b1fba; (* arm_EOR_VEC Q26 Q29 Q11 128 *)
- 0x54ffe9e0; (* arm_BEQ (word 2096444) *)
+ 0x54ffea20; (* arm_BEQ (word 2096452) *)
 0x91014042; (* arm_ADD X2 X2 (rvalue (word 80)) *)
- 0xb40015a2; (* arm_CBZ X2 (word 692) *)
+ 0xb4001662; (* arm_CBZ X2 (word 716) *)
 0x110008a6; (* arm_ADD W6 W5 (rvalue (word 2)) *)
 0xf100c042; (* arm_SUBS X2 X2 (rvalue (word 48)) *)
 0x54000c43; (* arm_BCC (word 392) *)
 0x6e3b1cc0; (* arm_EOR_VEC Q0 Q6 Q27 128 *)
 0x6e3c1d01; (* arm_EOR_VEC Q1 Q8 Q28 128 *)
 0x6e291fb8; (* arm_EOR_VEC Q24 Q29 Q9 128 *)
- 0x1400000e; (* arm_B (word 56) *)
+ 0x14000010; (* arm_B (word 64) *)
+ 0xd503201f; (* arm_NOP *)
+ 0xd503201f; (* arm_NOP *)
 0xd503201f; (* arm_NOP *)
 0x91004000; (* arm_ADD X0 X0 (rvalue (word 16)) *)
 0x6e251c25; (* arm_EOR_VEC Q5 Q1 Q5 128 *)
@@ -354,8 +350,8 @@
 0x4c9f7031; (* arm_STR Q17 X1 (Postimmediate_Offset (word 16)) *)
 0x6e3e1f3e; (* arm_EOR_VEC Q30 Q25 Q30 128 *)
 0x6e3f1f5f; (* arm_EOR_VEC Q31 Q26 Q31 128 *)
- 0x4c9fa03e; (* arm_ldstp_2q false (word 30) X1 (Postimmediate_Offset (word 32)) *)
- 0x1400009c; (* arm_B (word 624) *)
+ 0x4c9fa03e; (* arm_STP Q30 Q31 X1 (Postimmediate_Offset (word 32)) *)
+ 0x140000a0; (* arm_B (word 640) *)
 0xd503201f; (* arm_NOP *)
 0xd503201f; (* arm_NOP *)
 0xd503201f; (* arm_NOP *)
@@ -382,27 +378,12 @@
 0x4e284a18; (* arm_AESE Q24 Q16 *)
 0x4e286b18; (* arm_AESMC Q24 Q24 *)
 0x6e271cc4; (* arm_EOR_VEC Q4 Q6 Q7 128 *)
- 0xf100c042; (* arm_SUBS X2 X2 (rvalue (word 48)) *)
- 0x9e660129; (* arm_FMOV_FtoI X9 Q9 0 *)
- 0x9eae012a; (* arm_FMOV_FtoI X10 Q9 1 *)
- 0x93ca8156; (* arm_ROR X22 X10 32 *)
- 0x93c9fd4a; (* arm_EXTR X10 X10 X9 63 *)
- 0x0a967e6b; (* arm_AND W11 W19 (Shiftedreg W22 ASR 31) *)
- 0xca090569; (* arm_EOR X9 X11 (Shiftedreg X9 LSL 1) *)
- 0x9e670126; (* arm_FMOV_ItoF Q6 X9 0 *)
- 0x9eaf0146; (* arm_FMOV_ItoF Q6 X10 1 *)
- 0x6e271d05; (* arm_EOR_VEC Q5 Q8 Q7 128 *)
- 0x9a863046; (* arm_CSEL X6 X2 X6 Condition_CC *)
 0x4e284a20; (* arm_AESE Q0 Q17 *)
 0x4e286800; (* arm_AESMC Q0 Q0 *)
 0x4e284a21; (* arm_AESE Q1 Q17 *)
 0x4e286821; (* arm_AESMC Q1 Q1 *)
 0x4e284a38; (* arm_AESE Q24 Q17 *)
 0x4e286b18; (* arm_AESMC Q24 Q24 *)
- 0x6e271d31; (* arm_EOR_VEC Q17 Q9 Q7 128 *)
- 0x910080c6; (* arm_ADD X6 X6 (rvalue (word 32)) *)
- 0x8b060000; (* arm_ADD X0 X0 X6 *)
- 0xaa0303e7; (* arm_MOV X7 X3 *)
 0x4e284a80; (* arm_AESE Q0 Q20 *)
 0x4e286800; (* arm_AESMC Q0 Q0 *)
 0x4e284a81; (* arm_AESE Q1 Q20 *)
@@ -424,18 +405,31 @@
 0x4e284ae0; (* arm_AESE Q0 Q23 *)
 0x4e284ae1; (* arm_AESE Q1 Q23 *)
 0x4e284af8; (* arm_AESE Q24 Q23 *)
+ 0x6e271d31; (* arm_EOR_VEC Q17 Q9 Q7 128 *)
+ 0xf100c042; (* arm_SUBS X2 X2 (rvalue (word 48)) *)
+ 0x9e660129; (* arm_FMOV_FtoI X9 Q9 0 *)
+ 0x9eae012a; (* arm_FMOV_FtoI X10 Q9 1 *)
+ 0x93ca8156; (* arm_ROR X22 X10 32 *)
+ 0x93c9fd4a; (* arm_EXTR X10 X10 X9 63 *)
+ 0x0a967e6b; (* arm_AND W11 W19 (Shiftedreg W22 ASR 31) *)
+ 0xca090569; (* arm_EOR X9 X11 (Shiftedreg X9 LSL 1) *)
+ 0x9e670126; (* arm_FMOV_ItoF Q6 X9 0 *)
+ 0x9eaf0146; (* arm_FMOV_ItoF Q6 X10 1 *)
+ 0x6e271d05; (* arm_EOR_VEC Q5 Q8 Q7 128 *)
+ 0x9a863046; (* arm_CSEL X6 X2 X6 Condition_CC *)
+ 0x910080c6; (* arm_ADD X6 X6 (rvalue (word 32)) *)
+ 0x8b060000; (* arm_ADD X0 X0 X6 *)
+ 0xaa0303e7; (* arm_MOV X7 X3 *)
 0x4cdf701b; (* arm_LDR Q27 X0 (Postimmediate_Offset (word 16)) *)
 0x110008a6; (* arm_ADD W6 W5 (rvalue (word 2)) *)
- 0x4cdf78f0; (* arm_LDR Q16 X7 (Postimmediate_Offset (word 16)) *)
 0x6e201c84; (* arm_EOR_VEC Q4 Q4 Q0 128 *)
 0x6e211ca5; (* arm_EOR_VEC Q5 Q5 Q1 128 *)
 0x6e311f18; (* arm_EOR_VEC Q24 Q24 Q17 128 *)
- 0x4cdf78f1; (* arm_LDR Q17 X7 (Postimmediate_Offset (word 16)) *)
- 0x4c9f7024; (* arm_STR Q4 X1 (Postimmediate_Offset (word 16)) *)
- 0x4c9f7025; (* arm_STR Q5 X1 (Postimmediate_Offset (word 16)) *)
+ 0x4cdfa8f0; (* arm_LDP Q16 Q17 X7 (Postimmediate_Offset (word 32)) *)
+ 0x4c9fa024; (* arm_STP Q4 Q5 X1 (Postimmediate_Offset (word 32)) *)
 0x4c9f7038; (* arm_STR Q24 X1 (Postimmediate_Offset (word 16)) *)
 0xb100c05f; (* arm_CMN X2 (rvalue (word 48)) *)
- 0x54000980; (* arm_BEQ (word 304) *)
+ 0x54000a40; (* arm_BEQ (word 328) *)
 0x4ea31c7c; (* arm_MOV_VEC Q28 Q3 128 *)
 0x4ebb1f7d; (* arm_MOV_VEC Q29 Q27 128 *)
 0xd503201f; (* arm_NOP *)
@@ -496,7 +490,7 @@
 0xca090569; (* arm_EOR X9 X11 (Shiftedreg X9 LSL 1) *)
 0x9e670126; (* arm_FMOV_ItoF Q6 X9 0 *)
 0x9eaf0146; (* arm_FMOV_ItoF Q6 X10 1 *)
- 0x1400000f; (* arm_B (word 60) *)
+ 0x14000015; (* arm_B (word 84) *)
 0x6e381ca5; (* arm_EOR_VEC Q5 Q5 Q24 128 *)
 0x4ea61cc6; (* arm_MOV_VEC Q6 Q6 128 *)
 0x4c9f7025; (* arm_STR Q5 X1 (Postimmediate_Offset (word 16)) *)
@@ -509,7 +503,13 @@
 0xca090569; (* arm_EOR X9 X11 (Shiftedreg X9 LSL 1) *)
 0x9e670126; (* arm_FMOV_ItoF Q6 X9 0 *)
 0x9eaf0146; (* arm_FMOV_ItoF Q6 X10 1 *)
- 0x14000002; (* arm_B (word 8) *)
+ 0x14000008; (* arm_B (word 32) *)
+ 0xd503201f; (* arm_NOP *)
+ 0xd503201f; (* arm_NOP *)
+ 0xd503201f; (* arm_NOP *)
+ 0xd503201f; (* arm_NOP *)
+ 0xd503201f; (* arm_NOP *)
+ 0xd503201f; (* arm_NOP *)
 0xd503201f; (* arm_NOP *)
 0xf2400ebf; (* arm_TST X21 (rvalue (word 15)) *)
 0x540003e0; (* arm_BEQ (word 124) *)
@@ -571,10 +571,10 @@
 0x4c407000; (* arm_LDR Q0 X0 No_Offset *)
 0x6e201cc0; (* arm_EOR_VEC Q0 Q6 Q0 128 *)
 0xb940f066; (* arm_LDR W6 X3 (Immediate_Offset (word 240)) *)
- 0x4cdfa87c; (* arm_ldstp_2q true (word 28) X3 (Postimmediate_Offset (word 32)) *)
+ 0x4cdfa87c; (* arm_LDP Q28 Q29 X3 (Postimmediate_Offset (word 32)) *)
 0x4e285b80; (* arm_AESD Q0 Q28 *)
 0x4e287800; (* arm_AESIMC Q0 Q0 *)
- 0x4cdfa870; (* arm_ldstp_2q true (word 16) X3 (Postimmediate_Offset (word 32)) *)
+ 0x4cdfa870; (* arm_LDP Q16 Q17 X3 (Postimmediate_Offset (word 32)) *)
 0x4e285ba0; (* arm_AESD Q0 Q29 *)
 0x4e287800; (* arm_AESIMC Q0 Q0 *)
 0x710028c6; (* arm_SUBS W6 W6 (rvalue (word 10)) *)
@@ -587,17 +587,17 @@
 0x4cdf7871; (* arm_LDR Q17 X3 (Postimmediate_Offset (word 16)) *)
 0x710008c6; (* arm_SUBS W6 W6 (rvalue (word 2)) *)
 0x54ffff2c; (* arm_BGT (word 2097124) *)
- 0x4cdfa872; (* arm_ldstp_2q true (word 18) X3 (Postimmediate_Offset (word 32)) *)
+ 0x4cdfa872; (* arm_LDP Q18 Q19 X3 (Postimmediate_Offset (word 32)) *)
 0x4e285a00; (* arm_AESD Q0 Q16 *)
 0x4e287800; (* arm_AESIMC Q0 Q0 *)
 0x4e285a20; (* arm_AESD Q0 Q17 *)
 0x4e287800; (* arm_AESIMC Q0 Q0 *)
- 0x4cdfa874; (* arm_ldstp_2q true (word 20) X3 (Postimmediate_Offset (word 32)) *)
+ 0x4cdfa874; (* arm_LDP Q20 Q21 X3 (Postimmediate_Offset (word 32)) *)
 0x4e285a40; (* arm_AESD Q0 Q18 *)
 0x4e287800; (* arm_AESIMC Q0 Q0 *)
 0x4e285a60; (* arm_AESD Q0 Q19 *)
 0x4e287800; (* arm_AESIMC Q0 Q0 *)
- 0x4cdfa876; (* arm_ldstp_2q true (word 22) X3 (Postimmediate_Offset (word 32)) *)
+ 0x4cdfa876; (* arm_LDP Q22 Q23 X3 (Postimmediate_Offset (word 32)) *)
 0x4e285a80; (* arm_AESD Q0 Q20 *)
 0x4e287800; (* arm_AESIMC Q0 Q0 *)
 0x4e285aa0; (* arm_AESD Q0 Q21 *)
@@ -653,13 +653,13 @@
 0xca090569; (* arm_EOR X9 X11 (Shiftedreg X9 LSL 1) *)
 0x9e670129; (* arm_FMOV_ItoF Q9 X9 0 *)
 0x9eaf0149; (* arm_FMOV_ItoF Q9 X10 1 *)
- 0x4c40a870; (* arm_ldstp_2q true (word 16) X3 No_Offset *)
+ 0x4c40a870; (* arm_LDP Q16 Q17 X3 No_Offset *)
 0x510018a5; (* arm_SUB W5 W5 (rvalue (word 6)) *)
 0x8b051067; (* arm_ADD X7 X3 (Shiftedreg X5 LSL 4) *)
 0x510008a5; (* arm_SUB W5 W5 (rvalue (word 2)) *)
- 0x4cdfa8f2; (* arm_ldstp_2q true (word 18) X7 (Postimmediate_Offset (word 32)) *)
- 0x4cdfa8f4; (* arm_ldstp_2q true (word 20) X7 (Postimmediate_Offset (word 32)) *)
- 0x4cdfa8f6; (* arm_ldstp_2q true (word 22) X7 (Postimmediate_Offset (word 32)) *)
+ 0x4cdfa8f2; (* arm_LDP Q18 Q19 X7 (Postimmediate_Offset (word 32)) *)
+ 0x4cdfa8f4; (* arm_LDP Q20 Q21 X7 (Postimmediate_Offset (word 32)) *)
+ 0x4cdfa8f6; (* arm_LDP Q22 Q23 X7 (Postimmediate_Offset (word 32)) *)
 0x4c4078e7; (* arm_LDR Q7 X7 No_Offset *)
 0x93ca8156; (* arm_ROR X22 X10 32 *)
 0x93c9fd4a; (* arm_EXTR X10 X10 X9 63 *)
@@ -907,7 +907,7 @@
 0x4c9f7031; (* arm_STR Q17 X1 (Postimmediate_Offset (word 16)) *)
 0x6e3e1f3e; (* arm_EOR_VEC Q30 Q25 Q30 128 *)
 0x6e3f1f5f; (* arm_EOR_VEC Q31 Q26 Q31 128 *)
- 0x4c9fa03e; (* arm_ldstp_2q false (word 30) X1 (Postimmediate_Offset (word 32)) *)
+ 0x4c9fa03e; (* arm_STP Q30 Q31 X1 (Postimmediate_Offset (word 32)) *)
 0x54001a00; (* arm_BEQ (word 832) *)
 0x4cdf7800; (* arm_LDR Q0 X0 (Postimmediate_Offset (word 16)) *)
 0x14000098; (* arm_B (word 608) *)
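The branch-word edits throughout these hunks (`arm_B (word 1948)` becoming `arm_B (word 1952)` as padding NOPs shift its target, the retargeted `arm_BCC`/`arm_BEQ`/`arm_CBZ` entries, and so on) can be checked directly against the opcodes: an unconditional B carries a 26-bit word offset in bits 25:0, and a conditional or compare-and-branch instruction a 19-bit word offset in bits 23:5, both scaled by 4. A small C checking aid for the forward branches above (not part of the proof sources; sign extension for backward branches is omitted):

#include <stdint.h>
#include <stdio.h>

/* Byte offset of an unconditional B: imm26 in bits 25:0, times 4. */
static uint64_t b_offset(uint32_t insn)
{
    return (uint64_t)(insn & 0x03ffffffu) << 2;
}

/* Byte offset of a B.cond / CBZ-style encoding: imm19 in bits 23:5, times 4. */
static uint64_t imm19_offset(uint32_t insn)
{
    return (uint64_t)((insn >> 5) & 0x7ffffu) << 2;
}

int main(void)
{
    printf("%llu\n", (unsigned long long)b_offset(0x140001e8));      /* 1952 */
    printf("%llu\n", (unsigned long long)imm19_offset(0x54002723));  /* 1252 */
    printf("%llu\n", (unsigned long long)imm19_offset(0xb4001662));  /* 716  */
    return 0;
}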